From 94335808de3a9df9a9a17ceed77d97d5207df973 Mon Sep 17 00:00:00 2001
From: dariarom94
Date: Wed, 17 Jun 2026 17:46:27 +0200
Subject: [PATCH 1/8] add v2-3 conv

---
Notes (review):
    Adds a pre-read step to process_dataset/script.py: when
    "<input_sp>/.zattrs" exists the store is treated as zarr v2, read with
    sd.read_zarr(), written to "dataset.zarr" inside a tempfile.mkdtemp()
    directory, and the script then reads the spatial data from that copy.
    In this patch the temp directory is removed right after `sdata` is
    read; PATCH 2/8 moves that removal to the end of the script.
    NOTE(review): detection looks at ".zattrs" only. Presumably every v2
    input carries root attributes; confirm, or also check ".zgroup".
    NOTE(review): mkdtemp() is called without a `dir`, so a full copy of
    the spatial store lands in the default temp location; confirm the
    worker has room for it.

 src/data_processors/process_dataset/script.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/data_processors/process_dataset/script.py b/src/data_processors/process_dataset/script.py
index b1e2312cb..4d8881434 100644
--- a/src/data_processors/process_dataset/script.py
+++ b/src/data_processors/process_dataset/script.py
@@ -4,6 +4,8 @@
 import spatialdata as sd
 import os
 import shutil
+import tempfile
+from pathlib import Path
 
 ### VIASH START
 par = {
@@ -172,8 +174,22 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0):
 # Load the single-cell data
 adata = ad.read_h5ad(par["input_sc"])
 
+# Migrate zarr v2 stores to v3 before reading (zarr v2 uses .zattrs; zarr v3 uses zarr.json)
+input_sp = par["input_sp"]
+_tmp_dir = None
+if Path(input_sp, ".zattrs").exists():
+    print(f"Detected zarr v2 store at {input_sp}, migrating to zarr v3...")
+    _tmp_dir = tempfile.mkdtemp()
+    _tmp_path = os.path.join(_tmp_dir, "dataset.zarr")
+    _sdata_v2 = sd.read_zarr(input_sp)
+    _sdata_v2.write(_tmp_path)
+    del _sdata_v2
+    input_sp = _tmp_path
+
 # Load the spatial data
-sdata = sd.read_zarr(par["input_sp"])
+sdata = sd.read_zarr(input_sp)
+if _tmp_dir is not None:
+    shutil.rmtree(_tmp_dir)
 
 # Subset single-cell data if it is too large
 N_MAX_SC = 120000
From a74afca2532afbfe3245fe006d9202f792f685fb Mon Sep 17 00:00:00 2001
From: dariarom94
Date: Thu, 18 Jun 2026 15:50:41 +0200
Subject: [PATCH 2/8] fix deleting a tmp too early

---
Notes (review):
    Removes the shutil.rmtree(_tmp_dir) that ran directly after
    sd.read_zarr(input_sp) and re-adds it after the final
    sdata_output.write(...), so the migrated copy stays on disk until the
    outputs are written.
    NOTE(review): as far as these hunks show, the cleanup is a plain
    trailing statement. If anything between mkdtemp() and that line
    raises, the temp copy is presumably left behind. Consider a
    try/finally or an atexit hook in a follow-up patch.

 src/data_processors/process_dataset/script.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/data_processors/process_dataset/script.py b/src/data_processors/process_dataset/script.py
index 4d8881434..be2c97054 100644
--- a/src/data_processors/process_dataset/script.py
+++ b/src/data_processors/process_dataset/script.py
@@ -188,8 +188,6 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0):
 
 # Load the spatial data
 sdata = sd.read_zarr(input_sp)
-if _tmp_dir is not None:
-    shutil.rmtree(_tmp_dir)
 
 # Subset single-cell data if it is too large
 N_MAX_SC = 120000
@@ -264,3 +262,7 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0):
 
 # Save the spatial data
 sdata_output.write(par["output_sp"], overwrite=True)
+
+# Clean up zarr v2 migration temp dir only after all writes are complete
+if _tmp_dir is not None:
+    shutil.rmtree(_tmp_dir)
From cd4e481f84de82fa6c4063cfbb32b2cc3c2b4934 Mon Sep 17 00:00:00 2001
From: dariarom94
Date: Thu, 18 Jun 2026 17:15:48 +0200
Subject: [PATCH 3/8] adjust memory

---
Notes (review):
    Changes the nextflow label of process_dataset from highmem to midmem.
    NOTE(review): the limits behind midmem are not part of this series.
    Confirm they are sufficient now that the v2 migration in PATCH 1/8
    reads and rewrites the whole spatial store.

 src/data_processors/process_dataset/config.vsh.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/data_processors/process_dataset/config.vsh.yaml b/src/data_processors/process_dataset/config.vsh.yaml
index 67ecba505..9184a3f8c 100644
--- a/src/data_processors/process_dataset/config.vsh.yaml
+++ b/src/data_processors/process_dataset/config.vsh.yaml
@@ -35,4 +35,4 @@ runners:
   - type: executable
   - type: nextflow
     directives:
-      label: [lowcpu, highmem, hightime]
+      label: [lowcpu, midmem, hightime]
From 01be8d8b7b78099330b5fe1f24b1ef1feb9fb3bf Mon Sep 17 00:00:00 2001
From: dariarom94
Date: Thu, 18 Jun 2026 17:46:04 +0200
Subject: [PATCH 4/8] memory change

---
Notes (review):
    Adds the same `pod` nodeSelector, a literal node-group id, to the
    highmem and veryhighmem labels.
    NOTE(review): the id is duplicated and hard-coded; presumably it is
    specific to one cluster. Confirm, and consider a single shared
    variable.

 src/base/labels_nebius.config | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/base/labels_nebius.config b/src/base/labels_nebius.config
index f3c366c5e..c0a16df3a 100644
--- a/src/base/labels_nebius.config
+++ b/src/base/labels_nebius.config
@@ -54,10 +54,12 @@ process {
   withLabel: highmem {
     memory = { get_memory( 100.GB * task.attempt ) }
     disk = { 200.GB * task.attempt }
+    pod = [[nodeSelector: 'nebius.com/node-group-id=mk8snodegroup-e00hnqhyfdcsy9m09n']]
   }
   withLabel: veryhighmem {
     memory = { get_memory( 200.GB * task.attempt ) }
     disk = { 400.GB * task.attempt }
+    pod = [[nodeSelector: 'nebius.com/node-group-id=mk8snodegroup-e00hnqhyfdcsy9m09n']]
   }
   withLabel: lowsharedmem {
     containerOptions = { workflow.containerEngine != 'singularity' ? "--shm-size ${String.format("%.0f",task.memory.mega * 0.05)}" : ""}
From d9b7ba77042e3ab500259199b1cdd3755dcacab0 Mon Sep 17 00:00:00 2001
From: dariarom94
Date: Thu, 18 Jun 2026 18:53:03 +0200
Subject: [PATCH 5/8] nebius config adjust

---
Notes (review):
    Replaces the attempt-scaled disk closures with a fixed 200.GB for
    both labels; veryhighmem goes from `400.GB * task.attempt` to 200.GB.
    NOTE(review): disk no longer grows on retries while memory still
    does. Confirm this is intended.

 src/base/labels_nebius.config | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/base/labels_nebius.config b/src/base/labels_nebius.config
index c0a16df3a..a27ef5dbc 100644
--- a/src/base/labels_nebius.config
+++ b/src/base/labels_nebius.config
@@ -53,12 +53,12 @@ process {
   }
   withLabel: highmem {
     memory = { get_memory( 100.GB * task.attempt ) }
-    disk = { 200.GB * task.attempt }
+    disk = 200.GB
     pod = [[nodeSelector: 'nebius.com/node-group-id=mk8snodegroup-e00hnqhyfdcsy9m09n']]
   }
   withLabel: veryhighmem {
     memory = { get_memory( 200.GB * task.attempt ) }
-    disk = { 400.GB * task.attempt }
+    disk = 200.GB
     pod = [[nodeSelector: 'nebius.com/node-group-id=mk8snodegroup-e00hnqhyfdcsy9m09n']]
   }
   withLabel: lowsharedmem {
From 7eb2158c239522878bdf357fc9c6696dc7a68173 Mon Sep 17 00:00:00 2001
From: dariarom94
Date: Thu, 18 Jun 2026 23:26:18 +0200
Subject: [PATCH 6/8] fix SPLIT setup

---
Notes (review):
    The docker setup step now installs libcurl4-openssl-dev, libssl-dev,
    libgit2-dev and libxml2-dev after `apt-get update`. The SPLIT install
    moves from a `type: r` / `github:` entry to a docker `run` step that
    calls remotes::install_github("bdsc-tds/SPLIT").
    NOTE(review): the Rscript call assumes the `remotes` R package is
    already installed by an earlier setup step. Confirm.

 src/methods_expression_correction/split/config.vsh.yaml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/methods_expression_correction/split/config.vsh.yaml b/src/methods_expression_correction/split/config.vsh.yaml
index 9cb227219..2f1c4f9ca 100644
--- a/src/methods_expression_correction/split/config.vsh.yaml
+++ b/src/methods_expression_correction/split/config.vsh.yaml
@@ -28,7 +28,7 @@ engines:
     setup:
       - type: docker
         run: |
-          apt-get update
+          apt-get update && apt-get install -y libcurl4-openssl-dev libssl-dev libgit2-dev libxml2-dev
       # - type: r
       #   packages: [fs, rlang, lifecycle]
       - type: r
@@ -41,8 +41,9 @@ engines:
         # The reinstall of SingleCellExperiment triggers the correct re-install of SpatialExperiment.
       - type: r
         github: dmcable/spacexr
-      - type: r
-        github: bdsc-tds/SPLIT
+      - type: docker
+        run: |
+          Rscript -e 'remotes::install_github("bdsc-tds/SPLIT")'
 
   - type: native
 
From d6ab33a806b8cd7cff50fd86a8e996a80fc10035 Mon Sep 17 00:00:00 2001
From: dariarom94
Date: Fri, 19 Jun 2026 00:18:17 +0200
Subject: [PATCH 7/8] fix split config

---
Notes (review):
    Drops devtools from both the active and the commented-out `bioc`
    package lists.

 src/methods_expression_correction/split/config.vsh.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/methods_expression_correction/split/config.vsh.yaml b/src/methods_expression_correction/split/config.vsh.yaml
index 2f1c4f9ca..d6fe7e14c 100644
--- a/src/methods_expression_correction/split/config.vsh.yaml
+++ b/src/methods_expression_correction/split/config.vsh.yaml
@@ -32,8 +32,8 @@ engines:
       # - type: r
       #   packages: [fs, rlang, lifecycle]
       - type: r
-        bioc: [SingleCellExperiment, anndataR, rhdf5, devtools, scater]
-        # bioc: [SpatialExperiment, anndataR, rhdf5, devtools, scater]
+        bioc: [SingleCellExperiment, anndataR, rhdf5, scater]
+        # bioc: [SpatialExperiment, anndataR, rhdf5, scater]
         # SingleCellExperiment part can probably be left out again in the future. It currently fixes a bug described in these issues:
         # https://github.com/drighelli/SpatialExperiment/issues/171
 
From d55bc22a95d448cce9c2d090b249320397bc1057 Mon Sep 17 00:00:00 2001
From: dariarom94
Date: Fri, 19 Jun 2026 00:39:21 +0200
Subject: [PATCH 8/8] memory adjust

---
Notes (review):
    Changes the nextflow label of the bruker_cosmx loader from
    veryhighmem to midmem.
    NOTE(review): the limits behind midmem are not part of this series.
    Confirm the loader fits.

 src/datasets/loaders/bruker_cosmx/config.vsh.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/datasets/loaders/bruker_cosmx/config.vsh.yaml b/src/datasets/loaders/bruker_cosmx/config.vsh.yaml
index 0c33c89d2..5fd748dfb 100644
--- a/src/datasets/loaders/bruker_cosmx/config.vsh.yaml
+++ b/src/datasets/loaders/bruker_cosmx/config.vsh.yaml
@@ -76,4 +76,4 @@ runners:
   - type: executable
   - type: nextflow
     directives:
-      label: [veryhighmem, midcpu, hightime]
+      label: [midmem, midcpu, hightime]