From 94335808de3a9df9a9a17ceed77d97d5207df973 Mon Sep 17 00:00:00 2001 From: dariarom94 Date: Wed, 17 Jun 2026 17:46:27 +0200 Subject: [PATCH 1/2] add v2-3 conv --- src/data_processors/process_dataset/script.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/data_processors/process_dataset/script.py b/src/data_processors/process_dataset/script.py index b1e2312cb..4d8881434 100644 --- a/src/data_processors/process_dataset/script.py +++ b/src/data_processors/process_dataset/script.py @@ -4,6 +4,8 @@ import spatialdata as sd import os import shutil +import tempfile +from pathlib import Path ### VIASH START par = { @@ -172,8 +174,22 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0): # Load the single-cell data adata = ad.read_h5ad(par["input_sc"]) +# Migrate zarr v2 stores to v3 before reading (zarr v2 uses .zattrs; zarr v3 uses zarr.json) +input_sp = par["input_sp"] +_tmp_dir = None +if Path(input_sp, ".zattrs").exists(): + print(f"Detected zarr v2 store at {input_sp}, migrating to zarr v3...") + _tmp_dir = tempfile.mkdtemp() + _tmp_path = os.path.join(_tmp_dir, "dataset.zarr") + _sdata_v2 = sd.read_zarr(input_sp) + _sdata_v2.write(_tmp_path) + del _sdata_v2 + input_sp = _tmp_path + # Load the spatial data -sdata = sd.read_zarr(par["input_sp"]) +sdata = sd.read_zarr(input_sp) +if _tmp_dir is not None: + shutil.rmtree(_tmp_dir) # Subset single-cell data if it is too large N_MAX_SC = 120000 From a74afca2532afbfe3245fe006d9202f792f685fb Mon Sep 17 00:00:00 2001 From: dariarom94 Date: Thu, 18 Jun 2026 15:50:41 +0200 Subject: [PATCH 2/2] fix deleting a tmp too early --- src/data_processors/process_dataset/script.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/data_processors/process_dataset/script.py b/src/data_processors/process_dataset/script.py index 4d8881434..be2c97054 100644 --- a/src/data_processors/process_dataset/script.py +++ b/src/data_processors/process_dataset/script.py @@ -188,8 +188,6 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0): # Load the spatial data sdata = sd.read_zarr(input_sp) -if _tmp_dir is not None: - shutil.rmtree(_tmp_dir) # Subset single-cell data if it is too large N_MAX_SC = 120000 @@ -264,3 +262,7 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0): # Save the spatial data sdata_output.write(par["output_sp"], overwrite=True) + +# Clean up zarr v2 migration temp dir only after all writes are complete +if _tmp_dir is not None: + shutil.rmtree(_tmp_dir)