diff --git a/scripts/create_resources/spatial/process_allen_brain_cell_atlas_merfish_nebius.sh b/scripts/create_resources/spatial/process_allen_brain_cell_atlas_merfish_nebius.sh
new file mode 100755
index 000000000..51a28c574
--- /dev/null
+++ b/scripts/create_resources/spatial/process_allen_brain_cell_atlas_merfish_nebius.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+# Get the root of the repository
+REPO_ROOT=$(git rev-parse --show-toplevel)
+
+# Ensure that the command below is run from the root of the repository
+cd "$REPO_ROOT"
+
+set -e
+
+publish_dir="s3://openproblems-data/resources/datasets"
+
+cat > /tmp/params.yaml << HERE
+param_list:
+
+  - id: "allen_brain_cell_atlas_merfish/mouse1_coronal/rep1"
+    mouse: "mouse1_coronal"
+    experiment_id: "220422_wb3_co1_1B_6z18R_merfish5"
+    dataset_name: "Allen Brain Cell Atlas MERFISH Mouse 1 Coronal"
+    dataset_url: "https://download.brainimagelibrary.org/29/3c/293cc39ceea87f6d/"
+    dataset_summary: "Brain-wide MERFISH spatial transcriptomics of mouse 1 (coronal), Allen Brain Cell Atlas."
+    dataset_description: "Brain-wide MERFISH spatial transcriptomics data from the Zhuang lab. Mouse 1 coronal section imaged with ~1100 gene panel."
+    dataset_organism: "mus_musculus"
+    dataset_reference: "@article{Yao2023, author={Yao, Zizhen and others}, title={A high-resolution transcriptomic and spatial atlas of cell types in the whole mouse brain}, journal={Nature}, year={2023}}"
+    segmentation_id: ["cell"]
+
+  - id: "allen_brain_cell_atlas_merfish/mouse2_coronal/rep1"
+    mouse: "mouse2_coronal"
+    experiment_id: "220601_wb3_co2_1_5z18R2bd_merfish5"
+    dataset_name: "Allen Brain Cell Atlas MERFISH Mouse 2 Coronal"
+    dataset_url: "https://download.brainimagelibrary.org/29/3c/293cc39ceea87f6d/"
+    dataset_summary: "Brain-wide MERFISH spatial transcriptomics of mouse 2 (coronal), Allen Brain Cell Atlas."
+    dataset_description: "Brain-wide MERFISH spatial transcriptomics data from the Zhuang lab. Mouse 2 coronal section imaged with ~1100 gene panel."
+    dataset_organism: "mus_musculus"
+    dataset_reference: "@article{Yao2023, author={Yao, Zizhen and others}, title={A high-resolution transcriptomic and spatial atlas of cell types in the whole mouse brain}, journal={Nature}, year={2023}}"
+    segmentation_id: ["cell"]
+
+  - id: "allen_brain_cell_atlas_merfish/mouse3_sagittal/rep1"
+    mouse: "mouse3_sagittal"
+    experiment_id: "220609_wb3_sa1_1_5z18R_merfish5"
+    dataset_name: "Allen Brain Cell Atlas MERFISH Mouse 3 Sagittal"
+    dataset_url: "https://download.brainimagelibrary.org/29/3c/293cc39ceea87f6d/"
+    dataset_summary: "Brain-wide MERFISH spatial transcriptomics of mouse 3 (sagittal), Allen Brain Cell Atlas."
+    dataset_description: "Brain-wide MERFISH spatial transcriptomics data from the Zhuang lab. Mouse 3 sagittal section imaged with ~1100 gene panel."
+    dataset_organism: "mus_musculus"
+    dataset_reference: "@article{Yao2023, author={Yao, Zizhen and others}, title={A high-resolution transcriptomic and spatial atlas of cell types in the whole mouse brain}, journal={Nature}, year={2023}}"
+    segmentation_id: ["cell"]
+
+  - id: "allen_brain_cell_atlas_merfish/mouse4_sagittal/rep1"
+    mouse: "mouse4_sagittal"
+    experiment_id: "220912_wb3_sa2_2_5z18R_merfish5"
+    dataset_name: "Allen Brain Cell Atlas MERFISH Mouse 4 Sagittal"
+    dataset_url: "https://download.brainimagelibrary.org/29/3c/293cc39ceea87f6d/"
+    dataset_summary: "Brain-wide MERFISH spatial transcriptomics of mouse 4 (sagittal), Allen Brain Cell Atlas."
+    dataset_description: "Brain-wide MERFISH spatial transcriptomics data from the Zhuang lab. Mouse 4 sagittal section imaged with ~1100 gene panel."
+    dataset_organism: "mus_musculus"
+    dataset_reference: "@article{Yao2023, author={Yao, Zizhen and others}, title={A high-resolution transcriptomic and spatial atlas of cell types in the whole mouse brain}, journal={Nature}, year={2023}}"
+    segmentation_id: ["cell"]
+
+output_dataset: "\$id/dataset.zarr"
+output_state: "\$id/state.yaml"
+publish_dir: "$publish_dir"
+HERE
+
+tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
+  --revision build/main \
+  --pull-latest \
+  --main-script target/nextflow/datasets/workflows/process_allen_brain_cell_atlas_merfish/main.nf \
+  --workspace 167877437119966 \
+  --compute-env 5hfmdCBxMRd4nHZaJKYEQZ \
+  --params-file /tmp/params.yaml \
+  --config src/base/labels_nebius.config \
+  --labels datasets,allen_brain_cell_atlas_merfish
diff --git a/scripts/create_resources/spatial/process_bruker_cosmx_nebius.sh b/scripts/create_resources/spatial/process_bruker_cosmx_nebius.sh
new file mode 100644
index 000000000..f826be96b
--- /dev/null
+++ b/scripts/create_resources/spatial/process_bruker_cosmx_nebius.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Get the root of the repository
+REPO_ROOT=$(git rev-parse --show-toplevel)
+
+# Ensure that the command below is run from the root of the repository
+cd "$REPO_ROOT"
+
+set -e
+
+publish_dir="s3://openproblems-data/resources/datasets"
+
+cat > /tmp/params.yaml << HERE
+param_list:
+
+  - id: "bruker_cosmx/bruker_mouse_brain_cosmx/rep1"
+    input_raw: "https://smi-public.objects.liquidweb.services/HalfBrain.zip"
+    input_flat_files: "https://smi-public.objects.liquidweb.services/Half%20%20Brain%20simple%20%20files%20.zip"
+    dataset_name: "Bruker CosMx Mouse Brain"
+    dataset_url: "https://nanostring.com/products/cosmx-spatial-molecular-imager/ffpe-dataset/cosmx-smi-mouse-brain-ffpe-dataset/"
+    dataset_summary: "Bruker CosMx Mouse Brain dataset on FFPE covering a full hemisphere of a mouse brain."
+    dataset_description: "Bruker CosMx Mouse Brain dataset on FFPE covering a full hemisphere of a mouse brain."
+    dataset_organism: "mus_musculus"
+    segmentation_id: ["cell"]
+
+  - id: "bruker_cosmx/bruker_human_liver_cosmx"
+    input_raw: "https://smi-public.objects.liquidweb.services/NormalLiverFiles.zip"
+    input_flat_files: "https://syncandshare.desy.de/index.php/s/zYT4fM28y86cZeW/download/NormalLiver.zip"
+    dataset_name: "Bruker CosMx Human Liver"
+    dataset_url: "https://nanostring.com/products/cosmx-spatial-molecular-imager/ffpe-dataset/human-liver-rna-ffpe-dataset/"
+    dataset_summary: "Bruker CosMx Human Liver dataset on FFPE."
+    dataset_description: "Bruker CosMx Human Liver dataset on FFPE."
+    dataset_organism: "homo_sapiens"
+    segmentation_id: ["cell"]
+
+
+output_dataset: "\$id/dataset.zarr"
+output_state: "\$id/state.yaml"
+publish_dir: "$publish_dir"
+HERE
+
+tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
+  --revision build/main \
+  --pull-latest \
+  --main-script target/nextflow/datasets/workflows/process_bruker_cosmx/main.nf \
+  --workspace 167877437119966 \
+  --compute-env 5hfmdCBxMRd4nHZaJKYEQZ \
+  --params-file /tmp/params.yaml \
+  --config src/base/labels_nebius.config \
+  --labels datasets,bruker_cosmx
diff --git a/src/data_processors/process_dataset/script.py b/src/data_processors/process_dataset/script.py
index 897f59ba6..be2c97054 100644
--- a/src/data_processors/process_dataset/script.py
+++ b/src/data_processors/process_dataset/script.py
@@ -188,8 +188,6 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0):
 
 # Load the spatial data
 sdata = sd.read_zarr(input_sp)
-if _tmp_dir is not None:
-    shutil.rmtree(_tmp_dir)
 
 # Subset single-cell data if it is too large
 N_MAX_SC = 120000
diff --git a/src/datasets/loaders/allen_brain_cell_atlas_merfish/config.vsh.yaml b/src/datasets/loaders/allen_brain_cell_atlas_merfish/config.vsh.yaml
new file mode 100644
index 000000000..8c7a249ac
--- /dev/null
+++ b/src/datasets/loaders/allen_brain_cell_atlas_merfish/config.vsh.yaml
@@ -0,0 +1,94 @@
+name: allen_brain_cell_atlas_merfish
+namespace: datasets/loaders
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - type: string
+        name: --mouse
+        description: "Mouse identifier. One of: mouse1_coronal, mouse2_coronal, mouse3_sagittal, mouse4_sagittal."
+        required: true
+        choices:
+          - mouse1_coronal
+          - mouse2_coronal
+          - mouse3_sagittal
+          - mouse4_sagittal
+      - type: string
+        name: --experiment_id
+        description: "Experiment identifier (brain section). Must match an entry in the experiment_metadata.csv for the selected mouse."
+        required: true
+      - type: string
+        name: --abca_version
+        description: "Version tag for the Allen Brain Cell Atlas cell metadata release."
+        required: false
+        default: "20231215"
+      - type: string
+        name: --segmentation_id
+        default: ["cell"]
+        description: The segmentation identifier.
+        multiple: true
+  - name: Caching settings
+    arguments:
+      - type: boolean
+        name: --keep_files
+        default: true
+        description: "Keep downloaded intermediate files after processing."
+  - name: Metadata
+    arguments:
+      - type: string
+        name: --dataset_id
+        description: "A unique identifier for the dataset."
+        required: true
+      - name: --dataset_name
+        type: string
+        description: Nicely formatted name.
+        required: true
+      - type: string
+        name: --dataset_url
+        description: Link to the original source of the dataset.
+        required: false
+      - name: --dataset_reference
+        type: string
+        description: Bibtex reference of the paper in which the dataset was published.
+        required: false
+      - name: --dataset_summary
+        type: string
+        description: Short description of the dataset.
+        required: true
+      - name: --dataset_description
+        type: string
+        description: Long description of the dataset.
+        required: true
+      - name: --dataset_organism
+        type: string
+        description: The organism of the sample in the dataset.
+        required: false
+  - name: Outputs
+    arguments:
+      - name: "--output"
+        __merge__: /src/api/file_common_ist.yaml
+        direction: output
+        required: true
+
+resources:
+  - type: python_script
+    path: script.py
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    __merge__:
+      - /src/base/setup_spatialdata_partial.yaml
+    setup:
+      - type: python
+        pypi:
+          - geopandas
+          - tifffile
+          - shapely
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [midmem, midcpu, hightime]
diff --git a/src/datasets/loaders/allen_brain_cell_atlas_merfish/script.py b/src/datasets/loaders/allen_brain_cell_atlas_merfish/script.py
new file mode 100644
index 000000000..28ebf44ba
--- /dev/null
+++ b/src/datasets/loaders/allen_brain_cell_atlas_merfish/script.py
@@ -0,0 +1,517 @@
+import math
+import os
+import re
+import urllib.request
+from datetime import datetime
+from pathlib import Path
+
+import anndata
+import geopandas as gpd
+import numpy as np
+import pandas as pd
+import tifffile
+from shapely.geometry import Polygon
+import spatialdata as sd
+from spatialdata.models import Image2DModel, PointsModel, ShapesModel, TableModel
+
+## VIASH START
+par = {
+    "mouse": "mouse1_coronal",
+    "experiment_id": "220329_wb3_co1_1_3z19R_merfish1",
+    "abca_version": "20231215",
+    "segmentation_id": ["cell"],
+    "output": "output.zarr",
+    "dataset_id": "allen_brain_cell_atlas_merfish/mouse1_coronal/rep1",
+    "dataset_name": "Allen Brain Cell Atlas MERFISH mouse1_coronal",
+    "dataset_url": "https://download.brainimagelibrary.org/29/3c/293cc39ceea87f6d/",
+    "dataset_reference": "@article{Yao2023, author={Yao, Zizhen and others}, title={A high-resolution transcriptomic and spatial atlas of cell types in the whole mouse brain}, journal={Nature}, year={2023}}",
+    "dataset_summary": "Brain-wide MERFISH spatial transcriptomics atlas of mouse brain.",
+    "dataset_description": "Brain-wide MERFISH spatial transcriptomics data from the Zhuang lab. Four mice (2 coronal, 2 sagittal) imaged with ~1100 gene panel across the full brain volume.",
+    "dataset_organism": "Mus musculus",
+    "keep_files": True,
+}
+meta = {"temp_dir": "/tmp"}
+## VIASH END
+
+# ─── Constants ────────────────────────────────────────────────────────────────
+
+MICRONS_PER_PIXEL = 0.109
+FRAME_SIZE_PIXELS = 2048
+PERCENT_SCALING = 0.3
+OVERLAP_SIZE = 213
+
+BASE_URL = "https://download.brainimagelibrary.org/29/3c/293cc39ceea87f6d/"
+DATA_DOWNLOAD_LINKS = {
+    "mouse1_coronal": BASE_URL + "coronal_1/",
+    "mouse2_coronal": BASE_URL + "coronal_2/",
+    "mouse3_sagittal": BASE_URL + "sagittal_1/",
+    "mouse4_sagittal": BASE_URL + "sagittal_2/",
+}
+MOUSE_TO_ABCA_ID = {
+    "mouse1_coronal": "Zhuang-ABCA-1",
+    "mouse2_coronal": "Zhuang-ABCA-2",
+    "mouse3_sagittal": "Zhuang-ABCA-3",
+    "mouse4_sagittal": "Zhuang-ABCA-4",
+}
+# Known per-experiment image channel prefix corrections
+DOWNLOAD_PREFIX_CORRECTIONS = {
+    ("mouse1_coronal", "220422_wb3_co1_6B_6z18R_merfish6"): "Epi-750s6-635s6-545s1-473s6-408s6",
+    ("mouse1_coronal", "220411_wb3_co1_5B_6z19R_merfish5"): "Epi-750s6-635s6-545s1-473s6-408s6",
+    ("mouse1_coronal", "220329_wb3_co1_24_3z19R_merfish5_dryhyb"): "Epi-750s3-635s3-545s1-473s3-408s3",
+    ("mouse2_coronal", "220506_wb3_co2_10_5z18R_merfish2"): "Epi-750s5-650s5-560s1-488s5-405s5",
+}
+
+t0 = datetime.now()
+mouse = par["mouse"]
+experiment_id = par["experiment_id"]
+
+TMP_DIR = Path(meta.get("temp_dir") or "/tmp") / "allen_brain_merfish"
+TMP_DIR.mkdir(parents=True, exist_ok=True)
+
+
+# ─── Helpers ──────────────────────────────────────────────────────────────────
+
+def download_if_missing(url, path):
+    if not Path(path).exists():
+        print(datetime.now() - t0, f"Downloading {url}", flush=True)
+        urllib.request.urlretrieve(url, path)
+
+
+def check_url(url):
+    try:
+        return urllib.request.urlopen(url).getcode() == 200
+    except (urllib.error.HTTPError, urllib.error.URLError):
+        return False
+
+
+def compute_weight(location, dimensions, percent_scaling):
+    """Linear blending weight for FOV overlap. Adapted from Fiji Stitching plugin."""
+    min_distance = 1
+    for dim in range(len(location)):
+        value = max(1, min(location[dim] + 1, (dimensions[dim] - 1) - location[dim] + 1))
+        img_area_blend = round(percent_scaling * 0.5 * dimensions[dim])
+        value = value / img_area_blend if value < img_area_blend else 1
+        min_distance *= value
+    if min_distance == 1:
+        return 1
+    if min_distance <= 0:
+        return 0.0000001
+    return (math.cos((1 - min_distance) * math.pi) + 1) / 2
+
+
+# ─── Dax file reader (Zhuang lab custom format) ────────────────────────────────
+
+class DaxReader:
+    def __init__(self, filename):
+        self.filename = filename
+        inf_path = os.path.splitext(filename)[0] + ".inf"
+        self.image_height = self.image_width = self.number_frames = None
+        self.bigendian = 0
+
+        with open(inf_path) as f:
+            for line in f:
+                m = re.match(r"frame dimensions = ([\d]+) x ([\d]+)", line)
+                if m:
+                    self.image_height, self.image_width = int(m.group(2)), int(m.group(1))
+                m = re.match(r"number of frames = ([\d]+)", line)
+                if m:
+                    self.number_frames = int(m.group(1))
+                m = re.search(r" (big|little) endian", line)
+                if m:
+                    self.bigendian = int(m.group(1) == "big")
+
+        if not self.image_height:
+            self.image_height = self.image_width = 256
+
+        self.fileptr = open(filename, "rb")
+
+    def load_frame(self, frame_number):
+        self.fileptr.seek(frame_number * self.image_height * self.image_width * 2)
+        img = np.fromfile(self.fileptr, dtype="uint16", count=self.image_height * self.image_width)
+        img = img.reshape(self.image_height, self.image_width)
+        if self.bigendian:
+            img.byteswap(True)
+        return img
+
+    def close(self):
+        self.fileptr.close()
+
+
+def download_fov_image(fov_idx, n_format, prefix, suffix, base_url, frames, stain):
+    """Download one FOV dax file and return a (n_z, px, py) uint16 array."""
+    fmt = f"{fov_idx:03d}" if n_format == 3 else f"{fov_idx:04d}"
+    stem = prefix + fmt + suffix
+    dax_path = TMP_DIR / (stem + ".dax")
+    inf_path = TMP_DIR / (stem + ".inf")
+    tif_path = TMP_DIR / (stem + f"_{stain}.tif")
+
+    if tif_path.exists():
+        return tifffile.imread(tif_path)
+
+    if not dax_path.exists():
+        urllib.request.urlretrieve(base_url + stem + ".dax", dax_path)
+        urllib.request.urlretrieve(base_url + stem + ".inf", inf_path)
+
+    reader = DaxReader(str(dax_path))
+    img = np.array([reader.load_frame(f) for f in frames])
+    reader.close()
+    tifffile.imsave(tif_path, img)
+    return img
+
+
+def delete_dax_files():
+    for f in TMP_DIR.glob("*.dax"):
+        f.unlink()
+    for f in TMP_DIR.glob("*.inf"):
+        f.unlink()
+
+
+# ─── Step 1: Experiment metadata ──────────────────────────────────────────────
+
+print(datetime.now() - t0, "Loading experiment metadata...", flush=True)
+
+exp_metadata_path = TMP_DIR / "experiment_metadata.csv"
+download_if_missing(BASE_URL + "additional_files/experiment_metadata.csv", exp_metadata_path)
+
+exp_metadata = pd.read_csv(exp_metadata_path)
+exp_metadata = exp_metadata[exp_metadata["experiment_id"] != "220514_sa_8_merfish4_adaptor"]
+exp_metadata.loc[
+    exp_metadata["experiment_id"] == "220627_wb3_sa1_5_5z18R_merfish5", "experiment_id"
+] = "220627_wb3_sa1_6_5z18R_merfish6"
+
+exp_row = exp_metadata[
+    (exp_metadata["animal_id"] == mouse) & (exp_metadata["experiment_id"] == experiment_id)
+].iloc[0]
+sample_id = exp_row["sample_id"]
+
+# Load data organization file to find DAPI channel and z-plane indices
+data_org_path = TMP_DIR / f"dataorganization_{experiment_id}.csv"
+download_if_missing(
+    BASE_URL + f"additional_files/dataorganization/{mouse}/dataorganization_{experiment_id}.csv",
+    data_org_path,
+)
+data_org = pd.read_csv(data_org_path)
+dapi_row = data_org[data_org["channelName"] == "DAPI"].iloc[0]
+dapi_frames = [int(n) for n in re.findall(r"\d+", dapi_row["frame"])]
+dapi_prefix = dapi_row["imageType"]
+n_z_planes = len(dapi_frames)
+
+# Apply known per-experiment prefix corrections
+correction = DOWNLOAD_PREFIX_CORRECTIONS.get((mouse, experiment_id))
+if correction:
+    dapi_prefix = correction
+
+# Determine FOV file naming convention (3 or 4-digit index, optional _0 suffix)
+image_base_url = DATA_DOWNLOAD_LINKS[mouse] + experiment_id + "/data/"
+n_format, download_suffix = 3, ""
+test_stem = dapi_prefix + "_"
+if not check_url(image_base_url + test_stem + "002.dax"):
+    if check_url(image_base_url + test_stem + "002_0.dax"):
+        download_suffix = "_0"
+    elif check_url(image_base_url + test_stem + "0002.dax"):
+        n_format = 4
+    elif check_url(image_base_url + test_stem + "0002_0.dax"):
+        n_format, download_suffix = 4, "_0"
+
+dapi_file_prefix = dapi_prefix + "_"
+
+# ─── Step 2: FOV layout from raw count matrix ─────────────────────────────────
+
+print(datetime.now() - t0, "Loading raw count matrix...", flush=True)
+
+matrix_path = TMP_DIR / f"{mouse}_raw_matrix.h5ad"
+download_if_missing(
+    BASE_URL + f"processed_data/counts_updated/raw_counts_{mouse}.h5ad",
+    matrix_path,
+)
+
+raw_adata = anndata.read_h5ad(matrix_path)
+sample_adata = raw_adata[raw_adata.obs["sample_id"] == sample_id].copy()
+
+fov_df = sample_adata.obs[["fov", "fov_x", "fov_y"]].drop_duplicates().copy()
+n_fovs = int(sample_adata.obs["fov"].max())
+present_fovs = set(fov_df["fov"].values)
+missing_fovs = {i for i in range(n_fovs) if i not in present_fovs}
+print(datetime.now() - t0, f"FOVs: {n_fovs} total, {len(missing_fovs)} missing", flush=True)
+
+frame_size_um = FRAME_SIZE_PIXELS * MICRONS_PER_PIXEL
+xmin, xmax = fov_df["fov_x"].min(), fov_df["fov_x"].max()
+ymin, ymax = fov_df["fov_y"].min(), fov_df["fov_y"].max()
+
+img_w = int(((xmax - xmin) + frame_size_um) / MICRONS_PER_PIXEL)
+img_h = int(((ymax - ymin) + frame_size_um) / MICRONS_PER_PIXEL)
+
+
+def um_to_px_x(x):
+    # x-axis is flipped in the stitched image
+    return img_w - int((x - xmin) / MICRONS_PER_PIXEL)
+
+
+def um_to_px_y(y):
+    return int((y - ymin) / MICRONS_PER_PIXEL)
+
+
+fov_df["x_min"] = fov_df["fov_x"].apply(um_to_px_x)
+fov_df["x_max"] = fov_df["x_min"] + FRAME_SIZE_PIXELS
+fov_df["y_min"] = fov_df["fov_y"].apply(um_to_px_y)
+fov_df["y_max"] = fov_df["y_min"] + FRAME_SIZE_PIXELS
+
+# ─── Step 3: Stitch DAPI image ────────────────────────────────────────────────
+
+print(datetime.now() - t0, "Stitching DAPI image...", flush=True)
+
+# Precompute blending weight matrices for overlap regions
+w1 = np.zeros((OVERLAP_SIZE, FRAME_SIZE_PIXELS))
+w2 = np.zeros((OVERLAP_SIZE, FRAME_SIZE_PIXELS))
+for xi in range(FRAME_SIZE_PIXELS):
+    for yi in range(FRAME_SIZE_PIXELS):
+        if xi >= FRAME_SIZE_PIXELS - OVERLAP_SIZE:
+            w1[xi - (FRAME_SIZE_PIXELS - OVERLAP_SIZE), yi] = max(
+                0.00001, compute_weight([xi, yi], [FRAME_SIZE_PIXELS, FRAME_SIZE_PIXELS], PERCENT_SCALING)
+            )
+        if xi < OVERLAP_SIZE:
+            w2[xi, yi] = max(
+                0.00001, compute_weight([xi, yi], [FRAME_SIZE_PIXELS, FRAME_SIZE_PIXELS], PERCENT_SCALING)
+            )
+
+complete_img = np.zeros((n_z_planes, img_w, img_h), dtype="uint16")
+
+for fov in range(n_fovs):
+    if fov in missing_fovs:
+        continue
+
+    row = fov_df[fov_df["fov"] == fov].iloc[0]
+    xp, xp_max = int(row["x_min"]), int(row["x_max"])
+    yp, yp_max = int(row["y_min"]), int(row["y_max"])
+
+    fov_img = download_fov_image(fov, n_format, dapi_file_prefix, download_suffix, image_base_url, dapi_frames, "DAPI")
+
+    # Find neighbors for linear blending at overlaps
+    fov_right = fov_df[
+        (fov_df["x_min"] == xp) & (fov_df["y_min"] > yp) & (fov_df["y_min"] < yp_max)
+    ]["fov"].values
+    fov_bottom = fov_df[
+        (fov_df["y_min"] == yp) & (fov_df["x_min"] > xp) & (fov_df["x_min"] < xp_max)
+    ]["fov"].values
+    fov_br = fov_df[
+        (fov_df["y_min"] > yp) & (fov_df["y_min"] < yp_max) &
+        (fov_df["x_min"] > xp) & (fov_df["x_min"] < xp_max)
+    ]["fov"].values
+
+    right = len(fov_right) > 0 and fov_right[0] not in missing_fovs
+    bottom = len(fov_bottom) > 0 and fov_bottom[0] not in missing_fovs
+
+    if bottom:
+        bot_img = download_fov_image(int(fov_bottom[0]), n_format, dapi_file_prefix, download_suffix, image_base_url, dapi_frames, "DAPI")
+        fov_img[:, FRAME_SIZE_PIXELS - OVERLAP_SIZE:, :] = (
+            np.multiply(fov_img[:, FRAME_SIZE_PIXELS - OVERLAP_SIZE:, :], w1) +
+            np.multiply(bot_img[:, :OVERLAP_SIZE, :], w2)
+        ) / (w1 + w2)
+
+    if right:
+        right_img = download_fov_image(int(fov_right[0]), n_format, dapi_file_prefix, download_suffix, image_base_url, dapi_frames, "DAPI")
+        fov_img[:, :, FRAME_SIZE_PIXELS - OVERLAP_SIZE:] = (
+            np.multiply(fov_img[:, :, FRAME_SIZE_PIXELS - OVERLAP_SIZE:], w1.T) +
+            np.multiply(right_img[:, :, :OVERLAP_SIZE], w2.T)
+        ) / (w1.T + w2.T)
+
+    if len(fov_br) > 0 and fov_br[0] not in missing_fovs:
+        br_img = download_fov_image(int(fov_br[0]), n_format, dapi_file_prefix, download_suffix, image_base_url, dapi_frames, "DAPI")
+        sl_x = slice(FRAME_SIZE_PIXELS - OVERLAP_SIZE, None)
+        sl_y = slice(FRAME_SIZE_PIXELS - OVERLAP_SIZE, None)
+        if bottom and right:
+            fov_img[:, sl_x, sl_y] = np.divide(
+                np.multiply(fov_img[:, sl_x, sl_y], w1[:, FRAME_SIZE_PIXELS - OVERLAP_SIZE:]) +
+                np.multiply(right_img[:, sl_x, :OVERLAP_SIZE], w1[:, :OVERLAP_SIZE]) +
+                np.multiply(bot_img[:, :OVERLAP_SIZE, sl_y], w2[:, FRAME_SIZE_PIXELS - OVERLAP_SIZE:]) +
+                np.multiply(br_img[:, :OVERLAP_SIZE, :OVERLAP_SIZE], w2[:, :OVERLAP_SIZE]),
+                w1[:, FRAME_SIZE_PIXELS - OVERLAP_SIZE:] + w1[:, :OVERLAP_SIZE] +
+                w2[:, FRAME_SIZE_PIXELS - OVERLAP_SIZE:] + w2[:, :OVERLAP_SIZE],
+            )
+        elif bottom:
+            fov_img[:, sl_x, sl_y] = np.divide(
+                np.multiply(fov_img[:, sl_x, sl_y], w1[:, FRAME_SIZE_PIXELS - OVERLAP_SIZE:]) +
+                np.multiply(bot_img[:, :OVERLAP_SIZE, sl_y], w2[:, FRAME_SIZE_PIXELS - OVERLAP_SIZE:]) +
+                np.multiply(br_img[:, :OVERLAP_SIZE, :OVERLAP_SIZE], w2[:, :OVERLAP_SIZE]),
+                w1[:, FRAME_SIZE_PIXELS - OVERLAP_SIZE:] + w2[:, FRAME_SIZE_PIXELS - OVERLAP_SIZE:] + w2[:, :OVERLAP_SIZE],
+            )
+
+    # Skip overlap region already filled by a previously placed FOV
+    if np.any(complete_img[:, xp:xp_max, yp:yp + OVERLAP_SIZE] != 0):
+        fov_img = fov_img[:, :, OVERLAP_SIZE:]
+        yp += OVERLAP_SIZE
+    if np.any(complete_img[:, xp:xp + OVERLAP_SIZE, yp:yp_max - OVERLAP_SIZE] != 0):
+        fov_img = fov_img[:, OVERLAP_SIZE:, :]
+        xp += OVERLAP_SIZE
+
+    complete_img[:, xp:xp + fov_img.shape[1], yp:yp + fov_img.shape[2]] = fov_img
+
+    if fov % 10 == 0:
+        delete_dax_files()
+        print(datetime.now() - t0, f"Stitched {fov}/{n_fovs} FOVs", flush=True)
+
+print(datetime.now() - t0, "Image stitching complete", flush=True)
+
+dapi_mip = complete_img.max(axis=0)  # (img_w, img_h) max-projection over z
+del complete_img
+
+# ─── Step 4: Load and pixel-register transcripts ──────────────────────────────
+
+print(datetime.now() - t0, "Loading transcripts...", flush=True)
+
+spots_path = TMP_DIR / f"spots_{experiment_id}.csv"
+download_if_missing(
+    BASE_URL + f"processed_data/decoded_spots/{mouse}/spots_{experiment_id}.csv",
+    spots_path,
+)
+spots_df = pd.read_csv(spots_path)
+spots_df["x"] = spots_df["global_x"].apply(um_to_px_x).astype(float)
+spots_df["y"] = spots_df["global_y"].apply(um_to_px_y).astype(float)
+spots_df = spots_df.rename(columns={
+    "target_gene": "feature_name",
+    "barcode_id": "transcript_id",
+    "global_z": "z",
+})
+
+# ─── Step 5: Load and pixel-register cell boundaries ─────────────────────────
+
+print(datetime.now() - t0, "Loading cell boundaries...", flush=True)
+
+boundaries_path = TMP_DIR / f"{experiment_id}_boundaries.csv"
+download_if_missing(
+    BASE_URL + f"processed_data/cell_boundaries_updated/{mouse}/{experiment_id}.csv",
+    boundaries_path,
+)
+cell_df = pd.read_csv(boundaries_path)
+
+def parse_polygon_z0(row):
+    if not isinstance(row.get("boundaryX_z0"), str):
+        return None
+    xs = [float(v) for v in row["boundaryX_z0"].split(",")]
+    ys = [float(v) for v in row["boundaryY_z0"].split(",")]
+    return Polygon(zip([um_to_px_x(x) for x in xs], [um_to_px_y(y) for y in ys]))
+
+print(datetime.now() - t0, "Parsing cell boundary polygons (z=0)...", flush=True)
+cell_df["geometry"] = cell_df.apply(parse_polygon_z0, axis=1)
+cell_df = cell_df.dropna(subset=["geometry"])
+gdf = gpd.GeoDataFrame(cell_df[["Unnamed: 0", "geometry"]], geometry="geometry")
+gdf = gdf.rename(columns={"Unnamed: 0": "cell_id"}).set_index("cell_id")
+gdf.index = gdf.index.astype(str)
+
+# ─── Step 6: Assign transcripts to cells (spatial join) ───────────────────────
+
+print(datetime.now() - t0, "Assigning transcripts to cells...", flush=True)
+
+spots_gdf = gpd.GeoDataFrame(
+    spots_df,
+    geometry=gpd.points_from_xy(spots_df["x"], spots_df["y"]),
+)
+joined = gpd.sjoin(
+    spots_gdf,
+    gdf.reset_index()[["cell_id", "geometry"]],
+    how="left",
+    predicate="within",
+)
+spots_df["cell_id"] = joined["cell_id"].values
+print(
+    datetime.now() - t0,
+    f"Transcripts assigned: {spots_df['cell_id'].notna().sum()} / {len(spots_df)}",
+    flush=True,
+)
+
+# ─── Step 7: Build metadata AnnData table ─────────────────────────────────────
+
+print(datetime.now() - t0, "Loading cell type metadata...", flush=True)
+
+abca_id = MOUSE_TO_ABCA_ID[mouse]
+cell_meta_path = TMP_DIR / f"{mouse}_cell_metadata.csv"
+download_if_missing(
+    f"https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/metadata/{abca_id}/{par['abca_version']}/views/cell_metadata_with_cluster_annotation.csv",
+    cell_meta_path,
+)
+cell_meta = pd.read_csv(cell_meta_path, index_col=0)
+cell_meta.index = cell_meta.index.astype(str)
+
+print(datetime.now() - t0, "Building metadata AnnData table...", flush=True)
+
+# Keep only cells present in both the count matrix and the boundary polygons
+gdf_cell_ids = set(gdf.index)
+adata = sample_adata[sample_adata.obs.index.isin(gdf_cell_ids)].copy()
+adata.obs.index = adata.obs.index.astype(str)
+
+# Add cell type annotations
+annotation_cols = ["cluster_alias", "class", "subclass", "supertype", "cluster"]
+adata.obs = adata.obs.join(cell_meta[annotation_cols], how="left")
+
+# Add spatial coordinates from cell boundary centroids
+centroids = {str(k): (v.centroid.x, v.centroid.y) for k, v in gdf.geometry.items()}
+adata.obsm["spatial"] = np.array([
+    centroids.get(str(idx), (np.nan, np.nan)) for idx in adata.obs.index
+])
+
+adata.obs["cell_id"] = adata.obs.index.astype(str)
+adata.obs["region"] = "cell_boundaries"
+
+adata.var["gene_ids"] = adata.var.index.astype(str)
+adata.var["feature_types"] = "Gene Expression"
+
+for key in ["dataset_id", "dataset_name", "dataset_url", "dataset_reference",
+            "dataset_summary", "dataset_description", "dataset_organism", "segmentation_id"]:
+    adata.uns[key] = par[key]
+
+# ─── Step 8: Build SpatialData object ─────────────────────────────────────────
+
+print(datetime.now() - t0, "Building SpatialData object...", flush=True)
+
+identity = sd.transformations.Identity()
+
+# Image: (c, x, y) — DAPI max-projection over z-planes
+image = Image2DModel.parse(
+    dapi_mip[np.newaxis, :, :],
+    dims=("c", "x", "y"),
+    c_coords=["DAPI"],
+    transformations={"global": identity},
+)
+
+# Transcripts
+transcript_cols = [c for c in ["x", "y", "z", "feature_name", "transcript_id", "cell_id"] if c in spots_df.columns]
+transcripts = PointsModel.parse(
+    spots_df[transcript_cols],
+    coordinates={"x": "x", "y": "y", "z": "z"} if "z" in spots_df.columns else {"x": "x", "y": "y"},
+    feature_key="feature_name",
+    instance_key="transcript_id",
+    transformations={"global": identity},
+)
+
+# Cell boundary polygons
+shapes = ShapesModel.parse(
+    gdf,
+    transformations={"global": identity},
+)
+
+# Cell x gene table annotated to cell_boundaries
+table = TableModel.parse(
+    adata,
+    region="cell_boundaries",
+    region_key="region",
+    instance_key="cell_id",
+)
+
+sdata = sd.SpatialData(
+    images={"image": image},
+    points={"transcripts": transcripts},
+    shapes={"cell_boundaries": shapes},
+    tables={"table": table},
+)
+
+# ─── Write ────────────────────────────────────────────────────────────────────
+
+print(datetime.now() - t0, f"Writing to {par['output']}...", flush=True)
+
+sdata.write(par["output"])
+
+if not par.get("keep_files"):
+    import shutil
+    shutil.rmtree(TMP_DIR)
+
+print(datetime.now() - t0, "Done", flush=True)
diff --git a/src/datasets/workflows/process_allen_brain_cell_atlas_merfish/config.vsh.yaml b/src/datasets/workflows/process_allen_brain_cell_atlas_merfish/config.vsh.yaml
new file mode 100644
index 000000000..70ff71804
--- /dev/null
+++ b/src/datasets/workflows/process_allen_brain_cell_atlas_merfish/config.vsh.yaml
@@ -0,0 +1,99 @@
+name: process_allen_brain_cell_atlas_merfish
+namespace: datasets/workflows
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - type: string
+        name: --mouse
+        description: "Mouse identifier. One of: mouse1_coronal, mouse2_coronal, mouse3_sagittal, mouse4_sagittal."
+        required: true
+        choices:
+          - mouse1_coronal
+          - mouse2_coronal
+          - mouse3_sagittal
+          - mouse4_sagittal
+      - type: string
+        name: --experiment_id
+        description: "Experiment identifier (brain section). Must match an entry in the experiment_metadata.csv for the selected mouse."
+        required: true
+      - type: string
+        name: --abca_version
+        description: "Version tag for the Allen Brain Cell Atlas cell metadata release."
+        required: false
+        default: "20231215"
+      - type: string
+        name: --segmentation_id
+        default: ["cell"]
+        description: The segmentation identifier.
+        multiple: true
+  - name: Metadata
+    arguments:
+      - type: string
+        name: --id
+        description: "A unique identifier for the dataset."
+        required: true
+      - name: --dataset_name
+        type: string
+        description: Nicely formatted name.
+        required: true
+      - type: string
+        name: --dataset_url
+        description: Link to the original source of the dataset.
+        required: false
+      - name: --dataset_reference
+        type: string
+        description: Bibtex reference of the paper in which the dataset was published.
+        required: false
+      - name: --dataset_summary
+        type: string
+        description: Short description of the dataset.
+        required: true
+      - name: --dataset_description
+        type: string
+        description: Long description of the dataset.
+        required: true
+      - name: --dataset_organism
+        type: string
+        description: The organism of the sample in the dataset.
+        required: false
+  - name: Crop region
+    description: If provided, the dataset will be cropped to the specified pixel coordinates.
+    arguments:
+      - type: integer
+        name: --crop_region_min_x
+        required: false
+        description: The minimum x coordinate of the region to crop.
+      - type: integer
+        name: --crop_region_max_x
+        required: false
+        description: The maximum x coordinate of the region to crop.
+      - type: integer
+        name: --crop_region_min_y
+        required: false
+        description: The minimum y coordinate of the region to crop.
+      - type: integer
+        name: --crop_region_max_y
+        required: false
+        description: The maximum y coordinate of the region to crop.
+  - name: Outputs
+    arguments:
+      - name: "--output_dataset"
+        __merge__: /src/api/file_common_ist.yaml
+        direction: output
+        required: true
+        default: "$id/dataset.zarr"
+
+resources:
+  - type: nextflow_script
+    path: main.nf
+    entrypoint: run_wf
+
+dependencies:
+  - name: datasets/loaders/allen_brain_cell_atlas_merfish
+  - name: datasets/processors/crop_region
+
+runners:
+  - type: nextflow
+    directives:
+      label: [veryhighmem, midcpu, hightime]
diff --git a/src/datasets/workflows/process_allen_brain_cell_atlas_merfish/main.nf b/src/datasets/workflows/process_allen_brain_cell_atlas_merfish/main.nf
new file mode 100644
index 000000000..155e98eaf
--- /dev/null
+++ b/src/datasets/workflows/process_allen_brain_cell_atlas_merfish/main.nf
@@ -0,0 +1,53 @@
+workflow auto {
+  findStates(params, meta.config)
+    | meta.workflow.run(
+      auto: [publish: "state"]
+    )
+}
+
+workflow run_wf {
+  take:
+  input_ch
+
+  main:
+  output_ch = input_ch
+
+    | map { id, state ->
+      def new_state = state + [dataset_id: id]
+      [id, new_state]
+    }
+
+    | allen_brain_cell_atlas_merfish.run(
+      fromState: [
+        "mouse",
+        "experiment_id",
+        "abca_version",
+        "segmentation_id",
+        "dataset_id",
+        "dataset_name",
+        "dataset_url",
+        "dataset_reference",
+        "dataset_summary",
+        "dataset_description",
+        "dataset_organism",
+      ],
+      toState: ["output"]
+    )
+
+    | crop_region.run(
+      runIf: { id, state -> state.crop_region_min_x },
+      fromState: [
+        "input": "output",
+        "min_x": "crop_region_min_x",
+        "min_y": "crop_region_min_y",
+        "max_x": "crop_region_max_x",
+        "max_y": "crop_region_max_y"
+      ],
+      toState: ["output"]
+    )
+
+    | setState([output_dataset: "output"])
+
+  emit:
+  output_ch
+}