Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 25 additions & 7 deletions docs/source/reference/geotiff.rst
Original file line number Diff line number Diff line change
Expand Up @@ -270,13 +270,31 @@ ambiguous when another raster backend (e.g. rioxarray's ``rasterio``) is
installed and also claims those extensions; xarray then raises and asks
for an explicit ``engine=``.

The engine forwards to the standalone ``open_geotiff`` function, so the
coregistered-read options (``coregister``, ``auto_reproject``,
``resampling``) are *not* available through it; they live on the
``.xrs.open_geotiff`` accessor because they reproject and resample onto a
target array's grid, and the engine opens a single source from scratch
with no target. Passing them through ``backend_kwargs`` raises
``TypeError``. Use the accessor on the target array instead, e.g.
By default the engine forwards to the standalone ``open_geotiff``
function, which opens a single source from scratch with no target grid.
The coregistered-read options (``coregister``, ``auto_reproject``,
``resampling``) reproject and resample onto a target array's grid, so
they need that target. Pass it as a ``like=`` backend kwarg (a DataArray
or Dataset); the engine then routes through ``like``'s
``.xrs.open_geotiff`` accessor:

.. code-block:: python

xr.open_dataset(
"scene.tif", engine="xrspatial",
backend_kwargs={"like": target, "coregister": True,
"auto_reproject": True})

When ``like`` is a Dataset, the ``var=`` backend kwarg picks the variable
used for backend/CRS inference. ``open_mfdataset`` with a shared ``like=``
coregisters every source onto the same grid in one call. The returned
variable is named by the same ``default_name`` / source-stem rule as the
plain engine path, and the accessor's GPU / ``.vrt`` / ``allow_rotated``
rejections apply through the engine too.

Passing ``coregister`` / ``auto_reproject`` / ``resampling`` / ``var``
*without* a ``like=`` raises ``ValueError`` pointing at ``like=``. The
accessor on the target array remains available directly, e.g.
``target.xrs.open_geotiff("scene.tif", coregister=True)``.

Coregistered reads (experimental)
Expand Down
51 changes: 49 additions & 2 deletions xrspatial/geotiff/_xarray_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,41 @@
a dask-backed dataset (xarray wraps the eager read)::

xr.open_dataset("dem.tif", engine="xrspatial", chunks={})

Coregistered reads (``coregister`` / ``auto_reproject`` / ``resampling``)
reproject and resample a source onto an existing array's grid, so they
need a target grid that the plain ``open_dataset`` path does not have.
Pass that target as a ``like=`` backend kwarg (a DataArray or Dataset);
the engine then routes to the ``.xrs.open_geotiff`` accessor on ``like``
instead of the standalone reader::

xr.open_dataset(
"scene.tif", engine="xrspatial",
backend_kwargs={"like": target, "coregister": True,
"auto_reproject": True},
)

``coregister`` / ``auto_reproject`` / ``resampling`` / ``var`` without a
``like=`` raise ``ValueError`` pointing at it, rather than the opaque
``TypeError`` the standalone reader would emit for the unknown kwarg.
"""
from __future__ import annotations

import os

import xarray as xr
from xarray.backends import BackendEntrypoint

# Name for the one data variable when ``open_geotiff`` cannot derive one
# from the source (e.g. an in-memory file-like object with no path).
_DEFAULT_VARIABLE_NAME = "band_data"

# Backend kwargs only the coregistered-read path (``.xrs.open_geotiff`` on
# ``like``) understands. Supplied without ``like=`` they would reach the
# standalone reader and raise an opaque ``TypeError``; the engine raises a
# pointed ``ValueError`` instead.
_COREGISTER_ONLY_KWARGS = ("coregister", "auto_reproject", "resampling", "var")

# Extensions ``guess_can_open`` claims so ``xr.open_dataset`` /
# ``open_mfdataset`` can auto-select this engine without ``engine=``.
_SUPPORTED_EXTENSIONS = (".tif", ".tiff", ".vrt")
Expand Down Expand Up @@ -67,13 +91,36 @@ class GeoTIFFBackendEntrypoint(BackendEntrypoint):
# ``backend_kwargs`` instead.
open_dataset_parameters = ("filename_or_obj", "drop_variables")

def open_dataset(self, filename_or_obj, *, drop_variables=None, **kwargs):
def open_dataset(self, filename_or_obj, *, drop_variables=None,
like=None, **kwargs):
# Imported here rather than at module scope so importing this
# backend module stays cheap; the heavy reader package only loads
# when a source is actually opened.
from . import open_geotiff

da = open_geotiff(filename_or_obj, **kwargs)
if like is not None:
if not isinstance(like, (xr.DataArray, xr.Dataset)):
raise TypeError(
"'like=' must be an xarray DataArray or Dataset whose "
"grid the read coregisters onto, got "
f"{type(like).__name__}."
)
# Importing the accessor module registers the ``.xrs``
# accessor that carries the coregistered-read path; ``like``
# may be a DataArray or a Dataset and the accessor dispatches
# on its type (Datasets also honour the ``var=`` kwarg).
from .. import accessor # noqa: F401
da = like.xrs.open_geotiff(filename_or_obj, **kwargs)
else:
offending = [k for k in _COREGISTER_ONLY_KWARGS if k in kwargs]
if offending:
raise ValueError(
f"{', '.join(offending)} only apply when reading onto a "
"target grid, so they need a target. Pass it as a "
"'like=' backend kwarg (a DataArray or Dataset), e.g. "
"backend_kwargs={'like': target, 'coregister': True}."
)
da = open_geotiff(filename_or_obj, **kwargs)
name = da.name if da.name is not None else _DEFAULT_VARIABLE_NAME
ds = da.to_dataset(name=name)
if drop_variables is not None:
Expand Down
232 changes: 232 additions & 0 deletions xrspatial/geotiff/tests/test_xarray_backend_coregister_3376.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
"""Coregistered reads through the xarray backend engine (issue #3376).

The ``xrspatial`` engine forwards to the standalone ``open_geotiff`` by
default, which has no target grid. Passing ``like=`` (a DataArray or
Dataset) routes the read through that object's ``.xrs.open_geotiff``
accessor instead, so ``coregister`` / ``auto_reproject`` / ``resampling``
become available through the standard ``xr.open_dataset`` API. These
tests pin that routing, the parity with the accessor, the variable
naming, the ``open_mfdataset`` composition, and the guard that turns the
opaque ``TypeError`` (from the standalone reader) into a pointed
``ValueError`` when a coregister kwarg arrives without ``like=``.
"""
from __future__ import annotations

import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import to_geotiff
from xrspatial.geotiff._xarray_backend import GeoTIFFBackendEntrypoint


def _file_4326(tmp_path, name, nodata=None):
"""Write a 30x30 EPSG:4326 GeoTIFF; return its path."""
height, width = 30, 30
arr = np.arange(height * width, dtype=np.float32).reshape(height, width)
y = np.linspace(45.5, 44.5, height)
x = np.linspace(-120.5, -119.5, width)
attrs = {"crs": 4326}
if nodata is not None:
attrs["nodata"] = nodata
arr[14:17, 14:17] = nodata
da = xr.DataArray(arr, dims=["y", "x"],
coords={"y": y, "x": x}, attrs=attrs)
path = str(tmp_path / name)
to_geotiff(da, path, compression="none")
return path


def _template_4326(n=5):
"""A coarser, offset same-CRS grid inside the file footprint."""
return xr.DataArray(
np.zeros((n, n), dtype=np.float32),
dims=["y", "x"],
coords={"y": np.linspace(45.3, 44.7, n),
"x": np.linspace(-120.3, -119.7, n)},
attrs={"crs": 4326},
)


def _template_3857(n=6):
"""A grid in a different CRS overlapping the file footprint."""
from pyproj import Transformer
tr = Transformer.from_crs(4326, 3857, always_xy=True)
x0, y0 = tr.transform(-120.25, 45.25)
x1, y1 = tr.transform(-119.75, 44.75)
return xr.DataArray(
np.zeros((n, n), dtype=np.float32),
dims=["y", "x"],
coords={"y": np.linspace(max(y0, y1), min(y0, y1), n),
"x": np.linspace(min(x0, x1), max(x0, x1), n)},
attrs={"crs": 3857},
)


# ---------------------------------------------------------------------------
# like= routes to the coregister path
# ---------------------------------------------------------------------------

def test_coregister_via_engine_matches_template_grid(tmp_path):
path = _file_4326(tmp_path, "cg_3376_same.tif")
template = _template_4326(5)
ds = xr.open_dataset(
path, engine=GeoTIFFBackendEntrypoint,
backend_kwargs={"like": template, "coregister": True},
)
assert isinstance(ds, xr.Dataset)
var = ds[list(ds.data_vars)[0]]
assert var.shape == template.shape
assert np.allclose(var.coords["x"].values, template.coords["x"].values)
assert np.allclose(var.coords["y"].values, template.coords["y"].values)


def test_coregister_via_engine_matches_accessor(tmp_path):
# The engine must produce exactly what the accessor produces; the
# engine only promotes the DataArray to a one-variable Dataset.
path = _file_4326(tmp_path, "cg_3376_parity.tif")
template = _template_4326(5)
ds = xr.open_dataset(
path, engine=GeoTIFFBackendEntrypoint,
backend_kwargs={"like": template, "coregister": True},
)
accessor_da = template.xrs.open_geotiff(path, coregister=True)
engine_da = ds[list(ds.data_vars)[0]]
np.testing.assert_array_equal(engine_da.values, accessor_da.values)
for coord in accessor_da.coords:
np.testing.assert_array_equal(
engine_da[coord].values, accessor_da[coord].values)


def test_coregister_via_engine_crs_mismatch(tmp_path):
path = _file_4326(tmp_path, "cg_3376_mismatch.tif")
template = _template_3857(6)
ds = xr.open_dataset(
path, engine=GeoTIFFBackendEntrypoint,
backend_kwargs={"like": template, "coregister": True},
)
var = ds[list(ds.data_vars)[0]]
assert var.shape == template.shape
assert np.allclose(var.coords["x"].values, template.coords["x"].values)
assert np.allclose(var.coords["y"].values, template.coords["y"].values)


def test_auto_reproject_via_engine(tmp_path):
# auto_reproject (no coregister) keeps the file resolution but still
# needs the target's bbox/CRS, so it routes through like= too.
path = _file_4326(tmp_path, "ar_3376.tif")
template = _template_3857(6)
ds = xr.open_dataset(
path, engine=GeoTIFFBackendEntrypoint,
backend_kwargs={"like": template, "auto_reproject": True},
)
accessor_da = template.xrs.open_geotiff(path, auto_reproject=True)
engine_da = ds[list(ds.data_vars)[0]]
np.testing.assert_array_equal(engine_da.values, accessor_da.values)


def test_dataset_like_with_var(tmp_path):
# A Dataset target dispatches to the Dataset accessor, which honours
# the var= kwarg for backend/CRS inference.
path = _file_4326(tmp_path, "cg_3376_dsvar.tif")
template = _template_4326(5).to_dataset(name="elev")
ds = xr.open_dataset(
path, engine=GeoTIFFBackendEntrypoint,
backend_kwargs={"like": template, "coregister": True, "var": "elev"},
)
accessor_da = template.xrs.open_geotiff(path, coregister=True, var="elev")
engine_da = ds[list(ds.data_vars)[0]]
np.testing.assert_array_equal(engine_da.values, accessor_da.values)


# ---------------------------------------------------------------------------
# Variable naming follows the same default_name / stem rule
# ---------------------------------------------------------------------------

def test_coregister_variable_name_follows_stem(tmp_path):
path = _file_4326(tmp_path, "cg_3376_stem.tif")
template = _template_4326(5)
ds = xr.open_dataset(
path, engine=GeoTIFFBackendEntrypoint,
backend_kwargs={"like": template, "coregister": True},
)
assert "cg_3376_stem" in ds.data_vars


def test_coregister_default_name_renames_variable(tmp_path):
path = _file_4326(tmp_path, "cg_3376_rename.tif")
template = _template_4326(5)
ds = xr.open_dataset(
path, engine=GeoTIFFBackendEntrypoint,
backend_kwargs={"like": template, "coregister": True,
"default_name": "elevation"},
)
assert "elevation" in ds.data_vars


# ---------------------------------------------------------------------------
# open_mfdataset composes onto one shared grid
# ---------------------------------------------------------------------------

def test_open_mfdataset_coregisters_onto_shared_grid(tmp_path):
pytest.importorskip("dask")
template = _template_4326(5)
paths = [
_file_4326(tmp_path, f"mf_3376_{i}.tif")
for i in range(2)
]
ds = xr.open_mfdataset(
paths, engine=GeoTIFFBackendEntrypoint,
combine="nested", concat_dim="tile",
backend_kwargs={"like": template, "coregister": True,
"default_name": "band_data"},
)
assert list(ds.data_vars) == ["band_data"]
assert ds.sizes["tile"] == 2
# Every tile sits on the template grid.
assert np.allclose(ds.coords["x"].values, template.coords["x"].values)
assert np.allclose(ds.coords["y"].values, template.coords["y"].values)


# ---------------------------------------------------------------------------
# Guard: coregister kwargs without like= raise a pointed ValueError
# ---------------------------------------------------------------------------

@pytest.mark.parametrize("kwargs", [
{"coregister": True},
{"auto_reproject": True},
{"resampling": "bilinear"},
{"var": "elev"},
])
def test_coregister_kwarg_without_like_raises(tmp_path, kwargs):
path = _file_4326(tmp_path, "cg_3376_nolike.tif")
with pytest.raises(ValueError, match="like="):
xr.open_dataset(path, engine=GeoTIFFBackendEntrypoint,
backend_kwargs=kwargs)


def test_non_array_like_raises_typeerror(tmp_path):
# A path or other non-array passed as like= would otherwise blow up on
# `.xrs` with an opaque AttributeError; the guard names the right type.
path = _file_4326(tmp_path, "cg_3376_badlike.tif")
with pytest.raises(TypeError, match="DataArray or Dataset"):
xr.open_dataset(path, engine=GeoTIFFBackendEntrypoint,
backend_kwargs={"like": "not_an_array",
"coregister": True})


# ---------------------------------------------------------------------------
# GPU / .vrt rejections are inherited from the accessor path
# ---------------------------------------------------------------------------

def test_coregister_gpu_rejected_through_engine(tmp_path):
# The unpack-and-reproject pipeline is CPU-only; the accessor raises on
# gpu=True, and the engine inherits that rejection for free.
path = _file_4326(tmp_path, "cg_3376_gpu.tif")
template = _template_4326(5)
with pytest.raises(ValueError, match="CPU only"):
xr.open_dataset(
path, engine=GeoTIFFBackendEntrypoint,
backend_kwargs={"like": template, "coregister": True,
"gpu": True},
)
Loading