diff --git a/docs/source/reference/geotiff.rst b/docs/source/reference/geotiff.rst index 4bab63ab0..14660bbef 100644 --- a/docs/source/reference/geotiff.rst +++ b/docs/source/reference/geotiff.rst @@ -272,30 +272,34 @@ for an explicit ``engine=``. By default the engine forwards to the standalone ``open_geotiff`` function, which opens a single source from scratch with no target grid. -The coregistered-read options (``coregister``, ``auto_reproject``, -``resampling``) reproject and resample onto a target array's grid, so -they need that target. Pass it as a ``like=`` backend kwarg (a DataArray -or Dataset); the engine then routes through ``like``'s -``.xrs.open_geotiff`` accessor: +A reprojecting read needs a target grid, so pass it as the *value* of the +mode you want -- a DataArray or Dataset. ``coregister=target`` reprojects +and resamples onto ``target``'s exact grid; ``auto_reproject=target`` +reprojects onto ``target``'s CRS but keeps the file's native resolution. +The engine routes through the target's ``.xrs.open_geotiff`` accessor: .. code-block:: python - xr.open_dataset( - "scene.tif", engine="xrspatial", - backend_kwargs={"like": target, "coregister": True, - "auto_reproject": True}) - -When ``like`` is a Dataset, the ``var=`` backend kwarg picks the variable -used for backend/CRS inference. ``open_mfdataset`` with a shared ``like=`` -coregisters every source onto the same grid in one call. The returned -variable is named by the same ``default_name`` / source-stem rule as the -plain engine path, and the accessor's GPU / ``.vrt`` / ``allow_rotated`` -rejections apply through the engine too. - -Passing ``coregister`` / ``auto_reproject`` / ``resampling`` / ``var`` -*without* a ``like=`` raises ``ValueError`` pointing at ``like=``. The -accessor on the target array remains available directly, e.g. -``target.xrs.open_geotiff("scene.tif", coregister=True)``. + xr.open_dataset("scene.tif", engine="xrspatial", + backend_kwargs={"coregister": target}) + + xr.open_dataset("scene.tif", engine="xrspatial", + backend_kwargs={"auto_reproject": target}) + +When the target is a Dataset, the ``var=`` backend kwarg picks the +variable used for backend/CRS inference, and ``resampling=`` sets the +resample mode; both are modifiers of the reprojecting read. +``open_mfdataset`` with a shared ``coregister=`` target snaps every source +onto the same grid in one call. The returned variable is named by the same +``default_name`` / source-stem rule as the plain engine path, and the +accessor's GPU / ``.vrt`` / ``allow_rotated`` rejections apply through the +engine too. + +A bare ``coregister=True`` / ``auto_reproject=True`` (no grid), a lone +``resampling`` / ``var``, a target on both ``coregister`` and +``auto_reproject``, or the removed ``like=`` kwarg each raise a pointed +``ValueError``. The accessor on the target array remains available +directly, e.g. ``target.xrs.open_geotiff("scene.tif", coregister=True)``. Coregistered reads (experimental) ================================= diff --git a/xrspatial/geotiff/_xarray_backend.py b/xrspatial/geotiff/_xarray_backend.py index 7ab66df59..370fff92d 100644 --- a/xrspatial/geotiff/_xarray_backend.py +++ b/xrspatial/geotiff/_xarray_backend.py @@ -30,22 +30,29 @@ xr.open_dataset("dem.tif", engine="xrspatial", chunks={}) -Coregistered reads (``coregister`` / ``auto_reproject`` / ``resampling``) -reproject and resample a source onto an existing array's grid, so they -need a target grid that the plain ``open_dataset`` path does not have. -Pass that target as a ``like=`` backend kwarg (a DataArray or Dataset); -the engine then routes to the ``.xrs.open_geotiff`` accessor on ``like`` -instead of the standalone reader:: +Reprojecting reads need a target grid that the plain ``open_dataset`` +path does not have, so pass the target as the *value* of the mode you +want -- a DataArray or Dataset. ``coregister=target`` reprojects and +resamples the source onto ``target``'s exact grid; ``auto_reproject= +target`` reprojects onto ``target``'s CRS but keeps the file's native +resolution. The engine routes the read through the target's +``.xrs.open_geotiff`` accessor:: xr.open_dataset( "scene.tif", engine="xrspatial", - backend_kwargs={"like": target, "coregister": True, - "auto_reproject": True}, + backend_kwargs={"coregister": target}, ) -``coregister`` / ``auto_reproject`` / ``resampling`` / ``var`` without a -``like=`` raise ``ValueError`` pointing at it, rather than the opaque -``TypeError`` the standalone reader would emit for the unknown kwarg. + xr.open_dataset( + "scene.tif", engine="xrspatial", + backend_kwargs={"auto_reproject": target}, + ) + +``resampling`` / ``var`` are modifiers of those reads, so they need a +target too. A bare ``coregister=True`` / ``auto_reproject=True`` (no +grid), a lone ``resampling`` / ``var``, passing a target on both +``coregister`` and ``auto_reproject``, or the removed ``like=`` kwarg +each raise a pointed ``ValueError``. """ from __future__ import annotations @@ -58,10 +65,10 @@ # from the source (e.g. an in-memory file-like object with no path). _DEFAULT_VARIABLE_NAME = "band_data" -# Backend kwargs only the coregistered-read path (``.xrs.open_geotiff`` on -# ``like``) understands. Supplied without ``like=`` they would reach the -# standalone reader and raise an opaque ``TypeError``; the engine raises a -# pointed ``ValueError`` instead. +# Backend kwargs only the reprojecting-read path (``.xrs.open_geotiff`` on +# the target) understands. Supplied without a target grid they would reach +# the standalone reader and raise an opaque ``TypeError``; the engine +# raises a pointed ``ValueError`` instead. _COREGISTER_ONLY_KWARGS = ("coregister", "auto_reproject", "resampling", "var") # Extensions ``guess_can_open`` claims so ``xr.open_dataset`` / @@ -91,35 +98,59 @@ class GeoTIFFBackendEntrypoint(BackendEntrypoint): # ``backend_kwargs`` instead. open_dataset_parameters = ("filename_or_obj", "drop_variables") - def open_dataset(self, filename_or_obj, *, drop_variables=None, - like=None, **kwargs): + def open_dataset(self, filename_or_obj, *, drop_variables=None, **kwargs): # Imported here rather than at module scope so importing this # backend module stays cheap; the heavy reader package only loads # when a source is actually opened. from . import open_geotiff - if like is not None: - if not isinstance(like, (xr.DataArray, xr.Dataset)): - raise TypeError( - "'like=' must be an xarray DataArray or Dataset whose " - "grid the read coregisters onto, got " - f"{type(like).__name__}." - ) - # Importing the accessor module registers the ``.xrs`` - # accessor that carries the coregistered-read path; ``like`` - # may be a DataArray or a Dataset and the accessor dispatches - # on its type (Datasets also honour the ``var=`` kwarg). + if 'like' in kwargs: + raise ValueError( + "the 'like=' backend kwarg was removed; pass the target grid " + "as the value of 'coregister' or 'auto_reproject', e.g. " + "backend_kwargs={'coregister': target}." + ) + + # The target grid rides on the mode parameter's value: a DataArray + # or Dataset on ``coregister`` / ``auto_reproject`` is the grid to + # read onto. An xarray object is ambiguous in a boolean context, so + # detect the array form here and hand the accessor a plain ``True``; + # the accessor's bool-based logic stays untouched. + cg = kwargs.get('coregister') + ar = kwargs.get('auto_reproject') + cg_target = cg if isinstance(cg, (xr.DataArray, xr.Dataset)) else None + ar_target = ar if isinstance(ar, (xr.DataArray, xr.Dataset)) else None + if cg_target is not None and ar_target is not None: + raise ValueError( + "pass the target grid on exactly one of 'coregister' or " + "'auto_reproject', not both." + ) + target = cg_target if cg_target is not None else ar_target + + if target is not None: + # Importing the accessor module registers the ``.xrs`` accessor + # that carries the reprojecting-read path; the target may be a + # DataArray or a Dataset and the accessor dispatches on its type + # (Datasets also honour the ``var=`` kwarg). from .. import accessor # noqa: F401 - da = like.xrs.open_geotiff(filename_or_obj, **kwargs) + kwargs['coregister' if cg_target is not None + else 'auto_reproject'] = True + da = target.xrs.open_geotiff(filename_or_obj, **kwargs) else: - offending = [k for k in _COREGISTER_ONLY_KWARGS if k in kwargs] + # No target grid: a truthy ``coregister`` / ``auto_reproject`` or + # a lone ``resampling`` / ``var`` cannot run. + offending = [k for k in _COREGISTER_ONLY_KWARGS if kwargs.get(k)] if offending: raise ValueError( - f"{', '.join(offending)} only apply when reading onto a " - "target grid, so they need a target. Pass it as a " - "'like=' backend kwarg (a DataArray or Dataset), e.g. " - "backend_kwargs={'like': target, 'coregister': True}." + f"{', '.join(offending)} need a target grid. Pass it as " + "the value of 'coregister' or 'auto_reproject', e.g. " + "backend_kwargs={'coregister': target}." ) + # A falsy mode flag (e.g. ``coregister=False``) just means a plain + # read; the standalone reader does not take these kwargs, so drop + # them rather than leak an opaque ``TypeError``. + for k in _COREGISTER_ONLY_KWARGS: + kwargs.pop(k, None) da = open_geotiff(filename_or_obj, **kwargs) name = da.name if da.name is not None else _DEFAULT_VARIABLE_NAME ds = da.to_dataset(name=name) diff --git a/xrspatial/geotiff/tests/test_xarray_backend_coregister_3376.py b/xrspatial/geotiff/tests/test_xarray_backend_coregister_3376.py index 4be2026b5..b74a3ed15 100644 --- a/xrspatial/geotiff/tests/test_xarray_backend_coregister_3376.py +++ b/xrspatial/geotiff/tests/test_xarray_backend_coregister_3376.py @@ -1,14 +1,14 @@ -"""Coregistered reads through the xarray backend engine (issue #3376). +"""Reprojecting reads through the xarray backend engine (issue #3376). The ``xrspatial`` engine forwards to the standalone ``open_geotiff`` by -default, which has no target grid. Passing ``like=`` (a DataArray or -Dataset) routes the read through that object's ``.xrs.open_geotiff`` -accessor instead, so ``coregister`` / ``auto_reproject`` / ``resampling`` -become available through the standard ``xr.open_dataset`` API. These -tests pin that routing, the parity with the accessor, the variable -naming, the ``open_mfdataset`` composition, and the guard that turns the -opaque ``TypeError`` (from the standalone reader) into a pointed -``ValueError`` when a coregister kwarg arrives without ``like=``. +default, which has no target grid. Passing the target as the value of +``coregister`` or ``auto_reproject`` (a DataArray or Dataset) routes the +read through that object's ``.xrs.open_geotiff`` accessor instead, so the +reprojecting reads become available through the standard +``xr.open_dataset`` API. These tests pin that routing, the parity with +the accessor, the variable naming, the ``open_mfdataset`` composition, +and the guards that turn an opaque error into a pointed ``ValueError`` +when a reprojecting kwarg arrives without a target. """ from __future__ import annotations @@ -64,7 +64,7 @@ def _template_3857(n=6): # --------------------------------------------------------------------------- -# like= routes to the coregister path +# a target on coregister=/auto_reproject= routes to the reprojecting path # --------------------------------------------------------------------------- def test_coregister_via_engine_matches_template_grid(tmp_path): @@ -72,7 +72,7 @@ def test_coregister_via_engine_matches_template_grid(tmp_path): template = _template_4326(5) ds = xr.open_dataset( path, engine=GeoTIFFBackendEntrypoint, - backend_kwargs={"like": template, "coregister": True}, + backend_kwargs={"coregister": template}, ) assert isinstance(ds, xr.Dataset) var = ds[list(ds.data_vars)[0]] @@ -88,7 +88,7 @@ def test_coregister_via_engine_matches_accessor(tmp_path): template = _template_4326(5) ds = xr.open_dataset( path, engine=GeoTIFFBackendEntrypoint, - backend_kwargs={"like": template, "coregister": True}, + backend_kwargs={"coregister": template}, ) accessor_da = template.xrs.open_geotiff(path, coregister=True) engine_da = ds[list(ds.data_vars)[0]] @@ -103,7 +103,7 @@ def test_coregister_via_engine_crs_mismatch(tmp_path): template = _template_3857(6) ds = xr.open_dataset( path, engine=GeoTIFFBackendEntrypoint, - backend_kwargs={"like": template, "coregister": True}, + backend_kwargs={"coregister": template}, ) var = ds[list(ds.data_vars)[0]] assert var.shape == template.shape @@ -112,27 +112,27 @@ def test_coregister_via_engine_crs_mismatch(tmp_path): def test_auto_reproject_via_engine(tmp_path): - # auto_reproject (no coregister) keeps the file resolution but still - # needs the target's bbox/CRS, so it routes through like= too. + # auto_reproject keeps the file resolution but still needs the target's + # bbox/CRS, so the target rides on auto_reproject= itself. path = _file_4326(tmp_path, "ar_3376.tif") template = _template_3857(6) ds = xr.open_dataset( path, engine=GeoTIFFBackendEntrypoint, - backend_kwargs={"like": template, "auto_reproject": True}, + backend_kwargs={"auto_reproject": template}, ) accessor_da = template.xrs.open_geotiff(path, auto_reproject=True) engine_da = ds[list(ds.data_vars)[0]] np.testing.assert_array_equal(engine_da.values, accessor_da.values) -def test_dataset_like_with_var(tmp_path): +def test_dataset_target_with_var(tmp_path): # A Dataset target dispatches to the Dataset accessor, which honours # the var= kwarg for backend/CRS inference. path = _file_4326(tmp_path, "cg_3376_dsvar.tif") template = _template_4326(5).to_dataset(name="elev") ds = xr.open_dataset( path, engine=GeoTIFFBackendEntrypoint, - backend_kwargs={"like": template, "coregister": True, "var": "elev"}, + backend_kwargs={"coregister": template, "var": "elev"}, ) accessor_da = template.xrs.open_geotiff(path, coregister=True, var="elev") engine_da = ds[list(ds.data_vars)[0]] @@ -148,7 +148,7 @@ def test_coregister_variable_name_follows_stem(tmp_path): template = _template_4326(5) ds = xr.open_dataset( path, engine=GeoTIFFBackendEntrypoint, - backend_kwargs={"like": template, "coregister": True}, + backend_kwargs={"coregister": template}, ) assert "cg_3376_stem" in ds.data_vars @@ -158,7 +158,7 @@ def test_coregister_default_name_renames_variable(tmp_path): template = _template_4326(5) ds = xr.open_dataset( path, engine=GeoTIFFBackendEntrypoint, - backend_kwargs={"like": template, "coregister": True, + backend_kwargs={"coregister": template, "default_name": "elevation"}, ) assert "elevation" in ds.data_vars @@ -178,7 +178,7 @@ def test_open_mfdataset_coregisters_onto_shared_grid(tmp_path): ds = xr.open_mfdataset( paths, engine=GeoTIFFBackendEntrypoint, combine="nested", concat_dim="tile", - backend_kwargs={"like": template, "coregister": True, + backend_kwargs={"coregister": template, "default_name": "band_data"}, ) assert list(ds.data_vars) == ["band_data"] @@ -189,7 +189,7 @@ def test_open_mfdataset_coregisters_onto_shared_grid(tmp_path): # --------------------------------------------------------------------------- -# Guard: coregister kwargs without like= raise a pointed ValueError +# Guard: reprojecting kwargs without a target raise a pointed ValueError # --------------------------------------------------------------------------- @pytest.mark.parametrize("kwargs", [ @@ -198,21 +198,55 @@ def test_open_mfdataset_coregisters_onto_shared_grid(tmp_path): {"resampling": "bilinear"}, {"var": "elev"}, ]) -def test_coregister_kwarg_without_like_raises(tmp_path, kwargs): - path = _file_4326(tmp_path, "cg_3376_nolike.tif") - with pytest.raises(ValueError, match="like="): +def test_reprojecting_kwarg_without_target_raises(tmp_path, kwargs): + # A bare bool mode or a lone modifier has no grid to read onto. + path = _file_4326(tmp_path, "cg_3376_notarget.tif") + with pytest.raises(ValueError, match="target grid"): xr.open_dataset(path, engine=GeoTIFFBackendEntrypoint, backend_kwargs=kwargs) -def test_non_array_like_raises_typeerror(tmp_path): - # A path or other non-array passed as like= would otherwise blow up on - # `.xrs` with an opaque AttributeError; the guard names the right type. - path = _file_4326(tmp_path, "cg_3376_badlike.tif") - with pytest.raises(TypeError, match="DataArray or Dataset"): +def test_like_kwarg_removed_raises(tmp_path): + # like= was removed; the engine names the replacement instead of letting + # the standalone reader raise an opaque TypeError. + path = _file_4326(tmp_path, "cg_3376_like.tif") + template = _template_4326(5) + with pytest.raises(ValueError, match="'like='.*removed"): + xr.open_dataset(path, engine=GeoTIFFBackendEntrypoint, + backend_kwargs={"like": template}) + + +@pytest.mark.parametrize("flag", ["coregister", "auto_reproject"]) +def test_falsy_mode_flag_alone_reads_plain(tmp_path, flag): + # A falsy mode flag means "no reprojecting read"; it must not leak into + # the standalone reader (which has no such kwarg) as an opaque TypeError. + path = _file_4326(tmp_path, f"cg_3376_falsy_{flag}.tif") + ds = xr.open_dataset(path, engine=GeoTIFFBackendEntrypoint, + backend_kwargs={flag: False}) + plain = xr.open_dataset(path, engine=GeoTIFFBackendEntrypoint) + a = ds[list(ds.data_vars)[0]] + b = plain[list(plain.data_vars)[0]] + assert a.shape == b.shape + np.testing.assert_array_equal(a.values, b.values) + + +def test_target_on_both_modes_raises(tmp_path): + # A grid on both coregister= and auto_reproject= is ambiguous. + path = _file_4326(tmp_path, "cg_3376_both.tif") + template = _template_4326(5) + with pytest.raises(ValueError, match="exactly one"): + xr.open_dataset(path, engine=GeoTIFFBackendEntrypoint, + backend_kwargs={"coregister": template, + "auto_reproject": template}) + + +def test_non_array_target_raises(tmp_path): + # A truthy non-array on coregister= is not a grid, so it falls through to + # the no-target guard rather than blowing up on `.xrs`. + path = _file_4326(tmp_path, "cg_3376_badtarget.tif") + with pytest.raises(ValueError, match="target grid"): xr.open_dataset(path, engine=GeoTIFFBackendEntrypoint, - backend_kwargs={"like": "not_an_array", - "coregister": True}) + backend_kwargs={"coregister": "not_an_array"}) # --------------------------------------------------------------------------- @@ -227,6 +261,5 @@ def test_coregister_gpu_rejected_through_engine(tmp_path): with pytest.raises(ValueError, match="CPU only"): xr.open_dataset( path, engine=GeoTIFFBackendEntrypoint, - backend_kwargs={"like": template, "coregister": True, - "gpu": True}, + backend_kwargs={"coregister": template, "gpu": True}, ) diff --git a/xrspatial/geotiff/tests/test_xarray_backend_coregister_target_3379.py b/xrspatial/geotiff/tests/test_xarray_backend_coregister_target_3379.py new file mode 100644 index 000000000..2cdff813e --- /dev/null +++ b/xrspatial/geotiff/tests/test_xarray_backend_coregister_target_3379.py @@ -0,0 +1,104 @@ +"""The reprojecting target rides on the mode parameter's value (#3379). + +The ``xrspatial`` engine takes the target grid as the value of +``coregister`` or ``auto_reproject`` (a DataArray or Dataset), so +``coregister=template`` is the whole call -- no separate ``like=`` and no +redundant ``coregister=True``. These tests pin the two target-carrying +modes, the Dataset target with ``var=``, parity with the accessor, and +``open_mfdataset`` composing onto one shared grid. +""" +from __future__ import annotations + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import to_geotiff +from xrspatial.geotiff._xarray_backend import GeoTIFFBackendEntrypoint + + +def _file_4326(tmp_path, name): + """Write a 30x30 EPSG:4326 GeoTIFF; return its path.""" + height, width = 30, 30 + arr = np.arange(height * width, dtype=np.float32).reshape(height, width) + y = np.linspace(45.5, 44.5, height) + x = np.linspace(-120.5, -119.5, width) + da = xr.DataArray(arr, dims=["y", "x"], + coords={"y": y, "x": x}, attrs={"crs": 4326}) + path = str(tmp_path / name) + to_geotiff(da, path, compression="none") + return path + + +def _template_4326(n=5): + """A coarser, offset same-CRS grid inside the file footprint.""" + return xr.DataArray( + np.zeros((n, n), dtype=np.float32), + dims=["y", "x"], + coords={"y": np.linspace(45.3, 44.7, n), + "x": np.linspace(-120.3, -119.7, n)}, + attrs={"crs": 4326}, + ) + + +def test_coregister_target_snaps_onto_grid(tmp_path): + # coregister= is the whole call: snap onto the grid, matching the + # accessor's coregister=True read. + path = _file_4326(tmp_path, "cgt_3379_snap.tif") + template = _template_4326(5) + ds = xr.open_dataset( + path, engine=GeoTIFFBackendEntrypoint, + backend_kwargs={"coregister": template}, + ) + var = ds[list(ds.data_vars)[0]] + accessor_da = template.xrs.open_geotiff(path, coregister=True) + assert var.shape == template.shape + assert np.allclose(var.coords["x"].values, template.coords["x"].values) + assert np.allclose(var.coords["y"].values, template.coords["y"].values) + np.testing.assert_array_equal(var.values, accessor_da.values) + + +def test_auto_reproject_target_keeps_native_resolution(tmp_path): + # auto_reproject= reprojects onto the target's CRS but keeps the + # file's resolution, so it does NOT take the template's coarse shape. + path = _file_4326(tmp_path, "cgt_3379_ar.tif") + template = _template_4326(5) + ds = xr.open_dataset( + path, engine=GeoTIFFBackendEntrypoint, + backend_kwargs={"auto_reproject": template}, + ) + var = ds[list(ds.data_vars)[0]] + accessor_da = template.xrs.open_geotiff(path, auto_reproject=True) + np.testing.assert_array_equal(var.values, accessor_da.values) + # Same CRS here, so auto_reproject is a windowed read at native + # resolution -- not the template's 5x5 grid. + assert var.shape != template.shape + + +def test_dataset_target_with_var(tmp_path): + # A Dataset target dispatches to the Dataset accessor and honours var=. + path = _file_4326(tmp_path, "cgt_3379_ds.tif") + template = _template_4326(5).to_dataset(name="elev") + ds = xr.open_dataset( + path, engine=GeoTIFFBackendEntrypoint, + backend_kwargs={"coregister": template, "var": "elev"}, + ) + var = ds[list(ds.data_vars)[0]] + accessor_da = template.xrs.open_geotiff(path, coregister=True, var="elev") + assert var.shape == template["elev"].shape + np.testing.assert_array_equal(var.values, accessor_da.values) + + +def test_open_mfdataset_coregister_target_shares_grid(tmp_path): + pytest.importorskip("dask") + template = _template_4326(5) + paths = [_file_4326(tmp_path, f"cgt_3379_mf_{i}.tif") for i in range(2)] + ds = xr.open_mfdataset( + paths, engine=GeoTIFFBackendEntrypoint, + combine="nested", concat_dim="tile", + backend_kwargs={"coregister": template, "default_name": "band_data"}, + ) + assert list(ds.data_vars) == ["band_data"] + assert ds.sizes["tile"] == 2 + assert np.allclose(ds.coords["x"].values, template.coords["x"].values) + assert np.allclose(ds.coords["y"].values, template.coords["y"].values)