From 1dd30afb7b904540d78ad027c9c2227de6512b28 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 15 May 2026 21:09:41 -0700 Subject: [PATCH 1/2] geotiff: reject bool nodata at write_vrt entry point (#1921) write_vrt(nodata=True) used to write True into the VRT XML. bool is a subclass of int, so the typo slipped past the isinstance(nodata, (int, float)) guard and str(True) ended up in the XML. No reader parses "True" as numeric, so the round-trip silently dropped the sentinel. Fix: - Public write_vrt wrapper routes nodata through _validate_nodata_arg, matching the to_geotiff / write_geotiff_gpu surface. - Internal _vrt.write_vrt adds an explicit isinstance(bool, np.bool_) guard as defense-in-depth for direct callers and any future split of the wrapper. - Tests: flip the xfail(strict=True) parametrized bool-rejection cases to plain passes, drop the "current buggy behaviour" pin, and add a direct-internal-call test so the duplicate guard does not bit-rot. --- xrspatial/geotiff/_vrt.py | 11 ++++ xrspatial/geotiff/_writers/vrt.py | 10 ++++ .../tests/test_write_vrt_bool_nodata_1921.py | 59 ++++++++----------- 3 files changed, 46 insertions(+), 34 deletions(-) diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index 84578398..68ba21c4 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -1414,6 +1414,17 @@ def write_vrt(vrt_path: str, source_files: list[str], *, from ._reader import _FileSource from ._dtypes import resolve_bits_per_sample + # Defense-in-depth: the public ``write_vrt`` wrapper in + # ``_writers/vrt.py`` already rejects bool / non-numeric ``nodata`` via + # ``_validate_nodata_arg`` so callers see the parity error from the + # entry point. Repeat the check here so direct callers of the + # internal symbol (and any future refactor that splits the public + # wrapper) cannot regress to ``True``. See + # issue #1921. + if isinstance(nodata, (bool, np.bool_)): + raise TypeError( + f"nodata must be numeric (int or float), got {nodata!r}") + if not source_files: raise ValueError("source_files must not be empty") diff --git a/xrspatial/geotiff/_writers/vrt.py b/xrspatial/geotiff/_writers/vrt.py index a2411cf3..ec837392 100644 --- a/xrspatial/geotiff/_writers/vrt.py +++ b/xrspatial/geotiff/_writers/vrt.py @@ -15,6 +15,7 @@ _VRT_PATH_DEPRECATED_SENTINEL, _VRT_PATH_MISSING_SENTINEL, ) +from .._validation import _validate_nodata_arg def write_vrt(path: str = _VRT_PATH_MISSING_SENTINEL, @@ -141,6 +142,15 @@ def write_vrt(path: str = _VRT_PATH_MISSING_SENTINEL, ) crs = crs_wkt + # Reject bool / non-numeric nodata at the entry point so write_vrt + # matches the to_geotiff / write_geotiff_gpu surface. ``bool`` is a + # subclass of ``int`` in Python, so a typo like ``nodata=True`` would + # slip past every downstream ``isinstance(nodata, (int, float))`` + # guard and the VRT XML emitter would write ``True + # ``. No reader parses ``"True"`` as numeric, so the + # round-trip would silently drop the sentinel. See issue #1921. + _validate_nodata_arg(nodata) + resolved_wkt = _resolve_crs_to_wkt(crs) from .._vrt import write_vrt as _write_vrt_internal diff --git a/xrspatial/geotiff/tests/test_write_vrt_bool_nodata_1921.py b/xrspatial/geotiff/tests/test_write_vrt_bool_nodata_1921.py index 3d2fec7c..2aa52a2d 100644 --- a/xrspatial/geotiff/tests/test_write_vrt_bool_nodata_1921.py +++ b/xrspatial/geotiff/tests/test_write_vrt_bool_nodata_1921.py @@ -2,19 +2,18 @@ Issue #1911 added the ``isinstance(nodata, (bool, np.bool_)) -> TypeError`` guard at the ``to_geotiff`` entry point, with a belt-and-braces copy in -``_geotags.build_geo_tags``. The same parity check was not added to the +``_geotags.build_geo_tags``. Issue #1921 extended the same parity to the sibling writers: -* ``write_vrt`` -- currently writes ``True`` - into the VRT XML. No reader parses ``"True"`` as numeric, so the - round-trip silently drops the sentinel (bug, see issue #1921). -* ``write_geotiff_gpu`` (direct call) -- currently raises ``TypeError``, - but only because the deeper ``build_geo_tags`` guard fires; there is - no explicit top-of-function check. A future refactor moving the - ``build_geo_tags`` guard could regress this without anyone noticing. - -This file pins both behaviours so the next refactor that touches either -writer surfaces the parity gap. +* ``write_vrt`` -- now rejects bool nodata at the public wrapper via + ``_validate_nodata_arg`` and again inside ``_vrt.write_vrt`` as + defense-in-depth. Previously wrote ``True`` + into the VRT XML, which no reader parses as numeric, so the + round-trip silently dropped the sentinel. +* ``write_geotiff_gpu`` (direct call) -- already routes through + ``_validate_nodata_arg`` near the top of the function. Pinning the + behaviour here so a refactor that drops that call surfaces the + regression at the parity boundary, not inside ``build_geo_tags``. Found by ``/sweep-test-coverage`` (pass 15 / 2026-05-15). """ @@ -54,39 +53,31 @@ def src_geotiff(uint8_da, tmp_path): "bad", [True, False, np.bool_(True), np.bool_(False)], ) -@pytest.mark.xfail( - reason="issue #1921: write_vrt currently writes str(bool) into " - "; the fix should raise TypeError up front.", - strict=True, -) def test_write_vrt_rejects_bool_nodata(src_geotiff, tmp_path, bad): - """``write_vrt`` should raise ``TypeError`` for any bool nodata. + """``write_vrt`` raises ``TypeError`` for any bool nodata. - Will start passing once issue #1921 is fixed; flip the xfail to a - plain assertion at that point. + Fixed in issue #1921 by routing the public ``write_vrt`` wrapper + through ``_validate_nodata_arg`` and adding a defense-in-depth check + inside the internal ``_vrt.write_vrt``. """ vrt_path = str(tmp_path / "out_1921_bad.vrt") with pytest.raises(TypeError, match="nodata must be numeric"): write_vrt(vrt_path, [src_geotiff], nodata=bad) -def test_write_vrt_with_bool_nodata_currently_emits_string( - src_geotiff, tmp_path): - """Pins the current (buggy) behaviour so the fix is visible as a diff. +def test_write_vrt_internal_rejects_bool_nodata(src_geotiff, tmp_path): + """Direct call to the internal ``_vrt.write_vrt`` also rejects bool. - Today the VRT XML contains ``True``. Once - issue #1921 is fixed, ``write_vrt`` will raise ``TypeError`` instead; - this test is then expected to fail and gets removed in the same PR. + Defense-in-depth: the public wrapper's ``_validate_nodata_arg`` is + skipped when callers reach the internal symbol directly (e.g. the + auto-dispatch ``to_geotiff`` path that builds a sibling VRT, or a + future split of the wrapper). Pinning the duplicate check so a + refactor that drops the wrapper guard surfaces here. See #1921. """ - vrt_path = str(tmp_path / "out_1921_pin.vrt") - try: - write_vrt(vrt_path, [src_geotiff], nodata=True) - except TypeError: - pytest.skip("issue #1921 fixed: write_vrt now rejects bool nodata.") - with open(vrt_path) as f: - content = f.read() - assert "True" in content, ( - "expected the current buggy str(True) emission") + from xrspatial.geotiff._vrt import write_vrt as _internal_write_vrt + vrt_path = str(tmp_path / "out_1921_internal.vrt") + with pytest.raises(TypeError, match="nodata must be numeric"): + _internal_write_vrt(vrt_path, [src_geotiff], nodata=True) @pytest.mark.parametrize( From bcaa2d26af3c1cb2f4de5674ed6930abfec9b29b Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 15 May 2026 21:12:04 -0700 Subject: [PATCH 2/2] geotiff: review fixes for bool-nodata test docstrings (#1921) Address review feedback on PR #1990: - Parametrize ``test_write_vrt_internal_rejects_bool_nodata`` over ``[True, False, np.bool_(True), np.bool_(False)]`` so a future refactor that narrows the internal guard to just ``bool`` (dropping ``np.bool_``) surfaces in the test, not in user code. - Update ``test_write_geotiff_gpu_rejects_bool_nodata`` docstring to name the proximate cause (``_validate_nodata_arg`` added by #1973) rather than the deeper ``build_geo_tags`` guard. --- .../tests/test_write_vrt_bool_nodata_1921.py | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/xrspatial/geotiff/tests/test_write_vrt_bool_nodata_1921.py b/xrspatial/geotiff/tests/test_write_vrt_bool_nodata_1921.py index 2aa52a2d..9f7afd9c 100644 --- a/xrspatial/geotiff/tests/test_write_vrt_bool_nodata_1921.py +++ b/xrspatial/geotiff/tests/test_write_vrt_bool_nodata_1921.py @@ -65,19 +65,25 @@ def test_write_vrt_rejects_bool_nodata(src_geotiff, tmp_path, bad): write_vrt(vrt_path, [src_geotiff], nodata=bad) -def test_write_vrt_internal_rejects_bool_nodata(src_geotiff, tmp_path): +@pytest.mark.parametrize( + "bad", + [True, False, np.bool_(True), np.bool_(False)], +) +def test_write_vrt_internal_rejects_bool_nodata(src_geotiff, tmp_path, bad): """Direct call to the internal ``_vrt.write_vrt`` also rejects bool. Defense-in-depth: the public wrapper's ``_validate_nodata_arg`` is skipped when callers reach the internal symbol directly (e.g. the - auto-dispatch ``to_geotiff`` path that builds a sibling VRT, or a - future split of the wrapper). Pinning the duplicate check so a - refactor that drops the wrapper guard surfaces here. See #1921. + multi-tile dask write path in ``_writers/eager.py`` that calls + ``_vrt.write_vrt`` after writing per-tile GeoTIFFs, or a future + split of the wrapper). Parametrize over both ``bool`` and + ``np.bool_`` polarities so a refactor that narrows the internal + guard to just ``bool`` surfaces here, not in user code. See #1921. """ from xrspatial.geotiff._vrt import write_vrt as _internal_write_vrt vrt_path = str(tmp_path / "out_1921_internal.vrt") with pytest.raises(TypeError, match="nodata must be numeric"): - _internal_write_vrt(vrt_path, [src_geotiff], nodata=True) + _internal_write_vrt(vrt_path, [src_geotiff], nodata=bad) @pytest.mark.parametrize( @@ -116,9 +122,11 @@ def test_write_vrt_accepts_none_nodata(src_geotiff, tmp_path): def test_write_geotiff_gpu_rejects_bool_nodata(uint8_da, tmp_path, bad): """Direct ``write_geotiff_gpu`` call rejects bool nodata. - Currently raises ``TypeError`` only because the deeper - ``build_geo_tags`` guard fires. Pinning the behaviour so a refactor - that drops the deeper guard surfaces here, not in user code. + The top-of-function ``_validate_nodata_arg`` call (added by #1973) + fires first; the deeper ``build_geo_tags`` guard is a second line + of defense. Pinning the behaviour so a refactor that drops the + top-of-function call surfaces here, not deep inside the geotag + builder. """ from xrspatial.geotiff import write_geotiff_gpu path = str(tmp_path / "gpu_1921_bad.tif")