diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2809ea9002d..65b4943c97e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -151,6 +151,9 @@ Bug Fixes By `David Bold `_. - Improve error message when scipy is missing for :py:class:`~xarray.indexes.NDPointIndex` (:pull:`11085`). By `Sakshee_D `_. +- Fix :py:meth:`Dataset.interp` silently dropping datetime64 and timedelta64 + variables, through enabling their interpolation (:issue:`10900`, :pull:`11081`). + By `Emmanuel Ferdman `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 5f14e280f34..3ac154095f5 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2407,7 +2407,7 @@ def interp( * x (x) float64 32B 0.0 0.75 1.25 1.75 * y (y) int64 24B 11 13 15 """ - if self.dtype.kind not in "uifc": + if self.dtype.kind not in "uifcMm": raise TypeError( f"interp only works for a numeric type array. Given {self.dtype}." ) @@ -2548,7 +2548,7 @@ def interp_like( * y (y) int64 24B 70 80 90 """ - if self.dtype.kind not in "uifc": + if self.dtype.kind not in "uifcMm": raise TypeError( f"interp only works for a numeric type array. Given {self.dtype}." ) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 054668a0317..5ade28e5059 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -34,7 +34,7 @@ from xarray.coding.calendar_ops import convert_calendar, interp_calendar from xarray.coding.cftimeindex import CFTimeIndex, _parse_array_of_cftime_strings from xarray.compat.array_api_compat import to_like_array -from xarray.computation import ops +from xarray.computation import computation, ops from xarray.computation.arithmetic import DatasetArithmetic from xarray.core import dtypes as xrdtypes from xarray.core import duck_array_ops, formatting, formatting_html, utils @@ -3940,6 +3940,21 @@ def _validate_interp_indexer(x, new_x): # For normal number types do the interpolation: var_indexers = {k: v for k, v in use_indexers.items() if k in var.dims} variables[name] = missing.interp(var, var_indexers, method, **kwargs) + elif dtype_kind in "Mm" and (use_indexers.keys() & var.dims): + # For datetime-like types, interpolate as float64: + var_indexers = {k: v for k, v in use_indexers.items() if k in var.dims} + int_data = var.astype(np.int64) + nat = np.iinfo(np.int64).min + as_float = computation.where( + int_data != nat, int_data.astype(np.float64), np.nan + ) + result = missing.interp(as_float, var_indexers, method, **kwargs) + as_int = computation.where( + ~result.isnull(), + result.fillna(0).round().astype(np.int64), + nat, + ) + variables[name] = as_int.astype(var.dtype) elif dtype_kind in "ObU" and (use_indexers.keys() & var.dims): if all(var.sizes[d] == 1 for d in (use_indexers.keys() & var.dims)): # Broadcastable, can be handled quickly without reindex: diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index b02df976d47..18166173033 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -1189,3 +1189,88 @@ def test_interp_vectorized_shared_dims(chunk: bool) -> None: coords={"u": [45, 55], "t": [10, 12], "x": dx, "y": dy}, ) assert_identical(actual, expected) + + +@requires_scipy +def test_dataset_interp_datetime_variable() -> None: + # GH#10900 + ds = xr.Dataset( + data_vars={ + "something": (["x", "y"], np.arange(25, dtype=float).reshape(5, 5)), + "time": ( + ["x", "y"], + np.datetime64("2024-01-01") + + np.arange(25).reshape(5, 5) * np.timedelta64(1, "D"), + ), + }, + coords={"x": np.arange(5), "y": np.arange(5)}, + ) + + result = ds.interp(x=[0.5, 1.5], y=[0.5, 1.5]) + + assert "time" in result.data_vars + expected_time = np.datetime64("2024-01-01") + np.timedelta64(3, "D") + np.testing.assert_equal(result["time"].values[0, 0], expected_time) + + +@requires_scipy +def test_dataset_interp_timedelta_variable() -> None: + # GH#10900 + ds = xr.Dataset( + data_vars={ + "duration": (["x"], np.array([1, 2, 3, 4, 5], dtype="timedelta64[D]")), + }, + coords={"x": np.arange(5)}, + ) + + result = ds.interp(x=[0.5, 1.5, 2.5]) + + assert "duration" in result.data_vars + expected_seconds = np.array([1.5, 2.5, 3.5]) * 86400 + actual_seconds = result["duration"].values.astype("timedelta64[s]").astype(float) + np.testing.assert_allclose(actual_seconds, expected_seconds, rtol=1e-10) + + +@requires_scipy +def test_dataset_interp_datetime_nat() -> None: + # GH#10900 - NaT propagates like NaN + time_data = np.array( + ["2024-01-01", "2024-01-02", "NaT", "2024-01-04", "2024-01-05"], + dtype="datetime64[D]", + ) + ds = xr.Dataset( + data_vars={"time": (["x"], time_data)}, + coords={"x": np.arange(5)}, + ) + + result = ds.interp(x=[0.5, 1.5, 2.5, 3.5]) + + assert not np.isnat(result["time"].values[0]) + assert np.isnat(result["time"].values[1]) + assert np.isnat(result["time"].values[2]) + assert not np.isnat(result["time"].values[3]) + + +@requires_scipy +@requires_dask +def test_dataset_interp_datetime_dask() -> None: + # GH#10900 + ds = xr.Dataset( + data_vars={ + "something": (["x", "y"], np.arange(25, dtype=float).reshape(5, 5)), + "time": ( + ["x", "y"], + np.datetime64("2024-01-01") + + np.arange(25).reshape(5, 5) * np.timedelta64(1, "D"), + ), + }, + coords={"x": np.arange(5), "y": np.arange(5)}, + ).chunk({"x": 2, "y": 2}) + + with raise_if_dask_computes(): + result = ds.interp(x=[0.5, 1.5], y=[0.5, 1.5]) + + assert "time" in result.data_vars + computed = result.compute() + expected_time = np.datetime64("2024-01-01") + np.timedelta64(3, "D") + np.testing.assert_equal(computed["time"].values[0, 0], expected_time)