Skip to content

Commit 92a97b9

Browse files
authored
BUG: pytables with non-nano timedelta64 (#63239)
1 parent 501478f commit 92a97b9

File tree

5 files changed: 39 additions and 15 deletions

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1192,6 +1192,7 @@ MultiIndex
11921192
I/O
11931193
^^^
11941194
- Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
1195+
- Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` where ``timedelta64`` data with non-nanosecond resolution failed to round-trip correctly (:issue:`63239`)
11951196
- Fix bug in ``on_bad_lines`` callable when returning too many fields: now emits
11961197
``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`)
11971198
- Bug in :func:`pandas.json_normalize` inconsistently handling non-dict items in ``data`` when ``max_level`` was set. The function will now raise a ``TypeError`` if ``data`` is a list containing non-dict items (:issue:`62829`)

pandas/core/computation/pytables.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
Any,
1414
ClassVar,
1515
Self,
16+
cast,
1617
)
1718

1819
import numpy as np
@@ -44,7 +45,10 @@
4445
)
4546

4647
if TYPE_CHECKING:
47-
from pandas._typing import npt
48+
from pandas._typing import (
49+
TimeUnit,
50+
npt,
51+
)
4852

4953

5054
class PyTablesScope(_scope.Scope):
@@ -225,15 +229,19 @@ def stringify(value):
225229
if conv_val.tz is not None:
226230
conv_val = conv_val.tz_convert("UTC")
227231
return TermValue(conv_val, conv_val._value, kind)
228-
elif kind in ("timedelta64", "timedelta"):
232+
elif kind.startswith("timedelta"):
233+
unit = "ns"
234+
if "[" in kind:
235+
unit = cast("TimeUnit", kind.split("[")[-1][:-1])
229236
if isinstance(conv_val, str):
230237
conv_val = Timedelta(conv_val)
231238
elif lib.is_integer(conv_val) or lib.is_float(conv_val):
232239
conv_val = Timedelta(conv_val, unit="s")
233240
else:
234241
conv_val = Timedelta(conv_val)
235-
conv_val = conv_val.as_unit("ns")._value
242+
conv_val = conv_val.as_unit(unit)._value
236243
return TermValue(int(conv_val), conv_val, kind)
244+
237245
elif meta == "category":
238246
metadata = extract_array(self.metadata, extract_numpy=True)
239247
result: npt.NDArray[np.intp] | np.intp | int

pandas/io/pytables.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2702,8 +2702,12 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
27022702
# recreate with tz if indicated
27032703
converted = _set_tz(converted, tz, dtype)
27042704

2705-
elif dtype == "timedelta64":
2706-
converted = np.asarray(converted, dtype="m8[ns]")
2705+
elif dtype.startswith("timedelta64"):
2706+
if dtype == "timedelta64":
2707+
# from before we started storing timedelta64 unit
2708+
converted = np.asarray(converted, dtype="m8[ns]")
2709+
else:
2710+
converted = np.asarray(converted, dtype=dtype)
27072711
elif dtype == "date":
27082712
try:
27092713
converted = np.asarray(
@@ -3086,8 +3090,13 @@ def read_array(self, key: str, start: int | None = None, stop: int | None = None
30863090
tz = getattr(attrs, "tz", None)
30873091
ret = _set_tz(ret, tz, dtype)
30883092

3089-
elif dtype == "timedelta64":
3090-
ret = np.asarray(ret, dtype="m8[ns]")
3093+
elif dtype and dtype.startswith("timedelta64"):
3094+
if dtype == "timedelta64":
3095+
# This was written back before we started writing
3096+
# timedelta64 units
3097+
ret = np.asarray(ret, dtype="m8[ns]")
3098+
else:
3099+
ret = np.asarray(ret, dtype=dtype)
30913100

30923101
if transposed:
30933102
return ret.T
@@ -3324,7 +3333,7 @@ def write_array(
33243333
node._v_attrs.value_type = f"datetime64[{value.dtype.unit}]"
33253334
elif lib.is_np_dtype(value.dtype, "m"):
33263335
self._handle.create_array(self.group, key, value.view("i8"))
3327-
getattr(self.group, key)._v_attrs.value_type = "timedelta64"
3336+
getattr(self.group, key)._v_attrs.value_type = str(value.dtype)
33283337
elif isinstance(value, BaseStringArray):
33293338
vlarr = self._handle.create_vlarray(self.group, key, _tables().ObjectAtom())
33303339
vlarr.append(value.to_numpy())
@@ -5175,8 +5184,12 @@ def _unconvert_index(data, kind: str, encoding: str, errors: str) -> np.ndarray
51755184
index = DatetimeIndex(data)
51765185
else:
51775186
index = DatetimeIndex(data.view(kind))
5178-
elif kind == "timedelta64":
5179-
index = TimedeltaIndex(data)
5187+
elif kind.startswith("timedelta64"):
5188+
if kind == "timedelta64":
5189+
# created before we stored resolution information
5190+
index = TimedeltaIndex(data)
5191+
else:
5192+
index = TimedeltaIndex(data.view(kind))
51805193
elif kind == "date":
51815194
try:
51825195
index = np.asarray([date.fromordinal(v) for v in data], dtype=object)
@@ -5413,7 +5426,7 @@ def _dtype_to_kind(dtype_str: str) -> str:
54135426
elif dtype_str.startswith("datetime64"):
54145427
kind = dtype_str
54155428
elif dtype_str.startswith("timedelta"):
5416-
kind = "timedelta64"
5429+
kind = dtype_str
54175430
elif dtype_str.startswith("bool"):
54185431
kind = "bool"
54195432
elif dtype_str.startswith("category"):

pandas/tests/io/pytables/test_append.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -848,7 +848,7 @@ def test_append_raise(tmp_path, using_infer_string):
848848
store.append("df", df)
849849

850850

851-
def test_append_with_timedelta(tmp_path):
851+
def test_append_with_timedelta(tmp_path, unit):
852852
# GH 3577
853853
# append timedelta
854854

@@ -860,6 +860,7 @@ def test_append_with_timedelta(tmp_path):
860860
}
861861
)
862862
df["C"] = df["A"] - df["B"]
863+
df["C"] = df["C"].astype(f"m8[{unit}]")
863864
df.loc[3:5, "C"] = np.nan
864865

865866
path = tmp_path / "test_append_with_timedelta.h5"

pandas/tests/io/pytables/test_store.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1017,11 +1017,12 @@ def test_duplicate_column_name(tmp_path, setup_path):
10171017
assert other.equals(df)
10181018

10191019

1020-
@pytest.mark.xfail(reason="non-nano TimedeltaIndex does not round-trip")
1021-
def test_preserve_timedeltaindex_type(setup_path):
1020+
def test_preserve_timedeltaindex_type(setup_path, unit):
10221021
# GH9635
10231022
df = DataFrame(np.random.default_rng(2).normal(size=(10, 5)))
1024-
df.index = timedelta_range(start="0s", periods=10, freq="1s", name="example")
1023+
df.index = timedelta_range(
1024+
start="0s", periods=10, freq="1s", name="example", unit=unit
1025+
)
10251026

10261027
with ensure_clean_store(setup_path) as store:
10271028
store["df"] = df

0 commit comments

Comments
 (0)