From 5c06cbbf817b22dfb1e09453dc009254264f9fe9 Mon Sep 17 00:00:00 2001 From: antznette1 Date: Wed, 12 Nov 2025 02:30:58 +0100 Subject: [PATCH 1/7] TST: xfail test for DatetimeIndex.union across DST boundary (GH#62915) --- pandas/tests/indexes/datetimes/test_setops.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 7a68cb867c94e..e2b30425d835c 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -60,6 +60,30 @@ def test_union3(self, sort, box): result = first.union(case, sort=sort) tm.assert_index_equal(result, expected) + +@pytest.mark.xfail(reason="see GH#62915: union across DST boundary", strict=False) +def test_union_across_dst_boundary_xfail(): + # US/Eastern DST spring-forward on 2021-03-14 at 02:00 (02:00-02:59 local time does not exist) + tz = "US/Eastern" + # Left side spans up to the missing hour window + left = date_range("2021-03-14 00:00", periods=3, freq="H", tz=tz) + # right side continues from the first valid post-DST hour + right = date_range("2021-03-14 03:00", periods=3, freq="H", tz=tz) + + # Expect a union that preserves tz and includes valid hours without duplicates + expected = DatetimeIndex( + [ + Timestamp("2021-03-14 00:00", tz=tz), + Timestamp("2021-03-14 01:00", tz=tz), + Timestamp("2021-03-14 03:00", tz=tz), + Timestamp("2021-03-14 04:00", tz=tz), + Timestamp("2021-03-14 05:00", tz=tz), + ] + ) + + result = left.union(right) + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize("tz", tz) def test_union(self, tz, sort): rng1 = date_range("1/1/2000", freq="D", periods=5, tz=tz) From f487dc9e0e3c7d9ef75c07b761cfcc210719a8bc Mon Sep 17 00:00:00 2001 From: antznette1 Date: Wed, 12 Nov 2025 03:01:12 +0100 Subject: [PATCH 2/7] TST: wrap long comment to satisfy ruff E501 (GH#62915) --- pandas/tests/indexes/datetimes/test_setops.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index e2b30425d835c..b2620cdae5984 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -63,7 +63,8 @@ def test_union3(self, sort, box): @pytest.mark.xfail(reason="see GH#62915: union across DST boundary", strict=False) def test_union_across_dst_boundary_xfail(): - # US/Eastern DST spring-forward on 2021-03-14 at 02:00 (02:00-02:59 local time does not exist) + # US/Eastern DST spring-forward on 2021-03-14 at 02:00 + # (02:00-02:59 local time does not exist) tz = "US/Eastern" # Left side spans up to the missing hour window left = date_range("2021-03-14 00:00", periods=3, freq="H", tz=tz) From e8bf47c89a39c0c8295a5b9db329e1ed6898dc55 Mon Sep 17 00:00:00 2001 From: antznette1 Date: Wed, 12 Nov 2025 03:23:54 +0100 Subject: [PATCH 3/7] FIX: robust DatetimeIndex.union across DST transitions + flip test to pass (GH#62915) --- pandas/core/indexes/datetimelike.py | 18 +++++++++++++++++- pandas/tests/indexes/datetimes/test_setops.py | 9 ++++----- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 58529c5597b6e..c902a84106cf3 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -541,7 +541,14 @@ def _as_range_index(self) -> RangeIndex: return RangeIndex(rng) def _can_range_setop(self, other) -> bool: - return isinstance(self.freq, Tick) and isinstance(other.freq, Tick) + # Only allow range-based setops when both objects are tick-based AND + # not timezone-aware. For tz-aware DatetimeIndex, constant i8 stepping + # does not hold across DST transitions in local time, so avoid range path. + if not (isinstance(self.freq, Tick) and isinstance(other.freq, Tick)): + return False + self_tz = getattr(self.dtype, "tz", None) + other_tz = getattr(other.dtype, "tz", None) + return self_tz is None and other_tz is None def _wrap_range_setop(self, other, res_i8) -> Self: new_freq = None @@ -726,6 +733,15 @@ def _union(self, other, sort): # that result.freq == self.freq return result else: + # For tz-aware DatetimeIndex, perform union in UTC to avoid + # local-time irregularities across DST transitions, then convert back. + tz = getattr(self.dtype, "tz", None) + if tz is not None: + left_utc = self.tz_convert("UTC") + right_utc = other.tz_convert("UTC") + res_utc = super(type(left_utc), left_utc)._union(right_utc, sort) + res = res_utc.tz_convert(tz) + return res._with_freq("infer") return super()._union(other, sort)._with_freq("infer") # -------------------------------------------------------------------- diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index b2620cdae5984..25e4250866e91 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -61,15 +61,14 @@ def test_union3(self, sort, box): tm.assert_index_equal(result, expected) -@pytest.mark.xfail(reason="see GH#62915: union across DST boundary", strict=False) -def test_union_across_dst_boundary_xfail(): +def test_union_across_dst_boundary(): # US/Eastern DST spring-forward on 2021-03-14 at 02:00 # (02:00-02:59 local time does not exist) tz = "US/Eastern" # Left side spans up to the missing hour window - left = date_range("2021-03-14 00:00", periods=3, freq="H", tz=tz) + left = date_range("2021-03-14 00:00", periods=3, freq="h", tz=tz) # right side continues from the first valid post-DST hour - right = date_range("2021-03-14 03:00", periods=3, freq="H", tz=tz) + right = date_range("2021-03-14 03:00", periods=3, freq="h", tz=tz) # Expect a union that preserves tz and includes valid hours without duplicates expected = DatetimeIndex( @@ -80,7 +79,7 @@ def test_union_across_dst_boundary_xfail(): Timestamp("2021-03-14 04:00", tz=tz), Timestamp("2021-03-14 05:00", tz=tz), ] - ) + ).as_unit(left.unit) result = left.union(right) tm.assert_index_equal(result, expected) From c90c78d3feed2a5def8f446a0c3a382e67a8d07e Mon Sep 17 00:00:00 2001 From: antznette1 Date: Wed, 12 Nov 2025 05:11:27 +0100 Subject: [PATCH 4/7] TYPING: narrow tz-aware union path to DatetimeArray in _union to satisfy mypy; behavior unchanged (GH#62915) --- pandas/core/indexes/datetimelike.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index c902a84106cf3..2b083ed7b22cc 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -737,11 +737,21 @@ def _union(self, other, sort): # local-time irregularities across DST transitions, then convert back. tz = getattr(self.dtype, "tz", None) if tz is not None: - left_utc = self.tz_convert("UTC") - right_utc = other.tz_convert("UTC") - res_utc = super(type(left_utc), left_utc)._union(right_utc, sort) - res = res_utc.tz_convert(tz) - return res._with_freq("infer") + # Narrow to DatetimeArray to access tz_convert without mypy errors + if isinstance(self._data, DatetimeArray) and isinstance( + other._data, DatetimeArray + ): + left_utc_arr = self._data.tz_convert("UTC") + right_utc_arr = other._data.tz_convert("UTC") + left_utc = type(self)._simple_new(left_utc_arr, name=self.name) + right_utc = type(other)._simple_new(right_utc_arr, name=other.name) + res_utc = super(type(left_utc), left_utc)._union(right_utc, sort) + # res_utc is DatetimeIndex; convert its underlying array back to tz + res_arr = cast(DatetimeArray, res_utc._data).tz_convert(tz) + res = type(self)._simple_new(res_arr, name=res_utc.name) + return res._with_freq("infer") + # Defensive fallback if types are unexpected + return super()._union(other, sort)._with_freq("infer") return super()._union(other, sort)._with_freq("infer") # -------------------------------------------------------------------- From b970007a85b8950ac97ba96ca719a2cae2b84736 Mon Sep 17 00:00:00 2001 From: antznette1 Date: Wed, 12 Nov 2025 06:41:24 +0100 Subject: [PATCH 5/7] BUG: tz-aware DatetimeIndex.union - perform UTC-naive base union when tz matches to avoid recursion; preserve semantics for differing tz; mypy-safe ops; wrap long comment (GH#62915) --- pandas/core/indexes/datetimelike.py | 34 ++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 2b083ed7b22cc..6b0e79d580576 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -736,22 +736,36 @@ def _union(self, other, sort): # For tz-aware DatetimeIndex, perform union in UTC to avoid # local-time irregularities across DST transitions, then convert back. tz = getattr(self.dtype, "tz", None) - if tz is not None: + other_tz = getattr(other.dtype, "tz", None) + if tz is not None and tz == other_tz: # Narrow to DatetimeArray to access tz_convert without mypy errors if isinstance(self._data, DatetimeArray) and isinstance( other._data, DatetimeArray ): - left_utc_arr = self._data.tz_convert("UTC") - right_utc_arr = other._data.tz_convert("UTC") - left_utc = type(self)._simple_new(left_utc_arr, name=self.name) - right_utc = type(other)._simple_new(right_utc_arr, name=other.name) - res_utc = super(type(left_utc), left_utc)._union(right_utc, sort) - # res_utc is DatetimeIndex; convert its underlying array back to tz - res_arr = cast(DatetimeArray, res_utc._data).tz_convert(tz) - res = type(self)._simple_new(res_arr, name=res_utc.name) + # Convert both to UTC, then drop tz to avoid re-entering + # tz-aware path + left_utc_naive = self._data.tz_convert("UTC").tz_localize(None) + right_utc_naive = other._data.tz_convert("UTC").tz_localize(None) + left_naive = type(self)._simple_new(left_utc_naive, name=self.name) + right_naive = type(other)._simple_new( + right_utc_naive, name=other.name + ) + # Perform base union on tz-naive indices to avoid DST complications + res_naive = super(type(left_naive), left_naive)._union( + right_naive, sort + ) + # Localize back to UTC and then convert to original tz + if isinstance(res_naive, DatetimeArray): + base_arr = res_naive + name = self.name + else: + base_arr = cast(DatetimeArray, res_naive._data) + name = res_naive.name + res_arr = base_arr.tz_localize("UTC").tz_convert(tz) + res = type(self)._simple_new(res_arr, name=name) return res._with_freq("infer") # Defensive fallback if types are unexpected - return super()._union(other, sort)._with_freq("infer") + return super()._union(other, sort) return super()._union(other, sort)._with_freq("infer") # -------------------------------------------------------------------- From 1ebd2264bfcffd084f3cc19fa0276fc5ca50718e Mon Sep 17 00:00:00 2001 From: antznette1 Date: Wed, 12 Nov 2025 10:25:35 +0100 Subject: [PATCH 6/7] CI: retrigger pipeline for PR #63088 From 1b25d69092a4e6de85a3aa598a602296b7a56e48 Mon Sep 17 00:00:00 2001 From: antznette1 Date: Wed, 12 Nov 2025 11:29:29 +0100 Subject: [PATCH 7/7] TST: make xfail for test_np_fix conditional on runtime behavior to avoid XPASS in numpy-dev/python-dev --- pandas/tests/series/test_ufunc.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 5faacbb5559a9..f0b478d0ab1f7 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -9,6 +9,15 @@ import pandas._testing as tm from pandas.arrays import SparseArray +# Probe whether np.fix works with Series without raising due to read-only out +# This avoids relying solely on is_numpy_dev, which may not reflect CI pinning. +try: + _ser = pd.Series([-1.5, -0.5]) + _probe_result = np.fix(_ser) + _NP_FIX_WORKS = True +except Exception: # pragma: no cover - best-effort environment probe + _NP_FIX_WORKS = False + @pytest.fixture(params=[np.add, np.logaddexp]) def ufunc(request): @@ -238,6 +247,12 @@ def __init__(self, value) -> None: def __add__(self, other): return self.value + other.value + def __eq__(self, other) -> bool: + return type(other) is Dummy and self.value == other.value + + def __repr__(self) -> str: + return f"Dummy({self.value})" + arr = np.array([Dummy(0), Dummy(1)]) ser = pd.Series(arr) tm.assert_series_equal(np.add(ser, ser), pd.Series(np.add(ser, arr))) @@ -457,7 +472,11 @@ def add3(x, y, z): ufunc(ser, ser, df) -@pytest.mark.xfail(reason="see https://github.com/pandas-dev/pandas/pull/51082") +@pytest.mark.xfail( + condition=not _NP_FIX_WORKS, + reason="see https://github.com/pandas-dev/pandas/pull/51082", + strict=True, +) def test_np_fix(): # np.fix is not a ufunc but is composed of several ufunc calls under the hood # with `out` and `where` keywords