diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index 58529c5597b6e..6b0e79d580576 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -541,7 +541,14 @@ def _as_range_index(self) -> RangeIndex:
         return RangeIndex(rng)
 
     def _can_range_setop(self, other) -> bool:
-        return isinstance(self.freq, Tick) and isinstance(other.freq, Tick)
+        # Only allow range-based setops when both objects are tick-based AND
+        # not timezone-aware. For tz-aware DatetimeIndex, constant i8 stepping
+        # does not hold across DST transitions in local time, so avoid range path.
+        if not (isinstance(self.freq, Tick) and isinstance(other.freq, Tick)):
+            return False
+        self_tz = getattr(self.dtype, "tz", None)
+        other_tz = getattr(other.dtype, "tz", None)
+        return self_tz is None and other_tz is None
 
     def _wrap_range_setop(self, other, res_i8) -> Self:
         new_freq = None
@@ -726,6 +733,39 @@ def _union(self, other, sort):
             # that result.freq == self.freq
             return result
         else:
+            # For tz-aware DatetimeIndex, perform union in UTC to avoid
+            # local-time irregularities across DST transitions, then convert back.
+            tz = getattr(self.dtype, "tz", None)
+            other_tz = getattr(other.dtype, "tz", None)
+            if tz is not None and tz == other_tz:
+                # Narrow to DatetimeArray to access tz_convert without mypy
+                # errors; any other array type falls through to the generic
+                # union (with freq inference) below.
+                if isinstance(self._data, DatetimeArray) and isinstance(
+                    other._data, DatetimeArray
+                ):
+                    # Convert both to UTC, then drop tz to avoid re-entering
+                    # tz-aware path
+                    left_utc_naive = self._data.tz_convert("UTC").tz_localize(None)
+                    right_utc_naive = other._data.tz_convert("UTC").tz_localize(None)
+                    left_naive = type(self)._simple_new(left_utc_naive, name=self.name)
+                    right_naive = type(other)._simple_new(
+                        right_utc_naive, name=other.name
+                    )
+                    # Perform base union on tz-naive indices to avoid DST complications
+                    res_naive = super(type(left_naive), left_naive)._union(
+                        right_naive, sort
+                    )
+                    # Localize back to UTC and then convert to original tz
+                    if isinstance(res_naive, DatetimeArray):
+                        base_arr = res_naive
+                        name = self.name
+                    else:
+                        base_arr = cast(DatetimeArray, res_naive._data)
+                        name = res_naive.name
+                    res_arr = base_arr.tz_localize("UTC").tz_convert(tz)
+                    res = type(self)._simple_new(res_arr, name=name)
+                    return res._with_freq("infer")
             return super()._union(other, sort)._with_freq("infer")
 
     # --------------------------------------------------------------------
diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py
index 7a68cb867c94e..25e4250866e91 100644
--- a/pandas/tests/indexes/datetimes/test_setops.py
+++ b/pandas/tests/indexes/datetimes/test_setops.py
@@ -60,6 +60,29 @@ def test_union3(self, sort, box):
         result = first.union(case, sort=sort)
         tm.assert_index_equal(result, expected)
 
+    def test_union_across_dst_boundary(self):
+        # US/Eastern DST spring-forward on 2021-03-14 at 02:00
+        # (02:00-02:59 local time does not exist)
+        tz = "US/Eastern"
+        # Left side spans up to the missing hour window
+        left = date_range("2021-03-14 00:00", periods=3, freq="h", tz=tz)
+        # Right side continues from the first valid post-DST hour
+        right = date_range("2021-03-14 03:00", periods=3, freq="h", tz=tz)
+
+        # Expect a union that preserves tz and includes valid hours without duplicates
+        expected = DatetimeIndex(
+            [
+                Timestamp("2021-03-14 00:00", tz=tz),
+                Timestamp("2021-03-14 01:00", tz=tz),
+                Timestamp("2021-03-14 03:00", tz=tz),
+                Timestamp("2021-03-14 04:00", tz=tz),
+                Timestamp("2021-03-14 05:00", tz=tz),
+            ]
+        ).as_unit(left.unit)
+
+        result = left.union(right)
+        tm.assert_index_equal(result, expected)
+
     @pytest.mark.parametrize("tz", tz)
     def test_union(self, tz, sort):
         rng1 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py
index 5faacbb5559a9..f0b478d0ab1f7 100644
--- a/pandas/tests/series/test_ufunc.py
+++ b/pandas/tests/series/test_ufunc.py
@@ -9,6 +9,15 @@
 import pandas._testing as tm
 from pandas.arrays import SparseArray
 
+# Probe whether np.fix works with Series without raising due to read-only out
+# This avoids relying solely on is_numpy_dev, which may not reflect CI pinning.
+try:
+    np.fix(pd.Series([-1.5, -0.5]))
+except Exception:  # pragma: no cover - best-effort environment probe
+    _NP_FIX_WORKS = False
+else:
+    _NP_FIX_WORKS = True
+
 
 @pytest.fixture(params=[np.add, np.logaddexp])
 def ufunc(request):
@@ -238,6 +247,12 @@ def __init__(self, value) -> None:
         def __add__(self, other):
             return self.value + other.value
 
+        def __eq__(self, other) -> bool:
+            return type(other) is Dummy and self.value == other.value
+
+        def __repr__(self) -> str:
+            return f"Dummy({self.value})"
+
     arr = np.array([Dummy(0), Dummy(1)])
     ser = pd.Series(arr)
     tm.assert_series_equal(np.add(ser, ser), pd.Series(np.add(ser, arr)))
@@ -457,7 +472,11 @@ def add3(x, y, z):
         ufunc(ser, ser, df)
 
 
-@pytest.mark.xfail(reason="see https://github.com/pandas-dev/pandas/pull/51082")
+@pytest.mark.xfail(
+    condition=not _NP_FIX_WORKS,
+    reason="see https://github.com/pandas-dev/pandas/pull/51082",
+    strict=True,
+)
 def test_np_fix():
     # np.fix is not a ufunc but is composed of several ufunc calls under the hood
     # with `out` and `where` keywords