Commit 2575866

Merge branch 'main' into api-date_range

2 parents: be8e591 + 95624ca
8 files changed: 64 additions & 22 deletions

doc/source/whatsnew/v2.3.4.rst

Lines changed: 1 addition & 0 deletions
@@ -13,6 +13,7 @@ including other versions of pandas.
 Bug fixes
 ^^^^^^^^^
 - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
+- Bug in :meth:`Series.str.replace` raising an error on valid group references (``\1``, ``\2``, etc.) on series converted to PyArrow backend dtype (:issue:`62653`)

 .. ---------------------------------------------------------------------------
 .. _whatsnew_234.contributors:
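
The ``Series.str.replace`` entry added above is easiest to see with a small example. The snippet below is an illustrative sketch, not part of the commit; it assumes pyarrow is installed and shows the behaviour the fix restores for ``string[pyarrow]`` data (GH 62653).

import pandas as pd

# Group references in the replacement string, e.g. \1 and \2, previously
# raised for PyArrow-backed string data; with the fix they behave as they
# do for the default string dtype.
ser = pd.Series(["ab", "cd"], dtype="string[pyarrow]")
swapped = ser.str.replace(r"(a)(b)", r"\2\1", regex=True)
print(swapped.tolist())  # expected after the fix: ["ba", "cd"]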

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 1 deletion
@@ -1124,7 +1124,6 @@ Conversion
 Strings
 ^^^^^^^
 - Bug in :meth:`Series.str.match` failing to raise when given a compiled ``re.Pattern`` object and conflicting ``case`` or ``flags`` arguments (:issue:`62240`)
-- Bug in :meth:`Series.str.replace` raising an error on valid group references (``\1``, ``\2``, etc.) on series converted to PyArrow backend dtype (:issue:`62653`)
 - Bug in :meth:`Series.str.zfill` raising ``AttributeError`` for :class:`ArrowDtype` (:issue:`61485`)
 - Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`)
 - Bug in multiplication with a :class:`StringDtype` incorrectly allowing multiplying by bools; explicitly cast to integers instead (:issue:`62595`)
@@ -1268,6 +1267,7 @@ Groupby/resample/rolling
 - Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
 - Bug in :meth:`Series.resample` raising error when resampling non-nanosecond resolutions out of bounds for nanosecond precision (:issue:`57427`)
 - Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` computing incorrect results due to numerical instability. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`)
+- Bug in :meth:`DataFrame.groupby` methods when operating on NumPy-nullable data failing when the NA mask was not C-contiguous (:issue:`61031`)

 Reshaping
 ^^^^^^^^^

pandas/_libs/groupby.pyx

Lines changed: 13 additions & 13 deletions
@@ -819,7 +819,7 @@ def group_prod(
     int64_t[::1] counts,
     ndarray[int64float_t, ndim=2] values,
     const intp_t[::1] labels,
-    const uint8_t[:, ::1] mask,
+    const uint8_t[:, :] mask,
     uint8_t[:, ::1] result_mask=None,
     Py_ssize_t min_count=0,
     bint skipna=True,
@@ -893,7 +893,7 @@ def group_var(
     const intp_t[::1] labels,
     Py_ssize_t min_count=-1,
     int64_t ddof=1,
-    const uint8_t[:, ::1] mask=None,
+    const uint8_t[:, :] mask=None,
     uint8_t[:, ::1] result_mask=None,
     bint is_datetimelike=False,
     str name="var",
@@ -998,7 +998,7 @@ def group_skew(
     int64_t[::1] counts,
     ndarray[float64_t, ndim=2] values,
     const intp_t[::1] labels,
-    const uint8_t[:, ::1] mask=None,
+    const uint8_t[:, :] mask=None,
     uint8_t[:, ::1] result_mask=None,
     bint skipna=True,
 ) -> None:
@@ -1086,7 +1086,7 @@ def group_kurt(
     int64_t[::1] counts,
     ndarray[float64_t, ndim=2] values,
     const intp_t[::1] labels,
-    const uint8_t[:, ::1] mask=None,
+    const uint8_t[:, :] mask=None,
     uint8_t[:, ::1] result_mask=None,
     bint skipna=True,
 ) -> None:
@@ -1180,7 +1180,7 @@ def group_mean(
     const intp_t[::1] labels,
     Py_ssize_t min_count=-1,
     bint is_datetimelike=False,
-    const uint8_t[:, ::1] mask=None,
+    const uint8_t[:, :] mask=None,
     uint8_t[:, ::1] result_mask=None,
     bint skipna=True,
 ) -> None:
@@ -1324,7 +1324,7 @@ def group_ohlc(
     ndarray[int64float_t, ndim=2] values,
     const intp_t[::1] labels,
     Py_ssize_t min_count=-1,
-    const uint8_t[:, ::1] mask=None,
+    const uint8_t[:, :] mask=None,
     uint8_t[:, ::1] result_mask=None,
 ) -> None:
     """
@@ -1870,7 +1870,7 @@ cdef group_min_max(
     Py_ssize_t min_count=-1,
     bint is_datetimelike=False,
     bint compute_max=True,
-    const uint8_t[:, ::1] mask=None,
+    const uint8_t[:, :] mask=None,
     uint8_t[:, ::1] result_mask=None,
     bint skipna=True,
 ):
@@ -1983,7 +1983,7 @@ def group_idxmin_idxmax(
     const intp_t[::1] labels,
     Py_ssize_t min_count=-1,
     bint is_datetimelike=False,
-    const uint8_t[:, ::1] mask=None,
+    const uint8_t[:, :] mask=None,
     str name="idxmin",
     bint skipna=True,
     uint8_t[:, ::1] result_mask=None,
@@ -2096,7 +2096,7 @@ def group_max(
     const intp_t[::1] labels,
     Py_ssize_t min_count=-1,
     bint is_datetimelike=False,
-    const uint8_t[:, ::1] mask=None,
+    const uint8_t[:, :] mask=None,
     uint8_t[:, ::1] result_mask=None,
     bint skipna=True,
 ) -> None:
@@ -2124,7 +2124,7 @@ def group_min(
     const intp_t[::1] labels,
     Py_ssize_t min_count=-1,
     bint is_datetimelike=False,
-    const uint8_t[:, ::1] mask=None,
+    const uint8_t[:, :] mask=None,
     uint8_t[:, ::1] result_mask=None,
     bint skipna=True,
 ) -> None:
@@ -2148,7 +2148,7 @@ def group_min(
 cdef group_cummin_max(
     numeric_t[:, ::1] out,
     ndarray[numeric_t, ndim=2] values,
-    const uint8_t[:, ::1] mask,
+    const uint8_t[:, :] mask,
     uint8_t[:, ::1] result_mask,
     const intp_t[::1] labels,
     int ngroups,
@@ -2264,7 +2264,7 @@ def group_cummin(
     const intp_t[::1] labels,
     int ngroups,
     bint is_datetimelike,
-    const uint8_t[:, ::1] mask=None,
+    const uint8_t[:, :] mask=None,
     uint8_t[:, ::1] result_mask=None,
     bint skipna=True,
 ) -> None:
@@ -2290,7 +2290,7 @@ def group_cummax(
     const intp_t[::1] labels,
     int ngroups,
     bint is_datetimelike,
-    const uint8_t[:, ::1] mask=None,
+    const uint8_t[:, :] mask=None,
     uint8_t[:, ::1] result_mask=None,
     bint skipna=True,
 ) -> None:
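
All of the signature changes above relax the mask memoryview from ``const uint8_t[:, ::1]`` (C-contiguous) to ``const uint8_t[:, :]`` (arbitrary strides). Below is a minimal sketch of why that matters, mirroring the regression test added later in this commit (GH 61031); it is illustrative only and peeks at the private ``_mask`` attribute purely to show the layout.

import pandas as pd

# Reversing a frame with a slice gives NumPy-nullable columns whose boolean
# NA mask has negative strides, i.e. it is no longer C-contiguous.
df = pd.DataFrame({"a": [1, 1, 2], "b": [3, 4, 5]}, dtype="Int64")
reversed_df = df[::-1]
print(reversed_df["b"].array._mask.flags["C_CONTIGUOUS"])  # expected: False

# A C-contiguous-only memoryview would reject that buffer inside the Cython
# groupby kernels; the relaxed [:, :] signature accepts it, so the groupby
# below works without copying the mask first.
print(reversed_df.groupby("a").sum())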

pandas/_libs/internals.pyx

Lines changed: 5 additions & 1 deletion
@@ -1006,8 +1006,12 @@ cdef class BlockValuesRefs:

 cdef extern from "Python.h":
     """
+    // python version < 3.14
     #if PY_VERSION_HEX < 0x030E0000
-    int __Pyx_PyUnstable_Object_IsUniqueReferencedTemporary(PyObject *ref);
+    // This function is unused and is declared to avoid a build warning
+    int __Pyx_PyUnstable_Object_IsUniqueReferencedTemporary(PyObject *ref) {
+        return Py_REFCNT(ref) == 1;
+    }
     #else
     #define __Pyx_PyUnstable_Object_IsUniqueReferencedTemporary \
         PyUnstable_Object_IsUniqueReferencedTemporary

pandas/core/arrays/datetimelike.py

Lines changed: 3 additions & 0 deletions
@@ -2342,6 +2342,9 @@ def _with_freq(self, freq) -> Self:

     def _values_for_json(self) -> np.ndarray:
         # Small performance bump vs the base class which calls np.asarray(self)
+        if self.unit != "ns":
+            # GH#55827
+            return self.as_unit("ns")._values_for_json()
         if isinstance(self.dtype, np.dtype):
             return self._ndarray
         return super()._values_for_json()
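
The change above routes non-nanosecond datetime data through a nanosecond view before JSON serialization (GH 55827). The sketch below is illustrative, not part of the commit; the column name and ``orient`` are arbitrary, and it assumes the round trip that the removed xfail in test_pandas.py (further down) previously marked as failing.

from io import StringIO

import pandas as pd

# A second-resolution datetime column is serialized via nanoseconds after
# this change, so it can round-trip through to_json/read_json.
df = pd.DataFrame({"x": pd.to_datetime(["2000-01-01", "2000-01-02"]).as_unit("s")})
payload = df.to_json(orient="split")
roundtripped = pd.read_json(StringIO(payload), orient="split", convert_dates=["x"])

# The restored dtype is datetime64[ns], which is why the expected frame in
# test_pandas.py below is cast to "M8[ns]".
print(roundtripped["x"].dtype)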

pandas/tests/frame/methods/test_update.py

Lines changed: 17 additions & 0 deletions
@@ -197,6 +197,7 @@ def test_update_dt_column_with_NaT_create_column(self):
             np.datetime64("2000-01-02T00:00:00"),
             np.dtype("datetime64[ns]"),
         ),
+        (1, 2, pd.Int64Dtype()),
     ],
 )
 def test_update_preserve_dtype(self, value_df, value_other, dtype):
@@ -228,3 +229,19 @@ def test_update_on_duplicate_frame_unique_argument_index(self):
         expected = DataFrame({"a": [2, 2, 3]}, index=[1, 1, 2], dtype=np.dtype("intc"))
         df.update(other)
         tm.assert_frame_equal(df, expected)
+
+    def test_update_preserve_mixed_dtypes(self):
+        # GH#44104
+        dtype1 = pd.Int64Dtype()
+        dtype2 = pd.StringDtype()
+        df = DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
+        df = df.astype({"a": dtype1, "b": dtype2})
+
+        other = DataFrame({"a": [4, 5], "b": ["a", "b"]})
+        other = other.astype({"a": dtype1, "b": dtype2})
+
+        expected = DataFrame({"a": [4, 5, 3], "b": ["a", "b", "z"]})
+        expected = expected.astype({"a": dtype1, "b": dtype2})
+
+        df.update(other)
+        tm.assert_frame_equal(df, expected)

pandas/tests/groupby/test_all_methods.py

Lines changed: 19 additions & 0 deletions
@@ -84,3 +84,22 @@ def test_dup_labels_output_shape(groupby_func, idx):

     assert result.shape == (1, 2)
     tm.assert_index_equal(result.columns, idx)
+
+
+def test_not_c_contiguous_mask(groupby_func):
+    # https://github.com/pandas-dev/pandas/issues/61031
+    if groupby_func == "corrwith":
+        # corrwith is deprecated
+        return
+    df = DataFrame({"a": [1, 1, 2], "b": [3, 4, 5]}, dtype="Int64")
+    reversed = DataFrame(
+        {"a": [2, 1, 1], "b": [5, 4, 3]}, dtype="Int64", index=[2, 1, 0]
+    )[::-1]
+    assert not reversed["b"].array._mask.flags["C_CONTIGUOUS"]
+    args = get_groupby_method_args(groupby_func, df)
+
+    gb_reversed = reversed.groupby("a")
+    result = getattr(gb_reversed, groupby_func)(*args)
+    gb = df.groupby("a")
+    expected = getattr(gb, groupby_func)(*args)
+    tm.assert_equal(result, expected)

pandas/tests/io/json/test_pandas.py

Lines changed: 5 additions & 7 deletions
@@ -135,12 +135,6 @@ def test_frame_non_unique_index_raises(self, orient):
         ],
     )
     def test_frame_non_unique_columns(self, orient, data, request):
-        if isinstance(data[0][0], Timestamp) and orient == "split":
-            mark = pytest.mark.xfail(
-                reason="GH#55827 non-nanosecond dt64 fails to round-trip"
-            )
-            request.applymarker(mark)
-
         df = DataFrame(data, index=[1, 2], columns=["x", "x"])

         expected_warning = None
@@ -162,10 +156,14 @@ def test_frame_non_unique_columns(self, orient, data, request):
             # in milliseconds; these are internally stored in nanosecond,
             # so divide to get where we need
             # TODO: a to_epoch method would also solve; see GH 14772
-            expected.isetitem(0, expected.iloc[:, 0].astype(np.int64) // 1000000)
+            dta = expected.iloc[:, 0]._values
+            dta = dta.as_unit("ns")  # GH#55827
+            expected.isetitem(0, dta.astype(np.int64) // 1_000_000)
         elif orient == "split":
             expected = df
             expected.columns = ["x", "x.1"]
+            if expected["x"].dtype.kind == "M":
+                expected["x"] = expected["x"].astype("M8[ns]")  # GH#55827

         tm.assert_frame_equal(result, expected)