
Commit 68d058b

Merge branch 'main' into api-date_range
2 parents: cd160be + 53e778b


42 files changed (+561, -238 lines)

doc/source/whatsnew/v3.0.0.rst

Lines changed: 4 additions & 0 deletions
@@ -233,6 +233,9 @@ Other enhancements
 - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
 - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
 - Switched wheel upload to **PyPI Trusted Publishing** (OIDC) for release-tag pushes in ``wheels.yml``. (:issue:`61718`)
+- Added a new :meth:`DataFrame.from_arrow` method to import any Arrow-compatible
+  tabular data object into a pandas :class:`DataFrame` through the
+  `Arrow PyCapsule Protocol <https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html>`__ (:issue:`59631`)

 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.notable_bug_fixes:

@@ -748,6 +751,7 @@ Other API changes
 - :func:`date_range` and :func:`timedelta_range` no longer default to ``unit="ns"``, instead will infer a unit from the ``start``, ``end``, and ``freq`` parameters. Explicitly specify a desired ``unit`` to override these (:issue:`59031`)
 - :meth:`CategoricalIndex.append` no longer attempts to cast different-dtype indexes to the caller's dtype (:issue:`41626`)
 - :meth:`ExtensionDtype.construct_array_type` is now a regular method instead of a ``classmethod`` (:issue:`58663`)
+- Arithmetic operations between a :class:`Series`, :class:`Index`, or :class:`ExtensionArray` with a ``list`` now consistently wrap that list with an array equivalent to ``Series(my_list).array``. To do any other kind of type inference or casting, do so explicitly before operating (:issue:`62552`)
 - Comparison operations between :class:`Index` and :class:`Series` now consistently return :class:`Series` regardless of which object is on the left or right (:issue:`36759`)
 - Numpy functions like ``np.isinf`` that return a bool dtype when called on a :class:`Index` object now return a bool-dtype :class:`Index` instead of ``np.ndarray`` (:issue:`52676`)

pandas/_libs/internals.pyi

Lines changed: 0 additions & 4 deletions
@@ -94,7 +94,3 @@ class BlockValuesRefs:
     def add_reference(self, blk: Block) -> None: ...
     def add_index_reference(self, index: Index) -> None: ...
     def has_reference(self) -> bool: ...
-
-class SetitemMixin:
-    def __setitem__(self, key, value) -> None: ...
-    def __delitem__(self, key) -> None: ...

pandas/_libs/internals.pyx

Lines changed: 0 additions & 50 deletions
@@ -1,9 +1,6 @@
 from collections import defaultdict
-import sys
-import warnings

 cimport cython
-from cpython cimport PY_VERSION_HEX
 from cpython.object cimport PyObject
 from cpython.pyport cimport PY_SSIZE_T_MAX
 from cpython.slice cimport PySlice_GetIndicesEx

@@ -23,9 +20,6 @@ from numpy cimport (
 cnp.import_array()

 from pandas._libs.algos import ensure_int64
-from pandas.compat import CHAINED_WARNING_DISABLED
-from pandas.errors import ChainedAssignmentError
-from pandas.errors.cow import _chained_assignment_msg

 from pandas._libs.util cimport (
     is_array,

@@ -1002,47 +996,3 @@ cdef class BlockValuesRefs:
             return self._has_reference_maybe_locked()
         ELSE:
             return self._has_reference_maybe_locked()
-
-
-cdef extern from "Python.h":
-    """
-    // python version < 3.14
-    #if PY_VERSION_HEX < 0x030E0000
-    // This function is unused and is declared to avoid a build warning
-    int __Pyx_PyUnstable_Object_IsUniqueReferencedTemporary(PyObject *ref) {
-        return Py_REFCNT(ref) == 1;
-    }
-    #else
-    #define __Pyx_PyUnstable_Object_IsUniqueReferencedTemporary \
-        PyUnstable_Object_IsUniqueReferencedTemporary
-    #endif
-    """
-    int PyUnstable_Object_IsUniqueReferencedTemporary\
-        "__Pyx_PyUnstable_Object_IsUniqueReferencedTemporary"(object o) except -1
-
-
-# Python version compatibility for PyUnstable_Object_IsUniqueReferencedTemporary
-cdef inline bint _is_unique_referenced_temporary(object obj) except -1:
-    if PY_VERSION_HEX >= 0x030E0000:
-        # Python 3.14+ has PyUnstable_Object_IsUniqueReferencedTemporary
-        return PyUnstable_Object_IsUniqueReferencedTemporary(obj)
-    else:
-        # Fallback for older Python versions using sys.getrefcount
-        return sys.getrefcount(obj) <= 1
-
-
-cdef class SetitemMixin:
-    # class used in DataFrame and Series for checking for chained assignment
-
-    def __setitem__(self, key, value) -> None:
-        cdef bint is_unique = 0
-        if not CHAINED_WARNING_DISABLED:
-            is_unique = _is_unique_referenced_temporary(self)
-            if is_unique:
-                warnings.warn(
-                    _chained_assignment_msg, ChainedAssignmentError, stacklevel=1
-                )
-        self._setitem(key, value)
-
-    def __delitem__(self, key) -> None:
-        self._delitem(key)
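The deleted ``SetitemMixin`` worked by checking whether ``self`` is a uniquely referenced temporary (via ``PyUnstable_Object_IsUniqueReferencedTemporary`` on 3.14+, falling back to ``sys.getrefcount``). A rough pure-Python sketch of that heuristic, for illustration only (the ``Probe`` class and the exact counts are illustrative, not pandas code):

    import sys

    class Probe:
        def __setitem__(self, key, value):
            # sys.getrefcount counts its own argument too, so the absolute
            # numbers are interpreter-specific; what matters is that a bare
            # temporary shows a smaller count than a named, long-lived object.
            print("refcount seen in __setitem__:", sys.getrefcount(self))

    held = Probe()
    held["x"] = 1      # object bound to a name -> higher refcount
    Probe()["x"] = 1   # temporary, as produced by chained indexing -> lower refcount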

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 8 additions & 0 deletions
@@ -83,6 +83,7 @@ from pandas._libs.tslibs.offsets import Day

 from pandas._libs.tslibs.util cimport (
     is_array,
+    is_bool_object,
     is_float_object,
     is_integer_object,
 )

@@ -2311,6 +2312,13 @@ class Timedelta(_Timedelta):
                 return self.__mul__(item)
             return other * self.to_timedelta64()

+        elif is_bool_object(other):
+            # GH#62316
+            raise TypeError(
+                "Cannot multiply Timedelta by bool. "
+                "Explicitly cast to integer instead."
+            )
+
         return NotImplemented

     __rmul__ = __mul__
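In behavior terms, the new ``is_bool_object`` branch rejects boolean operands instead of silently treating them as 0/1. A small sketch of the intended behavior, assuming a build with this change (and ``__rmul__`` aliases ``__mul__``, so the reflected case raises too):

    import pandas as pd

    td = pd.Timedelta("1 hour")
    td * 2           # still fine: integer multiplication
    try:
        td * True    # GH#62316: now raises instead of multiplying by 1
    except TypeError as err:
        print(err)   # Cannot multiply Timedelta by bool. Explicitly cast to integer instead.
    td * int(True)   # the suggested explicit cast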

pandas/_testing/contexts.py

Lines changed: 3 additions & 14 deletions
@@ -12,10 +12,7 @@
 )
 import uuid

-from pandas.compat import (
-    CHAINED_WARNING_DISABLED,
-    CHAINED_WARNING_DISABLED_INPLACE_METHOD,
-)
+from pandas.compat import CHAINED_WARNING_DISABLED
 from pandas.errors import ChainedAssignmentError

 from pandas.io.common import get_handle

@@ -163,18 +160,10 @@ def with_csv_dialect(name: str, **kwargs) -> Generator[None]:
     csv.unregister_dialect(name)


-def raises_chained_assignment_error(
-    extra_warnings=(), extra_match=(), inplace_method=False
-):
+def raises_chained_assignment_error(extra_warnings=(), extra_match=()):
     from pandas._testing import assert_produces_warning

-    WARNING_DISABLED = (
-        CHAINED_WARNING_DISABLED_INPLACE_METHOD
-        if inplace_method
-        else CHAINED_WARNING_DISABLED
-    )
-
-    if WARNING_DISABLED:
+    if CHAINED_WARNING_DISABLED:
         if not extra_warnings:
             from contextlib import nullcontext

pandas/_typing.py

Lines changed: 40 additions & 0 deletions
@@ -533,4 +533,44 @@ def closed(self) -> bool:

 SliceType: TypeAlias = Hashable | None

+
+# Arrow PyCapsule Interface
+# from https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#protocol-typehints
+
+
+class ArrowArrayExportable(Protocol):
+    """
+    An object with an ``__arrow_c_array__`` method.
+
+    This method indicates the object is an Arrow-compatible object implementing
+    the `Arrow PyCapsule Protocol`_ (exposing the `Arrow C Data Interface`_ in
+    Python), enabling zero-copy Arrow data interchange across libraries.
+
+    .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+    .. _Arrow C Data Interface: https://arrow.apache.org/docs/format/CDataInterface.html
+
+    """
+
+    def __arrow_c_array__(
+        self, requested_schema: object | None = None
+    ) -> tuple[object, object]: ...
+
+
+class ArrowStreamExportable(Protocol):
+    """
+    An object with an ``__arrow_c_stream__`` method.
+
+    This method indicates the object is an Arrow-compatible object implementing
+    the `Arrow PyCapsule Protocol`_ (exposing the `Arrow C Data Interface`_
+    for streams in Python), enabling zero-copy Arrow data interchange across
+    libraries.
+
+    .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+    .. _Arrow C Stream Interface: https://arrow.apache.org/docs/format/CStreamInterface.html
+
+    """
+
+    def __arrow_c_stream__(self, requested_schema: object | None = None) -> object: ...
+
+
 __all__ = ["type_t"]
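These Protocols are structural, so any object that grows the right dunder satisfies them without importing pandas. A sketch of a third-party-style container that becomes consumable by ``DataFrame.from_arrow`` simply by delegating its ``__arrow_c_stream__`` to pyarrow (class and attribute names here are illustrative, not pandas API):

    import pyarrow as pa
    import pandas as pd

    class MyTabular:
        # Hypothetical wrapper that owns a pyarrow Table internally.
        def __init__(self, table: pa.Table) -> None:
            self._table = table

        def __arrow_c_stream__(self, requested_schema=None):
            # Re-export the PyCapsule produced by pyarrow; this is what makes
            # the object match ArrowStreamExportable structurally.
            return self._table.__arrow_c_stream__(requested_schema)

    obj = MyTabular(pa.table({"a": [1, 2, 3]}))
    df = pd.DataFrame.from_arrow(obj)   # accepted via the Arrow PyCapsule Protocol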

pandas/compat/__init__.py

Lines changed: 0 additions & 2 deletions
@@ -17,7 +17,6 @@

 from pandas.compat._constants import (
     CHAINED_WARNING_DISABLED,
-    CHAINED_WARNING_DISABLED_INPLACE_METHOD,
     IS64,
     ISMUSL,
     PY312,

@@ -154,7 +153,6 @@ def is_ci_environment() -> bool:

 __all__ = [
     "CHAINED_WARNING_DISABLED",
-    "CHAINED_WARNING_DISABLED_INPLACE_METHOD",
     "HAS_PYARROW",
     "IS64",
     "ISMUSL",

pandas/compat/_constants.py

Lines changed: 5 additions & 3 deletions
@@ -18,9 +18,11 @@
 PYPY = platform.python_implementation() == "PyPy"
 WASM = (sys.platform == "emscripten") or (platform.machine() in ["wasm32", "wasm64"])
 ISMUSL = "musl" in (sysconfig.get_config_var("HOST_GNU_TYPE") or "")
-REF_COUNT = 2
-CHAINED_WARNING_DISABLED = PYPY or (PY314 and not sys._is_gil_enabled())  # type: ignore[attr-defined]
-CHAINED_WARNING_DISABLED_INPLACE_METHOD = PYPY or PY314
+# the refcount for self in a chained __setitem__/.(i)loc indexing/method call
+REF_COUNT = 2 if PY314 else 3
+REF_COUNT_IDX = 2
+REF_COUNT_METHOD = 1 if PY314 else 2
+CHAINED_WARNING_DISABLED = PYPY


 __all__ = [

pandas/compat/pickle_compat.py

Lines changed: 0 additions & 5 deletions
@@ -22,7 +22,6 @@
     PeriodArray,
     TimedeltaArray,
 )
-from pandas.core.generic import NDFrame
 from pandas.core.internals import BlockManager

 if TYPE_CHECKING:

@@ -91,10 +90,6 @@ def load_reduce(self) -> None:
             cls = args[0]
             stack[-1] = NDArrayBacked.__new__(*args)
             return
-        elif args and issubclass(args[0], NDFrame):
-            cls = args[0]
-            stack[-1] = cls.__new__(cls)
-            return
         raise

 dispatch[pickle.REDUCE[0]] = load_reduce  # type: ignore[assignment]

pandas/core/arrays/interval.py

Lines changed: 14 additions & 23 deletions
@@ -2100,18 +2100,9 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
             return np.zeros(self.shape, dtype=bool)

         if self.dtype == values.dtype:
-            # GH#38353 instead of casting to object, operating on a
-            # complex128 ndarray is much more performant.
-            left = self._combined.view("complex128")
-            right = values._combined.view("complex128")
-            # error: Argument 1 to "isin" has incompatible type
-            # "Union[ExtensionArray, ndarray[Any, Any],
-            # ndarray[Any, dtype[Any]]]"; expected
-            # "Union[_SupportsArray[dtype[Any]],
-            # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
-            # int, float, complex, str, bytes, _NestedSequence[
-            # Union[bool, int, float, complex, str, bytes]]]"
-            return np.isin(left, right).ravel()  # type: ignore[arg-type]
+            left = self._combined
+            right = values._combined
+            return np.isin(left, right).ravel()

         elif needs_i8_conversion(self.left.dtype) ^ needs_i8_conversion(
             values.left.dtype

@@ -2127,24 +2118,29 @@ def _combined(self) -> IntervalSide:
         # has no attribute "reshape"  [union-attr]
         left = self.left._values.reshape(-1, 1)  # type: ignore[union-attr]
         right = self.right._values.reshape(-1, 1)  # type: ignore[union-attr]
+        # GH#38353 instead of casting to object, operating on a
+        # complex128 ndarray is much more performant.
         if needs_i8_conversion(left.dtype):
            # error: Item "ndarray[Any, Any]" of "Any | ndarray[Any, Any]" has
             # no attribute "_concat_same_type"
             comb = left._concat_same_type(  # type: ignore[union-attr]
                 [left, right], axis=1
             )
+            comb = comb.view("complex128")[:, 0]
         else:
-            comb = np.concatenate([left, right], axis=1)
+            comb = (np.array(left.ravel(), dtype="complex128")) + (
+                1j * np.array(right.ravel(), dtype="complex128")
+            )
         return comb

     def _from_combined(self, combined: np.ndarray) -> IntervalArray:
         """
         Create a new IntervalArray with our dtype from a 1D complex128 ndarray.
         """
-        nc = combined.view("i8").reshape(-1, 2)

         dtype = self._left.dtype
         if needs_i8_conversion(dtype):
+            nc = combined.view("i8").reshape(-1, 2)
             assert isinstance(self._left, (DatetimeArray, TimedeltaArray))
             new_left: DatetimeArray | TimedeltaArray | np.ndarray = type(
                 self._left

@@ -2155,18 +2151,13 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray:
             )._from_sequence(nc[:, 1], dtype=dtype)
         else:
             assert isinstance(dtype, np.dtype)
-            new_left = nc[:, 0].view(dtype)
-            new_right = nc[:, 1].view(dtype)
+            new_left = np.real(combined).astype(dtype).ravel()
+            new_right = np.imag(combined).astype(dtype).ravel()
         return self._shallow_copy(left=new_left, right=new_right)

     def unique(self) -> IntervalArray:
-        # No overload variant of "__getitem__" of "ExtensionArray" matches argument
-        # type "Tuple[slice, int]"
-        nc = unique(
-            self._combined.view("complex128")[:, 0]  # type: ignore[call-overload]
-        )
-        nc = nc[:, None]
-        return self._from_combined(nc)
+        nc = unique(self._combined)
+        return self._from_combined(np.asarray(nc)[:, None])


 def _maybe_convert_platform_interval(values) -> ArrayLike:
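The net effect of this refactor is that ``_combined`` now returns the complex128 packing directly (GH#38353), so ``isin`` and ``unique`` compare both interval endpoints in a single vectorized pass. A standalone numpy sketch of the packing trick (the arrays here are illustrative, not pandas internals):

    import numpy as np

    left = np.array([0.0, 1.0, 2.0, 1.0])
    right = np.array([1.0, 2.0, 3.0, 2.0])

    # Pack each (left, right) endpoint pair into a single complex number.
    comb = left.astype("complex128") + 1j * right.astype("complex128")

    # Membership and uniqueness now act on whole intervals at once.
    other = np.array([1.0 + 2.0j, 5.0 + 6.0j])
    print(np.isin(comb, other))   # [False  True False  True]
    print(np.unique(comb))        # drops the duplicated (1.0, 2.0) interval

    # Unpack back to the two endpoint arrays.
    new_left = np.real(comb)
    new_right = np.imag(comb)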
