From d0c4d55854e715472b2956f0b309e6ae64a999ed Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Wed, 26 Nov 2025 11:31:13 +0100
Subject: [PATCH] ENH: add Series.from_arrow class method for importing through
 Arrow PyCapsule interface (in addition to DataFrame.from_arrow)

---
 pandas/core/series.py                       | 51 +++++++++++++++++++
 pandas/tests/series/test_arrow_interface.py | 56 +++++++++++++++++++++
 2 files changed, 107 insertions(+)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index 6d7e713b7ad6b..abde1f504ea03 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -168,6 +168,8 @@
     AnyAll,
     AnyArrayLike,
     ArrayLike,
+    ArrowArrayExportable,
+    ArrowStreamExportable,
     Axis,
     AxisInt,
     CorrelationMethod,
@@ -1833,6 +1835,55 @@ def to_frame(self, name: Hashable = lib.no_default) -> DataFrame:
         df = self._constructor_expanddim_from_mgr(mgr, axes=mgr.axes)
         return df.__finalize__(self, method="to_frame")
 
+    @classmethod
+    def from_arrow(cls, data: ArrowArrayExportable | ArrowStreamExportable) -> Series:
+        """
+        Construct a Series from an array-like Arrow object.
+
+        This function accepts any Arrow-compatible array-like object implementing
+        the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_array__``
+        or ``__arrow_c_stream__`` method).
+
+        This function currently relies on ``pyarrow`` to convert the object
+        in Arrow format to pandas.
+
+        .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+
+        .. versionadded:: 3.0
+
+        Parameters
+        ----------
+        data : pyarrow.Array or Arrow-compatible object
+            Any array-like object implementing the Arrow PyCapsule Protocol
+            (i.e. has an ``__arrow_c_array__`` or ``__arrow_c_stream__``
+            method).
+
+        Returns
+        -------
+        Series
+
+        """
+        pa = import_optional_dependency("pyarrow", min_version="14.0.0")
+        if not isinstance(data, (pa.Array, pa.ChunkedArray)):
+            if not (
+                hasattr(data, "__arrow_c_array__")
+                or hasattr(data, "__arrow_c_stream__")
+            ):
+                # explicitly test this, because otherwise we would accept various other
+                # input types through the pa.chunked_array(..) call
+                raise TypeError(
+                    "Expected an Arrow-compatible array-like object (i.e. having an "
+                    "'__arrow_c_array__' or '__arrow_c_stream__' method), got "
+                    f"'{type(data).__name__}' instead."
+                )
+            # using chunked_array() as it works for both arrays and streams
+            pa_array = pa.chunked_array(data)
+        else:
+            pa_array = data
+
+        ser = pa_array.to_pandas()
+        return ser
+
     def _set_name(self, name, inplace: bool = False) -> Series:
         """
         Set the Series name.
diff --git a/pandas/tests/series/test_arrow_interface.py b/pandas/tests/series/test_arrow_interface.py
index e73cf9bee6aeb..3b4340064ea1b 100644
--- a/pandas/tests/series/test_arrow_interface.py
+++ b/pandas/tests/series/test_arrow_interface.py
@@ -2,7 +2,10 @@
 
 import pytest
 
+import pandas.util._test_decorators as td
+
 import pandas as pd
+import pandas._testing as tm
 
 pa = pytest.importorskip("pyarrow", minversion="16.0")
 
@@ -59,3 +62,56 @@ def test_series_arrow_interface_stringdtype():
     ca = pa.chunked_array(s)
     expected = pa.chunked_array([["foo", "bar"]], type=pa.large_string())
     assert ca.equals(expected)
+
+
+class ArrowArrayWrapper:
+    def __init__(self, array):
+        self.array = array
+
+    def __arrow_c_array__(self, requested_schema=None):
+        return self.array.__arrow_c_array__(requested_schema)
+
+
+class ArrowStreamWrapper:
+    def __init__(self, chunked_array):
+        self.stream = chunked_array
+
+    def __arrow_c_stream__(self, requested_schema=None):
+        return self.stream.__arrow_c_stream__(requested_schema)
+
+
+@td.skip_if_no("pyarrow", min_version="14.0")
+def test_series_from_arrow():
+    # objects with __arrow_c_stream__
+    arr = pa.chunked_array([[1, 2, 3], [4, 5]])
+
+    result = pd.Series.from_arrow(arr)
+    expected = pd.Series([1, 2, 3, 4, 5])
+    tm.assert_series_equal(result, expected)
+
+    # not only pyarrow objects are supported
+    result = pd.Series.from_arrow(ArrowStreamWrapper(arr))
+    tm.assert_series_equal(result, expected)
+
+    # table works as well, but will be seen as a StructArray
+    table = pa.table({"a": [1, 2, 3], "b": ["a", "b", "c"]})
+
+    result = pd.Series.from_arrow(table)
+    expected = pd.Series([{"a": 1, "b": "a"}, {"a": 2, "b": "b"}, {"a": 3, "b": "c"}])
+    tm.assert_series_equal(result, expected)
+
+    # objects with __arrow_c_array__
+    arr = pa.array([1, 2, 3])
+
+    expected = pd.Series([1, 2, 3])
+    result = pd.Series.from_arrow(arr)
+    tm.assert_series_equal(result, expected)
+
+    result = pd.Series.from_arrow(ArrowArrayWrapper(arr))
+    tm.assert_series_equal(result, expected)
+
+    # only accept actual Arrow objects
+    with pytest.raises(
+        TypeError, match="Expected an Arrow-compatible array-like object"
+    ):
+        pd.Series.from_arrow([1, 2, 3])