Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@
AnyAll,
AnyArrayLike,
ArrayLike,
ArrowArrayExportable,
ArrowStreamExportable,
Axis,
AxisInt,
CorrelationMethod,
Expand Down Expand Up @@ -1833,6 +1835,55 @@ def to_frame(self, name: Hashable = lib.no_default) -> DataFrame:
df = self._constructor_expanddim_from_mgr(mgr, axes=mgr.axes)
return df.__finalize__(self, method="to_frame")

@classmethod
def from_arrow(cls, data: ArrowArrayExportable | ArrowStreamExportable) -> Series:
    """
    Construct a Series from an array-like Arrow object.

    This function accepts any Arrow-compatible array-like object implementing
    the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_array__``
    or ``__arrow_c_stream__`` method).

    This function currently relies on ``pyarrow`` to convert the object
    in Arrow format to pandas.

    .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html

    .. versionadded:: 3.0

    Parameters
    ----------
    data : pyarrow.Array or Arrow-compatible object
        Any array-like object implementing the Arrow PyCapsule Protocol
        (i.e. has an ``__arrow_c_array__`` or ``__arrow_c_stream__``
        method).

    Returns
    -------
    Series

    Raises
    ------
    TypeError
        If ``data`` is neither a ``pyarrow.Array`` / ``pyarrow.ChunkedArray``
        nor an object with an ``__arrow_c_array__`` or
        ``__arrow_c_stream__`` method.

    Examples
    --------
    >>> import pyarrow as pa
    >>> pd.Series.from_arrow(pa.array([1, 2, 3]))
    0    1
    1    2
    2    3
    dtype: int64
    """
    pa = import_optional_dependency("pyarrow", min_version="14.0.0")

    # pyarrow's own array types need no protocol round-trip.
    if isinstance(data, (pa.Array, pa.ChunkedArray)):
        return data.to_pandas()

    if not (
        hasattr(data, "__arrow_c_array__") or hasattr(data, "__arrow_c_stream__")
    ):
        # explicitly test this, because otherwise we would accept various other
        # input types through the pa.chunked_array(..) call
        raise TypeError(
            "Expected an Arrow-compatible array-like object (i.e. having an "
            "'__arrow_c_array__' or '__arrow_c_stream__' method), got "
            f"'{type(data).__name__}' instead."
        )

    # using chunked_array() as it works for both arrays and streams
    return pa.chunked_array(data).to_pandas()

def _set_name(self, name, inplace: bool = False) -> Series:
"""
Set the Series name.
Expand Down
56 changes: 56 additions & 0 deletions pandas/tests/series/test_arrow_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

import pytest

import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm

pa = pytest.importorskip("pyarrow", minversion="16.0")

Expand Down Expand Up @@ -59,3 +62,56 @@ def test_series_arrow_interface_stringdtype():
ca = pa.chunked_array(s)
expected = pa.chunked_array([["foo", "bar"]], type=pa.large_string())
assert ca.equals(expected)


class ArrowArrayWrapper:
    """Test helper exposing only ``__arrow_c_array__`` of a wrapped object."""

    def __init__(self, array):
        # Keep a reference to the wrapped object; exports are delegated to it.
        self.array = array

    def __arrow_c_array__(self, requested_schema=None):
        """Forward the array export request to the wrapped object."""
        export = self.array.__arrow_c_array__
        return export(requested_schema)


class ArrowStreamWrapper:
    """Test helper exposing only ``__arrow_c_stream__`` of a wrapped object."""

    def __init__(self, chunked_array):
        # Keep a reference to the wrapped object; exports are delegated to it.
        self.stream = chunked_array

    def __arrow_c_stream__(self, requested_schema=None):
        """Forward the stream export request to the wrapped object."""
        export = self.stream.__arrow_c_stream__
        return export(requested_schema)


@td.skip_if_no("pyarrow", min_version="14.0")
def test_dataframe_from_arrow():
    """Exercise ``Series.from_arrow`` with stream, table and array inputs."""
    # Objects with __arrow_c_stream__: a pyarrow ChunkedArray is accepted both
    # directly and through a wrapper that is not a pyarrow type.
    chunked = pa.chunked_array([[1, 2, 3], [4, 5]])
    flat_expected = pd.Series([1, 2, 3, 4, 5])
    for stream_like in (chunked, ArrowStreamWrapper(chunked)):
        tm.assert_series_equal(pd.Series.from_arrow(stream_like), flat_expected)

    # A table works as well, but is seen as a StructArray, so every row is
    # expected to come back as a single dict-valued element.
    struct_expected = pd.Series(
        [{"a": 1, "b": "a"}, {"a": 2, "b": "b"}, {"a": 3, "b": "c"}]
    )
    tbl = pa.table({"a": [1, 2, 3], "b": ["a", "b", "c"]})
    tm.assert_series_equal(pd.Series.from_arrow(tbl), struct_expected)

    # Objects with __arrow_c_array__: a plain pyarrow Array and a wrapper.
    plain = pa.array([1, 2, 3])
    array_expected = pd.Series([1, 2, 3])
    for array_like in (plain, ArrowArrayWrapper(plain)):
        tm.assert_series_equal(pd.Series.from_arrow(array_like), array_expected)

    # Only actual Arrow objects are accepted; a plain list must be rejected.
    with pytest.raises(
        TypeError, match="Expected an Arrow-compatible array-like object"
    ):
        pd.Series.from_arrow([1, 2, 3])
Loading