From f61ded5c4e291110ce07abd7d0669c9677fac663 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Diridollou?= Date: Sun, 9 Nov 2025 20:59:06 -0500 Subject: [PATCH 1/4] GH1409 Improve Series.to_numpy typing --- pandas-stubs/core/series.pyi | 48 +++++++++++++++++++ tests/series/test_series.py | 89 +++++++++++++++++++++++++++++++++--- 2 files changed, 131 insertions(+), 6 deletions(-) diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index f927835ce..3796d5a94 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -4494,6 +4494,54 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): **kwargs: Any, ) -> np_1darray[_T_INTERVAL_NP]: ... @overload + def to_numpy( + self: Series[int], + dtype: DTypeLike | None = None, + copy: bool = False, + na_value: Scalar = ..., + **kwargs: Any, + ) -> np_1darray[np.integer]: ... + @overload + def to_numpy( + self: Series[float], + dtype: DTypeLike | None = None, + copy: bool = False, + na_value: Scalar = ..., + **kwargs: Any, + ) -> np_1darray[np.floating]: ... + @overload + def to_numpy( + self: Series[complex], + dtype: DTypeLike | None = None, + copy: bool = False, + na_value: Scalar = ..., + **kwargs: Any, + ) -> np_1darray[np.complexfloating]: ... + @overload + def to_numpy( + self: Series[bool], + dtype: DTypeLike | None = None, + copy: bool = False, + na_value: Scalar = ..., + **kwargs: Any, + ) -> np_1darray[np.bool_]: ... + @overload + def to_numpy( + self: Series[_str], + dtype: DTypeLike | None = None, + copy: bool = False, + na_value: Scalar = ..., + **kwargs: Any, + ) -> np_1darray[np.str_]: ... + @overload + def to_numpy( + self: Series[bytes], + dtype: DTypeLike | None = None, + copy: bool = False, + na_value: Scalar = ..., + **kwargs: Any, + ) -> np_1darray[np.bytes_]: ... + @overload def to_numpy( # pyright: ignore[reportIncompatibleMethodOverride] self, dtype: DTypeLike | None = None, diff --git a/tests/series/test_series.py b/tests/series/test_series.py index 474649999..95d7ac4ec 100644 --- a/tests/series/test_series.py +++ b/tests/series/test_series.py @@ -45,6 +45,7 @@ ) import xarray as xr +from pandas._libs.tslibs.offsets import Day from pandas._typing import ( DtypeObj, Scalar, @@ -1980,16 +1981,92 @@ def test_dtype_type() -> None: def test_types_to_numpy() -> None: s = pd.Series(["a", "b", "c"], dtype=str) - check(assert_type(s.to_numpy(), np_1darray), np_1darray) - check(assert_type(s.to_numpy(dtype="str", copy=True), np_1darray), np_1darray) - check(assert_type(s.to_numpy(na_value=0), np_1darray), np_1darray) - check(assert_type(s.to_numpy(na_value=np.int32(4)), np_1darray), np_1darray) - check(assert_type(s.to_numpy(na_value=np.float16(4)), np_1darray), np_1darray) - check(assert_type(s.to_numpy(na_value=np.complex128(4, 7)), np_1darray), np_1darray) + check(assert_type(s.to_numpy(), np_1darray[np.str_]), np_1darray) + check( + assert_type(s.to_numpy(dtype="str", copy=True), np_1darray[np.str_]), np_1darray + ) + check(assert_type(s.to_numpy(na_value=0), np_1darray[np.str_]), np_1darray) + check( + assert_type(s.to_numpy(na_value=np.int32(4)), np_1darray[np.str_]), np_1darray + ) + check( + assert_type(s.to_numpy(na_value=np.float16(4)), np_1darray[np.str_]), np_1darray + ) + check( + assert_type(s.to_numpy(na_value=np.complex128(4, 7)), np_1darray[np.str_]), + np_1darray, + ) check(assert_type(pd.Series().to_numpy(), np_1darray), np_1darray) +def test_to_numpy() -> None: + """Test Series.to_numpy for different types.""" + s1 = pd.Series(["a", "b", "c"], dtype=str) + check(assert_type(s1.to_numpy(), np_1darray[np.str_]), np_1darray, str) + + s2 = pd.Series(["a", "b", "c"]).astype(bytes) + check(assert_type(s2.to_numpy(), np_1darray[np.bytes_]), np_1darray, np.bytes_) + + s3 = pd.Series([True, False]) + check(assert_type(s3.to_numpy(), np_1darray[np.bool_]), np_1darray, np.bool_) + + s4 = pd.Series([2, 3, 4]) + check(assert_type(s4.to_numpy(), np_1darray[np.integer]), np_1darray, np.integer) + + s5 = pd.Series([2.0, 3.54, 4.84]) + check(assert_type(s5.to_numpy(), np_1darray[np.floating]), np_1darray, np.floating) + + s6 = pd.Series([2.0 + 2j, 3.54 + 4j, 4.84]) + check( + assert_type(s6.to_numpy(), np_1darray[np.complexfloating]), + np_1darray, + np.complexfloating, + ) + + dates = pd.Series( + [ + pd.Timestamp("2020-01-01"), + pd.Timestamp("2020-01-15"), + pd.Timestamp("2020-02-01"), + ], + dtype="datetime64[ns]", + ) + s7 = pd.Series(pd.PeriodIndex(dates, freq="M")) + check(assert_type(s7.to_numpy(), np_1darray[np.object_]), np_1darray, pd.Period) + + s8 = pd.Series( + [ + pd.Interval(date, date + pd.DateOffset(days=1), closed="left") + for date in dates + ] + ) + check(assert_type(s8.to_numpy(), np_1darray[np.object_]), np_1darray, pd.Interval) + + s9 = ( + pd.Series(pd.period_range(start="2017-01-01", end="2017-02-01", freq="1D")) + .diff() + .iloc[1:] + ) + check(assert_type(s9.to_numpy(), np_1darray[np.object_]), np_1darray, Day) + + s10 = pd.Series(pd.date_range(start="2017-01-01", end="2017-02-01")) + check( + assert_type(s10.to_numpy(), np_1darray[np.datetime64]), + np_1darray, + np.datetime64, + ) + + s11 = pd.Series( + [datetime.datetime.now().date(), datetime.datetime.now().date()] + ).diff() + check( + assert_type(s11.to_numpy(), np_1darray[np.timedelta64]), + np_1darray, + np.timedelta64, + ) + + def test_where() -> None: s = pd.Series([1, 2, 3], dtype=int) From 126b78f86ff75a255d287d9e263f24bf406ada86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Diridollou?= Date: Mon, 10 Nov 2025 17:59:13 -0500 Subject: [PATCH 2/4] GH1409 PR Feedback --- tests/series/test_series.py | 52 +++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/tests/series/test_series.py b/tests/series/test_series.py index 95d7ac4ec..666758c2b 100644 --- a/tests/series/test_series.py +++ b/tests/series/test_series.py @@ -2002,24 +2002,28 @@ def test_types_to_numpy() -> None: def test_to_numpy() -> None: """Test Series.to_numpy for different types.""" - s1 = pd.Series(["a", "b", "c"], dtype=str) - check(assert_type(s1.to_numpy(), np_1darray[np.str_]), np_1darray, str) + s_str = pd.Series(["a", "b", "c"], dtype=str) + check(assert_type(s_str.to_numpy(), np_1darray[np.str_]), np_1darray, str) - s2 = pd.Series(["a", "b", "c"]).astype(bytes) - check(assert_type(s2.to_numpy(), np_1darray[np.bytes_]), np_1darray, np.bytes_) + s_bytes = pd.Series(["a", "b", "c"]).astype(bytes) + check(assert_type(s_bytes.to_numpy(), np_1darray[np.bytes_]), np_1darray, np.bytes_) - s3 = pd.Series([True, False]) - check(assert_type(s3.to_numpy(), np_1darray[np.bool_]), np_1darray, np.bool_) + s_bool = pd.Series([True, False]) + check(assert_type(s_bool.to_numpy(), np_1darray[np.bool_]), np_1darray, np.bool_) - s4 = pd.Series([2, 3, 4]) - check(assert_type(s4.to_numpy(), np_1darray[np.integer]), np_1darray, np.integer) + s_int = pd.Series([2, 3, 4]) + check(assert_type(s_int.to_numpy(), np_1darray[np.integer]), np_1darray, np.integer) - s5 = pd.Series([2.0, 3.54, 4.84]) - check(assert_type(s5.to_numpy(), np_1darray[np.floating]), np_1darray, np.floating) + s_float = pd.Series([2.0, 3.54, 4.84]) + check( + assert_type(s_float.to_numpy(), np_1darray[np.floating]), + np_1darray, + np.floating, + ) - s6 = pd.Series([2.0 + 2j, 3.54 + 4j, 4.84]) + s_complex = pd.Series([2.0 + 2j, 3.54 + 4j, 4.84]) check( - assert_type(s6.to_numpy(), np_1darray[np.complexfloating]), + assert_type(s_complex.to_numpy(), np_1darray[np.complexfloating]), np_1darray, np.complexfloating, ) @@ -2032,8 +2036,10 @@ def test_to_numpy() -> None: ], dtype="datetime64[ns]", ) - s7 = pd.Series(pd.PeriodIndex(dates, freq="M")) - check(assert_type(s7.to_numpy(), np_1darray[np.object_]), np_1darray, pd.Period) + s_period = pd.PeriodIndex(dates, freq="M").to_series() + check( + assert_type(s_period.to_numpy(), np_1darray[np.object_]), np_1darray, pd.Period + ) s8 = pd.Series( [ @@ -2043,25 +2049,21 @@ def test_to_numpy() -> None: ) check(assert_type(s8.to_numpy(), np_1darray[np.object_]), np_1darray, pd.Interval) - s9 = ( - pd.Series(pd.period_range(start="2017-01-01", end="2017-02-01", freq="1D")) - .diff() - .iloc[1:] - ) - check(assert_type(s9.to_numpy(), np_1darray[np.object_]), np_1darray, Day) + s_interval = pd.Series([Day(1)]) + check(assert_type(s_interval.to_numpy(), np_1darray[np.object_]), np_1darray, Day) - s10 = pd.Series(pd.date_range(start="2017-01-01", end="2017-02-01")) + s_date = pd.Series(pd.date_range(start="2017-01-01", end="2017-02-01")) check( - assert_type(s10.to_numpy(), np_1darray[np.datetime64]), + assert_type(s_date.to_numpy(), np_1darray[np.datetime64]), np_1darray, np.datetime64, ) - s11 = pd.Series( - [datetime.datetime.now().date(), datetime.datetime.now().date()] + s_timedelta = pd.Series( + [pd.Timestamp.now().date(), pd.Timestamp.now().date()] ).diff() check( - assert_type(s11.to_numpy(), np_1darray[np.timedelta64]), + assert_type(s_timedelta.to_numpy(), np_1darray[np.timedelta64]), np_1darray, np.timedelta64, ) From d07cb978590d869a388699b3e3865f3dfe41dc7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Diridollou?= Date: Tue, 11 Nov 2025 10:34:41 -0500 Subject: [PATCH 3/4] GH1409 PR Feedback --- pandas-stubs/_typing.pyi | 9 +++++++++ tests/__init__.py | 18 +++++++++++++++++ tests/series/test_series.py | 39 +++++++++++++++++++++++-------------- 3 files changed, 51 insertions(+), 15 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index ef1398965..8b4c1417e 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -872,6 +872,15 @@ ShapeT = TypeVar("ShapeT", bound=tuple[int, ...], default=tuple[Any, ...]) np_ndarray: TypeAlias = np.ndarray[ShapeT, np.dtype[GenericT]] # Numpy arrays with known shape (Do not use as argument types, only as return types) np_1darray: TypeAlias = np.ndarray[tuple[int], np.dtype[GenericT]] +np_1darray_bool: TypeAlias = np_1darray[np.bool_] +np_1darray_str: TypeAlias = np_1darray[np.str_] +np_1darray_bytes: TypeAlias = np_1darray[np.bytes_] +np_1darray_anyint: TypeAlias = np_1darray[np.integer] +np_1darray_float: TypeAlias = np_1darray[np.floating] +np_1darray_complex: TypeAlias = np_1darray[np.complexfloating] +np_1darray_datetime: TypeAlias = np_1darray[np.datetime64] +np_1darray_timedelta: TypeAlias = np_1darray[np.timedelta64] +np_1darray_object: TypeAlias = np_1darray[np.object_] np_2darray: TypeAlias = np.ndarray[tuple[int, int], np.dtype[GenericT]] class SupportsDType(Protocol[GenericT_co]): diff --git a/tests/__init__.py b/tests/__init__.py index 0e10b6b24..c368cbe36 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -45,6 +45,15 @@ UIntDtypeArg as UIntDtypeArg, VoidDtypeArg as VoidDtypeArg, np_1darray as np_1darray, + np_1darray_anyint as np_1darray_anyint, + np_1darray_bool as np_1darray_bool, + np_1darray_bytes as np_1darray_bytes, + np_1darray_complex as np_1darray_complex, + np_1darray_datetime as np_1darray_datetime, + np_1darray_float as np_1darray_float, + np_1darray_object as np_1darray_object, + np_1darray_str as np_1darray_str, + np_1darray_timedelta as np_1darray_timedelta, np_2darray as np_2darray, np_ndarray as np_ndarray, np_ndarray_bool as np_ndarray_bool, @@ -55,6 +64,15 @@ _S = TypeVar("_S", bound=tuple[int, ...]) # Separately define here so pytest works np_1darray: TypeAlias = np.ndarray[tuple[int], np.dtype[_G]] + np_1darray_bool: TypeAlias = np.ndarray[tuple[int], np.bool_] + np_1darray_str: TypeAlias = np.ndarray[tuple[int], np.str_] + np_1darray_bytes: TypeAlias = np.ndarray[tuple[int], np.bytes_] + np_1darray_anyint: TypeAlias = np.ndarray[tuple[int], np.integer] + np_1darray_float: TypeAlias = np.ndarray[tuple[int], np.floating] + np_1darray_complex: TypeAlias = np.ndarray[tuple[int], np.complexfloating] + np_1darray_datetime: TypeAlias = np.ndarray[tuple[int], np.datetime64] + np_1darray_timedelta: TypeAlias = np.ndarray[tuple[int], np.timedelta64] + np_1darray_object: TypeAlias = np.ndarray[tuple[int], np.object_] np_2darray: TypeAlias = np.ndarray[tuple[int, int], np.dtype[_G]] np_ndarray: TypeAlias = np.ndarray[_S, np.dtype[_G]] np_ndarray_bool: TypeAlias = npt.NDArray[np.bool_] diff --git a/tests/series/test_series.py b/tests/series/test_series.py index 666758c2b..56e57ee25 100644 --- a/tests/series/test_series.py +++ b/tests/series/test_series.py @@ -60,6 +60,15 @@ check, ensure_clean, np_1darray, + np_1darray_anyint, + np_1darray_bool, + np_1darray_bytes, + np_1darray_complex, + np_1darray_datetime, + np_1darray_float, + np_1darray_object, + np_1darray_str, + np_1darray_timedelta, pytest_warns_bounded, ) from tests.extension.decimal.array import DecimalDtype @@ -2003,27 +2012,27 @@ def test_types_to_numpy() -> None: def test_to_numpy() -> None: """Test Series.to_numpy for different types.""" s_str = pd.Series(["a", "b", "c"], dtype=str) - check(assert_type(s_str.to_numpy(), np_1darray[np.str_]), np_1darray, str) + check(assert_type(s_str.to_numpy(), np_1darray_str), np_1darray, str) s_bytes = pd.Series(["a", "b", "c"]).astype(bytes) - check(assert_type(s_bytes.to_numpy(), np_1darray[np.bytes_]), np_1darray, np.bytes_) + check(assert_type(s_bytes.to_numpy(), np_1darray_bytes), np_1darray, np.bytes_) s_bool = pd.Series([True, False]) - check(assert_type(s_bool.to_numpy(), np_1darray[np.bool_]), np_1darray, np.bool_) + check(assert_type(s_bool.to_numpy(), np_1darray_bool), np_1darray, np.bool_) s_int = pd.Series([2, 3, 4]) - check(assert_type(s_int.to_numpy(), np_1darray[np.integer]), np_1darray, np.integer) + check(assert_type(s_int.to_numpy(), np_1darray_anyint), np_1darray, np.integer) s_float = pd.Series([2.0, 3.54, 4.84]) check( - assert_type(s_float.to_numpy(), np_1darray[np.floating]), + assert_type(s_float.to_numpy(), np_1darray_float), np_1darray, np.floating, ) s_complex = pd.Series([2.0 + 2j, 3.54 + 4j, 4.84]) check( - assert_type(s_complex.to_numpy(), np_1darray[np.complexfloating]), + assert_type(s_complex.to_numpy(), np_1darray_complex), np_1darray, np.complexfloating, ) @@ -2037,24 +2046,24 @@ def test_to_numpy() -> None: dtype="datetime64[ns]", ) s_period = pd.PeriodIndex(dates, freq="M").to_series() - check( - assert_type(s_period.to_numpy(), np_1darray[np.object_]), np_1darray, pd.Period - ) + check(assert_type(s_period.to_numpy(), np_1darray_object), np_1darray, pd.Period) - s8 = pd.Series( + s_interval = pd.Series( [ pd.Interval(date, date + pd.DateOffset(days=1), closed="left") for date in dates ] ) - check(assert_type(s8.to_numpy(), np_1darray[np.object_]), np_1darray, pd.Interval) + check( + assert_type(s_interval.to_numpy(), np_1darray_object), np_1darray, pd.Interval + ) - s_interval = pd.Series([Day(1)]) - check(assert_type(s_interval.to_numpy(), np_1darray[np.object_]), np_1darray, Day) + s_day = pd.Series([Day(1)]) + check(assert_type(s_day.to_numpy(), np_1darray_object), np_1darray, Day) s_date = pd.Series(pd.date_range(start="2017-01-01", end="2017-02-01")) check( - assert_type(s_date.to_numpy(), np_1darray[np.datetime64]), + assert_type(s_date.to_numpy(), np_1darray_datetime), np_1darray, np.datetime64, ) @@ -2063,7 +2072,7 @@ def test_to_numpy() -> None: [pd.Timestamp.now().date(), pd.Timestamp.now().date()] ).diff() check( - assert_type(s_timedelta.to_numpy(), np_1darray[np.timedelta64]), + assert_type(s_timedelta.to_numpy(), np_1darray_timedelta), np_1darray, np.timedelta64, ) From 7e4795bb311d05233749b52cf947e3331fcbbfdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Diridollou?= Date: Tue, 11 Nov 2025 10:47:46 -0500 Subject: [PATCH 4/4] GH1409 PR Feedback --- pandas-stubs/core/series.pyi | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index f2113ed60..96f73a950 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -220,10 +220,16 @@ from pandas._typing import ( WriteBuffer, _T_co, np_1darray, + np_1darray_anyint, np_1darray_bool, + np_1darray_bytes, + np_1darray_complex, np_1darray_dt, + np_1darray_float, np_1darray_int64, np_1darray_intp, + np_1darray_object, + np_1darray_str, np_1darray_td, np_ndarray, np_ndarray_anyint, @@ -4469,7 +4475,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): copy: bool = False, na_value: Scalar = ..., **kwargs: Any, - ) -> np_1darray[np.object_]: ... + ) -> np_1darray_object: ... @overload def to_numpy( self: Series[Period], @@ -4485,7 +4491,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): copy: bool = False, na_value: Scalar = ..., **kwargs: Any, - ) -> np_1darray[np.object_]: ... + ) -> np_1darray_object: ... @overload def to_numpy( self: Series[BaseOffset], @@ -4493,7 +4499,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): copy: bool = False, na_value: Scalar = ..., **kwargs: Any, - ) -> np_1darray[np.bytes_]: ... + ) -> np_1darray_bytes: ... @overload def to_numpy( self: Series[Interval], @@ -4501,7 +4507,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): copy: bool = False, na_value: Scalar = ..., **kwargs: Any, - ) -> np_1darray[np.object_]: ... + ) -> np_1darray_object: ... @overload def to_numpy( self: Series[Interval], @@ -4517,7 +4523,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): copy: bool = False, na_value: Scalar = ..., **kwargs: Any, - ) -> np_1darray[np.integer]: ... + ) -> np_1darray_anyint: ... @overload def to_numpy( self: Series[float], @@ -4525,7 +4531,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): copy: bool = False, na_value: Scalar = ..., **kwargs: Any, - ) -> np_1darray[np.floating]: ... + ) -> np_1darray_float: ... @overload def to_numpy( self: Series[complex], @@ -4533,7 +4539,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): copy: bool = False, na_value: Scalar = ..., **kwargs: Any, - ) -> np_1darray[np.complexfloating]: ... + ) -> np_1darray_complex: ... @overload def to_numpy( self: Series[bool], @@ -4541,7 +4547,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): copy: bool = False, na_value: Scalar = ..., **kwargs: Any, - ) -> np_1darray[np.bool_]: ... + ) -> np_1darray_bool: ... @overload def to_numpy( self: Series[_str], @@ -4549,7 +4555,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): copy: bool = False, na_value: Scalar = ..., **kwargs: Any, - ) -> np_1darray[np.str_]: ... + ) -> np_1darray_str: ... @overload def to_numpy( self: Series[bytes], @@ -4557,7 +4563,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): copy: bool = False, na_value: Scalar = ..., **kwargs: Any, - ) -> np_1darray[np.bytes_]: ... + ) -> np_1darray_bytes: ... @overload def to_numpy( # pyright: ignore[reportIncompatibleMethodOverride] self,