From fb54089e809d4c3ed2d9d6d5473e1d511099e36b Mon Sep 17 00:00:00 2001 From: Chilin Chiou Date: Sun, 15 Jun 2025 02:16:30 +0800 Subject: [PATCH 1/5] BUG: to_numeric raise ValueError when the arrow array contains NA --- pandas/core/tools/numeric.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 14921457194ca..558a42c21033d 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -222,8 +222,15 @@ def to_numeric( values_dtype = getattr(values, "dtype", None) if isinstance(values_dtype, ArrowDtype): + if is_numeric_dtype(values_dtype): + if is_series: + return arg._constructor(values, index=arg.index, name=arg.name) + else: + return values + mask = values.isna() values = values.dropna().to_numpy() + new_mask: np.ndarray | None = None if is_numeric_dtype(values_dtype): pass From 7c2a65e26aaab853168e74ba9e73068013fc4e0d Mon Sep 17 00:00:00 2001 From: Chilin Chiou Date: Sun, 15 Jun 2025 02:18:41 +0800 Subject: [PATCH 2/5] TST: to_numeric raise ValueError when the arrow array contains NA --- pandas/tests/tools/test_to_numeric.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 12e6be18244e1..c35eef1f391a9 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -919,3 +919,14 @@ def test_coerce_pyarrow_backend(): result = to_numeric(ser, errors="coerce", dtype_backend="pyarrow") expected = Series([1, 2, None], dtype=ArrowDtype(pa.int64())) tm.assert_series_equal(result, expected) + + +def test_to_numeric_arrow_decimal_with_na(): + # GH 61641 + pa = pytest.importorskip("pyarrow") + decimal_type = ArrowDtype(pa.decimal128(3, scale=2)) + series = Series([1, None], dtype=decimal_type) + result = to_numeric(series, errors="coerce") + + expected = Series([1.00, pd.NA], dtype=decimal_type) + tm.assert_series_equal(result, expected) From 5ac82aac39006a81701a0610f00bfd532e14bfc2 Mon Sep 17 00:00:00 2001 From: Chilin Chiou Date: Sun, 15 Jun 2025 02:19:08 +0800 Subject: [PATCH 3/5] add changelog entry --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 44bc82008e718..7d0e730b7cc4d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1200,6 +1200,7 @@ Other - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`) - Bug in :func:`eval` with ``engine="numexpr"`` returning unexpected result for float division. (:issue:`59736`) - Bug in :func:`to_numeric` raising ``TypeError`` when ``arg`` is a :class:`Timedelta` or :class:`Timestamp` scalar. (:issue:`59944`) +- Bug in :func:`to_numeric` with :class:`ArrowDtype` raising ``ValueError`` when the array contained NA values. (:issue:`61641`) - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`) - Bug in :meth:`DataFrame.apply` raising ``RecursionError`` when passing ``func=list[int]``. (:issue:`61565`) - Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`) From a5e5065c4b23f7f8955f8f20a528a1149de456ba Mon Sep 17 00:00:00 2001 From: Michal Wiszniewski Date: Mon, 3 Nov 2025 19:37:48 -0500 Subject: [PATCH 4/5] Address some PR feedback --- pandas/core/tools/numeric.py | 7 ++----- pandas/tests/tools/test_to_numeric.py | 14 ++++++++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 558a42c21033d..44b2948a6b362 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -222,11 +222,8 @@ def to_numeric( values_dtype = getattr(values, "dtype", None) if isinstance(values_dtype, ArrowDtype): - if is_numeric_dtype(values_dtype): - if is_series: - return arg._constructor(values, index=arg.index, name=arg.name) - else: - return values + if is_numeric_dtype(values_dtype) and is_series: + return arg._constructor(values, index=arg.index, name=arg.name) mask = values.isna() values = values.dropna().to_numpy() diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index c35eef1f391a9..f8d0c339c3d77 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -921,12 +921,18 @@ def test_coerce_pyarrow_backend(): tm.assert_series_equal(result, expected) -def test_to_numeric_arrow_decimal_with_na(): +@pytest.mark.parametrize( + "dtype", + [ + "ArrowDtype", + ], +) +def test_to_numeric_arrow_decimal_with_na(dtype): # GH 61641 pa = pytest.importorskip("pyarrow") - decimal_type = ArrowDtype(pa.decimal128(3, scale=2)) + target_class = globals()[dtype] + decimal_type = target_class(pa.decimal128(3, scale=2)) series = Series([1, None], dtype=decimal_type) result = to_numeric(series, errors="coerce") - expected = Series([1.00, pd.NA], dtype=decimal_type) - tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, series) From 631410e0b17229be0707fcfa1d82f4b14611a396 Mon Sep 17 00:00:00 2001 From: Michal Wiszniewski Date: Tue, 11 Nov 2025 23:12:13 -0500 Subject: [PATCH 5/5] Add parameterized tests --- pandas/tests/tools/test_to_numeric.py | 92 +++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 6 deletions(-) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 9128a32a8af09..36896fdac5601 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -16,6 +16,25 @@ to_numeric, ) import pandas._testing as tm +from pandas.core.arrays.floating import ( + Float32Dtype, + Float64Dtype, +) +from pandas.core.arrays.integer import ( + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, +) + +try: + import pyarrow as pa +except ImportError: + pa = None @pytest.fixture(params=[None, "raise", "coerce"]) @@ -904,18 +923,79 @@ def test_coerce_pyarrow_backend(): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize( + "pyarrow_dtype", + [ + pytest.param( + pa.int8(), marks=pytest.mark.skipif(not pa, reason="pyarrow required") + ), + pytest.param( + pa.int16(), marks=pytest.mark.skipif(not pa, reason="pyarrow required") + ), + pytest.param( + pa.int32(), marks=pytest.mark.skipif(not pa, reason="pyarrow required") + ), + pytest.param( + pa.int64(), marks=pytest.mark.skipif(not pa, reason="pyarrow required") + ), + pytest.param( + pa.uint8(), marks=pytest.mark.skipif(not pa, reason="pyarrow required") + ), + pytest.param( + pa.uint16(), marks=pytest.mark.skipif(not pa, reason="pyarrow required") + ), + pytest.param( + pa.uint32(), marks=pytest.mark.skipif(not pa, reason="pyarrow required") + ), + pytest.param( + pa.uint64(), marks=pytest.mark.skipif(not pa, reason="pyarrow required") + ), + pytest.param( + pa.float16(), marks=pytest.mark.skipif(not pa, reason="pyarrow required") + ), + pytest.param( + pa.float32(), marks=pytest.mark.skipif(not pa, reason="pyarrow required") + ), + pytest.param( + pa.float64(), marks=pytest.mark.skipif(not pa, reason="pyarrow required") + ), + pytest.param( + pa.decimal128(10, 2), + marks=pytest.mark.skipif(not pa, reason="pyarrow required"), + ), + pytest.param( + pa.decimal256(10, 2), + marks=pytest.mark.skipif(not pa, reason="pyarrow required"), + ), + ], +) +def test_to_numeric_arrow_decimal_with_na(pyarrow_dtype): + # GH 61641 + numeric_type = ArrowDtype(pyarrow_dtype) + series = Series([1, None], dtype=numeric_type) + result = to_numeric(series, errors="coerce") + + tm.assert_series_equal(result, series) + + @pytest.mark.parametrize( "dtype", [ - "ArrowDtype", + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, + Float32Dtype, + Float64Dtype, ], ) -def test_to_numeric_arrow_decimal_with_na(dtype): +def test_to_numeric_decimal_with_na(dtype): # GH 61641 - pa = pytest.importorskip("pyarrow") - target_class = globals()[dtype] - decimal_type = target_class(pa.decimal128(3, scale=2)) - series = Series([1, None], dtype=decimal_type) + series = Series([1, None], dtype=dtype()) result = to_numeric(series, errors="coerce") tm.assert_series_equal(result, series)