diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 752d08a526d8c..ba1eb35bc3a39 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1240,6 +1240,7 @@ Other
 - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
 - Bug in :func:`eval` with ``engine="numexpr"`` returning unexpected result for float division. (:issue:`59736`)
 - Bug in :func:`to_numeric` raising ``TypeError`` when ``arg`` is a :class:`Timedelta` or :class:`Timestamp` scalar. (:issue:`59944`)
+- Bug in :func:`to_numeric` with :class:`ArrowDtype` raising ``ValueError`` when the array contained NA values. (:issue:`61641`)
 - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
 - Bug in :meth:`DataFrame.apply` raising ``RecursionError`` when passing ``func=list[int]``. (:issue:`61565`)
 - Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`)
diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py
index 14921457194ca..44b2948a6b362 100644
--- a/pandas/core/tools/numeric.py
+++ b/pandas/core/tools/numeric.py
@@ -222,8 +222,12 @@ def to_numeric(
 
     values_dtype = getattr(values, "dtype", None)
     if isinstance(values_dtype, ArrowDtype):
+        if is_numeric_dtype(values_dtype) and is_series:
+            return arg._constructor(values, index=arg.index, name=arg.name)
+
         mask = values.isna()
         values = values.dropna().to_numpy()
+
     new_mask: np.ndarray | None = None
     if is_numeric_dtype(values_dtype):
         pass
diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
index fcbc91d4c632f..36896fdac5601 100644
--- a/pandas/tests/tools/test_to_numeric.py
+++ b/pandas/tests/tools/test_to_numeric.py
@@ -16,6 +16,20 @@
     to_numeric,
 )
 import pandas._testing as tm
+from pandas.core.arrays.floating import (
+    Float32Dtype,
+    Float64Dtype,
+)
+from pandas.core.arrays.integer import (
+    Int8Dtype,
+    Int16Dtype,
+    Int32Dtype,
+    Int64Dtype,
+    UInt8Dtype,
+    UInt16Dtype,
+    UInt32Dtype,
+    UInt64Dtype,
+)
 
 
 @pytest.fixture(params=[None, "raise", "coerce"])
@@ -902,3 +916,57 @@ def test_coerce_pyarrow_backend():
     result = to_numeric(ser, errors="coerce", dtype_backend="pyarrow")
     expected = Series([1, 2, None], dtype=ArrowDtype(pa.int64()))
     tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "pa_type_name, pa_type_args",
+    [
+        ("int8", ()),
+        ("int16", ()),
+        ("int32", ()),
+        ("int64", ()),
+        ("uint8", ()),
+        ("uint16", ()),
+        ("uint32", ()),
+        ("uint64", ()),
+        ("float16", ()),
+        ("float32", ()),
+        ("float64", ()),
+        ("decimal128", (10, 2)),
+        ("decimal256", (10, 2)),
+    ],
+)
+def test_to_numeric_arrow_decimal_with_na(pa_type_name, pa_type_args):
+    # GH 61641
+    # Build the pyarrow type inside the test: parametrize arguments are
+    # evaluated at collection time, when pyarrow may not be installed.
+    pa = pytest.importorskip("pyarrow")
+    numeric_type = ArrowDtype(getattr(pa, pa_type_name)(*pa_type_args))
+    series = Series([1, None], dtype=numeric_type)
+    result = to_numeric(series, errors="coerce")
+
+    tm.assert_series_equal(result, series)
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        Int8Dtype,
+        Int16Dtype,
+        Int32Dtype,
+        Int64Dtype,
+        UInt8Dtype,
+        UInt16Dtype,
+        UInt32Dtype,
+        UInt64Dtype,
+        Float32Dtype,
+        Float64Dtype,
+    ],
+)
+def test_to_numeric_decimal_with_na(dtype):
+    # GH 61641
+    # masked integer/float dtypes holding NA must come back unchanged
+    series = Series([1, None], dtype=dtype())
+    result = to_numeric(series, errors="coerce")
+
+    tm.assert_series_equal(result, series)