From b1d9dc24416c37d01f4691fac4446578842a0793 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Fri, 20 Feb 2026 13:21:25 -0600 Subject: [PATCH] [SPARK-54666][PS] Leave numeric types unchanged on `to_numeric` Signed-off-by: Devin Petersohn Co-authored-by: Devin Petersohn --- python/pyspark/pandas/namespace.py | 3 +++ python/pyspark/pandas/tests/test_namespace.py | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py index 2213bfcc6aa16..3ff617c0ee3fb 100644 --- a/python/pyspark/pandas/namespace.py +++ b/python/pyspark/pandas/namespace.py @@ -64,6 +64,7 @@ FloatType, DoubleType, BooleanType, + NumericType, TimestampType, TimestampNTZType, DecimalType, @@ -3651,6 +3652,8 @@ def to_numeric(arg, errors="raise"): 1.0 """ if isinstance(arg, Series): + if isinstance(arg.spark.data_type, (NumericType, BooleanType)): + return arg.copy() if errors == "coerce": spark_session = arg._internal.spark_frame.sparkSession if is_ansi_mode_enabled(spark_session): diff --git a/python/pyspark/pandas/tests/test_namespace.py b/python/pyspark/pandas/tests/test_namespace.py index 8a267f76c5369..f68a637723f79 100644 --- a/python/pyspark/pandas/tests/test_namespace.py +++ b/python/pyspark/pandas/tests/test_namespace.py @@ -607,6 +607,11 @@ def test_to_numeric(self): lambda: ps.to_numeric(psser, errors="ignore"), ) + # SPARK-54666: Series with numeric dtype should be returned as-is. + pser = pd.Series([-1554478299, 2]) + psser = ps.from_pandas(pser) + self.assert_eq(pd.to_numeric(pser), ps.to_numeric(psser)) + def test_json_normalize(self): # Basic test case with a simple JSON structure data = [