diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py index 2213bfcc6aa16..3ff617c0ee3fb 100644 --- a/python/pyspark/pandas/namespace.py +++ b/python/pyspark/pandas/namespace.py @@ -64,6 +64,7 @@ FloatType, DoubleType, BooleanType, + NumericType, TimestampType, TimestampNTZType, DecimalType, @@ -3651,6 +3652,8 @@ def to_numeric(arg, errors="raise"): 1.0 """ if isinstance(arg, Series): + if isinstance(arg.spark.data_type, (NumericType, BooleanType)): + return arg.copy() if errors == "coerce": spark_session = arg._internal.spark_frame.sparkSession if is_ansi_mode_enabled(spark_session): diff --git a/python/pyspark/pandas/tests/test_namespace.py b/python/pyspark/pandas/tests/test_namespace.py index 8a267f76c5369..f68a637723f79 100644 --- a/python/pyspark/pandas/tests/test_namespace.py +++ b/python/pyspark/pandas/tests/test_namespace.py @@ -607,6 +607,11 @@ def test_to_numeric(self): lambda: ps.to_numeric(psser, errors="ignore"), ) + # SPARK-54666: Series with numeric dtype should be returned as-is. + pser = pd.Series([-1554478299, 2]) + psser = ps.from_pandas(pser) + self.assert_eq(pd.to_numeric(pser), ps.to_numeric(psser)) + def test_json_normalize(self): # Basic test case with a simple JSON structure data = [