From 6bc86efe2a27e2d2400ce74c569083420cec1c39 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 1 Dec 2025 14:53:37 +0100 Subject: [PATCH 1/3] API: rename mode.nan_is_na option to future.distinguish_nan_and_na --- doc/source/whatsnew/v3.0.0.rst | 54 +++++++++++++++++++++++++++------ pandas/_config/__init__.py | 4 +-- pandas/conftest.py | 2 +- pandas/core/config_init.py | 19 ++++++------ pandas/io/json/_json.py | 4 +-- pandas/io/json/_table_schema.py | 2 +- 6 files changed, 60 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 222790cfbef6c..fa57395c00813 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -547,29 +547,55 @@ small behavior differences as collateral: Changed treatment of NaN values in pyarrow and numpy-nullable floating dtypes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Previously, when dealing with a nullable dtype (e.g. ``Float64Dtype`` or ``int64[pyarrow]``), ``NaN`` was treated as interchangeable with :class:`NA` in some circumstances but not others. This was done to make adoption easier, but caused some confusion (:issue:`32265`). In 3.0, an option ``"mode.nan_is_na"`` (default ``True``) controls whether to treat ``NaN`` as equivalent to :class:`NA`. +Previously, when dealing with a nullable dtype (e.g. ``Float64Dtype`` or ``int64[pyarrow]``), +``NaN`` was treated as interchangeable with :class:`NA` in some circumstances but not others. +This was done to make adoption easier, but caused some confusion (:issue:`32265`). +In 3.0, this behaviour is made consistent: by default, ``NaN`` is treated as equivalent +to :class:`NA` in all cases. -With ``pd.set_option("mode.nan_is_na", True)`` (again, this is the default), ``NaN`` can be passed to constructors, ``__setitem__``, ``__contains__`` and be treated the same as :class:`NA`. 
The only change users will see is that arithmetic and ``np.ufunc`` operations that previously introduced ``NaN`` entries produce :class:`NA` entries instead: +By default, ``NaN`` can be passed to constructors, ``__setitem__``, ``__contains__`` +and will be treated the same as :class:`NA`. The only change users will see is +that arithmetic and ``np.ufunc`` operations that previously introduced ``NaN`` + entries produce :class:`NA` entries instead. *Old behavior:* .. code-block:: ipython - In [2]: ser = pd.Series([0, None], dtype=pd.Float64Dtype()) + # NaN in input gets converted to NA + In [1]: ser = pd.Series([0, np.nan], dtype=pd.Float64Dtype()) + In [2]: ser + Out[2]: + 0 0.0 + 1 + dtype: Float64 + # NaN produced by arithmetic (0/0) remained NaN In [3]: ser / 0 Out[3]: 0 NaN 1 dtype: Float64 + # the NaN value is not considered as missing + In [4]: (ser / 0).isna() + Out[4]: + 0 False + 1 True + dtype: bool *New behavior:* .. ipython:: python - ser = pd.Series([0, None], dtype=pd.Float64Dtype()) + ser = pd.Series([0, np.nan], dtype=pd.Float64Dtype()) + ser ser / 0 + (ser / 0).isna() -By contrast, with ``pd.set_option("mode.nan_is_na", False)``, ``NaN`` is always considered distinct and specifically as a floating-point value, so cannot be used with integer dtypes: +In the future, the intention is to consider ``NaN`` and :class:`NA` as distinct +values, and an option to control this behaviour is added in 3.0 through +``pd.options.future.distinguish_nan_and_na``. When enabled, ``NaN`` is always +considered distinct and specifically as a floating-point value. As a consequence, +it cannot be used with integer dtypes. *Old behavior:* @@ -583,13 +609,21 @@ By contrast, with ``pd.set_option("mode.nan_is_na", False)``, ``NaN`` is always .. 
ipython:: python - pd.set_option("mode.nan_is_na", False) - ser = pd.Series([1, np.nan], dtype=pd.Float64Dtype()) - ser[1] + with pd.option_context("future.distinguish_nan_and_na", True): + ser = pd.Series([1, np.nan], dtype=pd.Float64Dtype()) + print(ser[1]) + +If we had passed ``pd.Int64Dtype()`` or ``"int64[pyarrow]"`` for the dtype in +the latter example, this would raise, as a float ``NaN`` cannot be held by an +integer dtype. -If we had passed ``pd.Int64Dtype()`` or ``"int64[pyarrow]"`` for the dtype in the latter example, this would raise, as a float ``NaN`` cannot be held by an integer dtype. +With ``"future.distinguish_nan_and_na"`` enabled, ``ser.to_numpy()`` (and +``frame.values`` and ``np.asarray(obj)``) will convert to ``object`` dtype if +:class:`NA` entries are present, where before they would coerce to +``NaN``. To retain a float numpy dtype, explicitly pass ``na_value=np.nan`` +to :meth:`Series.to_numpy`. -With ``"mode.nan_is_na"`` set to ``False``, ``ser.to_numpy()`` (and ``frame.values`` and ``np.asarray(obj)``) will convert to ``object`` dtype if :class:`NA` entries are present, where before they would coerce to ``NaN``. To retain a float numpy dtype, explicitly pass ``na_value=np.nan`` to :meth:`Series.to_numpy`. +Note that the option is experimental and subject to change in future releases. 
The ``__module__`` attribute now points to public modules ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py index ee709eff2eeae..10a10e53e431d 100644 --- a/pandas/_config/__init__.py +++ b/pandas/_config/__init__.py @@ -36,5 +36,5 @@ def using_string_dtype() -> bool: def is_nan_na() -> bool: - _mode_options = _global_config["mode"] - return _mode_options["nan_is_na"] + _mode_options = _global_config["future"] + return not _mode_options["distinguish_nan_and_na"] diff --git a/pandas/conftest.py b/pandas/conftest.py index ee3320ccea35d..a76e66dbeaca1 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -2127,5 +2127,5 @@ def monkeysession(): @pytest.fixture(params=[True, False]) def using_nan_is_na(request): opt = request.param - with pd.option_context("mode.nan_is_na", opt): + with pd.option_context("future.distinguish_nan_and_na", not opt): yield opt diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 83015f4007793..8f2eeef081a64 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -429,15 +429,6 @@ def is_terminal() -> bool: validator=is_one_of_factory([True, False, "warn"]), ) - cf.register_option( - "nan_is_na", - os.environ.get("PANDAS_NAN_IS_NA", "1") == "1", - "Whether to treat NaN entries as interchangeable with pd.NA in " - "numpy-nullable and pyarrow float dtypes. See discussion in " - "https://github.com/pandas-dev/pandas/issues/32265", - validator=is_one_of_factory([True, False]), - ) - # user warnings chained_assignment = """ @@ -900,5 +891,15 @@ def register_converter_cb(key: str) -> None: validator=is_one_of_factory([True, False]), ) + cf.register_option( + "distinguish_nan_and_na", + os.environ.get("PANDAS_FUTURE_DISTINGUISH_NAN_AND_NA", "0") == "1", + "Whether to treat NaN entries as interchangeable with pd.NA in " + "numpy-nullable and pyarrow float dtypes. 
See discussion in " + "https://github.com/pandas-dev/pandas/issues/32265", + validator=is_one_of_factory([True, False]), + ) + + # GH#59502 cf.deprecate_option("future.no_silent_downcasting", Pandas4Warning) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 985a192eb79f4..2bd6efd359717 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -997,7 +997,7 @@ def _read_ujson(self) -> DataFrame | Series: else: obj = self._get_object_parser(self.data) if self.dtype_backend is not lib.no_default: - with option_context("mode.nan_is_na", True): + with option_context("future.distinguish_nan_and_na", False): return obj.convert_dtypes( infer_objects=False, dtype_backend=self.dtype_backend ) @@ -1075,7 +1075,7 @@ def __next__(self) -> DataFrame | Series: raise ex if self.dtype_backend is not lib.no_default: - with option_context("mode.nan_is_na", True): + with option_context("future.distinguish_nan_and_na", False): return obj.convert_dtypes( infer_objects=False, dtype_backend=self.dtype_backend ) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 1723be3de6e82..7ca4e370c8564 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -386,7 +386,7 @@ def parse_table_schema(json, precise_float: bool) -> DataFrame: 'table="orient" can not yet read ISO-formatted Timedelta data' ) - with option_context("mode.nan_is_na", True): + with option_context("future.distinguish_nan_and_na", False): df = df.astype(dtypes) if "primaryKey" in table["schema"]: From 15a5016ef302a1fccad1390e173042e95a135e4c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 1 Dec 2025 23:44:48 +0100 Subject: [PATCH 2/3] fixup doc --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index fa57395c00813..c20a928dc356e 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ 
-556,7 +556,7 @@ to :class:`NA` in all cases. By default, ``NaN`` can be passed to constructors, ``__setitem__``, ``__contains__`` and will be treated the same as :class:`NA`. The only change users will see is that arithmetic and ``np.ufunc`` operations that previously introduced ``NaN`` - entries produce :class:`NA` entries instead. +entries produce :class:`NA` entries instead. *Old behavior:* From 790262a9bc2022579d158c0057904f43e4dd21e1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 1 Dec 2025 23:46:50 +0100 Subject: [PATCH 3/3] update description --- pandas/core/config_init.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 8f2eeef081a64..27b55152ab6dc 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -894,8 +894,10 @@ def register_converter_cb(key: str) -> None: cf.register_option( "distinguish_nan_and_na", os.environ.get("PANDAS_FUTURE_DISTINGUISH_NAN_AND_NA", "0") == "1", - "Whether to treat NaN entries as interchangeable with pd.NA in " - "numpy-nullable and pyarrow float dtypes. See discussion in " + "Whether to treat NaN entries as distinct from pd.NA in " + "numpy-nullable and pyarrow float dtypes. By default treats both " + "interchangeable as missing values (NaN will be coerced to NA). " + "See discussion in " "https://github.com/pandas-dev/pandas/issues/32265", validator=is_one_of_factory([True, False]), )