API: rename mode.nan_is_na option to future.distinguish_nan_and_na (#63241)

jorisvandenbossche · web-flow · commit 787ad728443e · 2025-12-02T07:02:08.000-08:00
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -552,29 +552,55 @@ small behavior differences as collateral:
 Changed treatment of NaN values in pyarrow and numpy-nullable floating dtypes
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Previously, when dealing with a nullable dtype (e.g. ``Float64Dtype`` or ``int64[pyarrow]``), ``NaN`` was treated as interchangeable with :class:`NA` in some circumstances but not others. This was done to make adoption easier, but caused some confusion (:issue:`32265`). In 3.0, an option ``"mode.nan_is_na"`` (default ``True``) controls whether to treat ``NaN`` as equivalent to :class:`NA`.
+Previously, when dealing with a nullable dtype (e.g. ``Float64Dtype`` or ``int64[pyarrow]``),
+``NaN`` was treated as interchangeable with :class:`NA` in some circumstances but not others.
+This was done to make adoption easier, but caused some confusion (:issue:`32265`).
+In 3.0, this behaviour is made consistent: by default, ``NaN`` is treated as
+equivalent to :class:`NA` in all cases.
 
-With ``pd.set_option("mode.nan_is_na", True)`` (again, this is the default), ``NaN`` can be passed to constructors, ``__setitem__``, ``__contains__`` and be treated the same as :class:`NA`. The only change users will see is that arithmetic and ``np.ufunc`` operations that previously introduced ``NaN`` entries produce :class:`NA` entries instead:
+By default, ``NaN`` can be passed to constructors, ``__setitem__``, ``__contains__``
+and will be treated the same as :class:`NA`. The only change users will see is
+that arithmetic and ``np.ufunc`` operations that previously introduced ``NaN``
+entries produce :class:`NA` entries instead.
 
 *Old behavior:*
 
 .. code-block:: ipython
 
-    In [2]: ser = pd.Series([0, None], dtype=pd.Float64Dtype())
+    # NaN in input gets converted to NA
+    In [1]: ser = pd.Series([0, np.nan], dtype=pd.Float64Dtype())
+    In [2]: ser
+    Out[2]:
+    0     0.0
+    1    <NA>
+    dtype: Float64
+    # NaN produced by arithmetic (0/0) remained NaN
     In [3]: ser / 0
     Out[3]:
     0     NaN
     1    <NA>
     dtype: Float64
+    # the NaN value was not considered missing
+    In [4]: (ser / 0).isna()
+    Out[4]:
+    0    False
+    1     True
+    dtype: bool
 
 *New behavior:*
 
 .. ipython:: python
 
-    ser = pd.Series([0, None], dtype=pd.Float64Dtype())
+    ser = pd.Series([0, np.nan], dtype=pd.Float64Dtype())
+    ser
     ser / 0
+    (ser / 0).isna()
 
-By contrast, with ``pd.set_option("mode.nan_is_na", False)``, ``NaN`` is always considered distinct and specifically as a floating-point value, so cannot be used with integer dtypes:
+In the future, the intention is to consider ``NaN`` and :class:`NA` as distinct
+values, and an option to control this behaviour is added in 3.0 through
+``pd.options.future.distinguish_nan_and_na``. When enabled, ``NaN`` is always
+considered distinct and specifically as a floating-point value. As a consequence,
+it cannot be used with integer dtypes.
 
 *Old behavior:*
 
@@ -588,13 +614,21 @@ By contrast, with ``pd.set_option("mode.nan_is_na", False)``, ``NaN`` is always
 
 .. ipython:: python
 
-    pd.set_option("mode.nan_is_na", False)
-    ser = pd.Series([1, np.nan], dtype=pd.Float64Dtype())
-    ser[1]
+    with pd.option_context("future.distinguish_nan_and_na", True):
+        ser = pd.Series([1, np.nan], dtype=pd.Float64Dtype())
+        print(ser[1])
+
+If we had passed ``pd.Int64Dtype()`` or ``"int64[pyarrow]"`` for the dtype in
+the latter example, this would raise, as a float ``NaN`` cannot be held by an
+integer dtype.
 
-If we had passed ``pd.Int64Dtype()`` or ``"int64[pyarrow]"`` for the dtype in the latter example, this would raise, as a float ``NaN`` cannot be held by an integer dtype.
+With ``"future.distinguish_nan_and_na"`` enabled, ``ser.to_numpy()`` (and
+``frame.values`` and ``np.asarray(obj)``) will convert to ``object`` dtype if
+:class:`NA` entries are present, where before they would coerce to
+``NaN``.  To retain a float numpy dtype, explicitly pass ``na_value=np.nan``
+to :meth:`Series.to_numpy`.
 
-With ``"mode.nan_is_na"`` set to ``False``, ``ser.to_numpy()`` (and ``frame.values`` and ``np.asarray(obj)``) will convert to ``object`` dtype if :class:`NA` entries are present, where before they would coerce to ``NaN``.  To retain a float numpy dtype, explicitly pass ``na_value=np.nan`` to :meth:`Series.to_numpy`.
+Note that the option is experimental and subject to change in future releases.
 
 The ``__module__`` attribute now points to public modules
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py
@@ -36,5 +36,5 @@ def using_string_dtype() -> bool:
 
 
 def is_nan_na() -> bool:
-    _mode_options = _global_config["mode"]
-    return _mode_options["nan_is_na"]
+    _mode_options = _global_config["future"]
+    return not _mode_options["distinguish_nan_and_na"]
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -2127,5 +2127,5 @@ def monkeysession():
 @pytest.fixture(params=[True, False])
 def using_nan_is_na(request):
     opt = request.param
-    with pd.option_context("mode.nan_is_na", opt):
+    with pd.option_context("future.distinguish_nan_and_na", not opt):
         yield opt
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
@@ -428,15 +428,6 @@ def is_terminal() -> bool:
         validator=is_one_of_factory([True, False, "warn"]),
     )
 
-    cf.register_option(
-        "nan_is_na",
-        os.environ.get("PANDAS_NAN_IS_NA", "1") == "1",
-        "Whether to treat NaN entries as interchangeable with pd.NA in "
-        "numpy-nullable and pyarrow float dtypes. See discussion in "
-        "https://github.com/pandas-dev/pandas/issues/32265",
-        validator=is_one_of_factory([True, False]),
-    )
-
 
 # user warnings
 chained_assignment = """
@@ -899,6 +890,18 @@ def register_converter_cb(key: str) -> None:
         validator=is_one_of_factory([True, False]),
     )
 
+    cf.register_option(
+        "distinguish_nan_and_na",
+        os.environ.get("PANDAS_FUTURE_DISTINGUISH_NAN_AND_NA", "0") == "1",
+        "Whether to treat NaN entries as distinct from pd.NA in "
+        "numpy-nullable and pyarrow float dtypes. By default treats both "
+        "interchangeable as missing values (NaN will be coerced to NA). "
+        "See discussion in "
+        "https://github.com/pandas-dev/pandas/issues/32265",
+        validator=is_one_of_factory([True, False]),
+    )
+
+
 # GH#59502
 cf.deprecate_option("future.no_silent_downcasting", Pandas4Warning)
 cf.deprecate_option(
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
@@ -997,7 +997,7 @@ def _read_ujson(self) -> DataFrame | Series:
         else:
             obj = self._get_object_parser(self.data)
         if self.dtype_backend is not lib.no_default:
-            with option_context("mode.nan_is_na", True):
+            with option_context("future.distinguish_nan_and_na", False):
                 return obj.convert_dtypes(
                     infer_objects=False, dtype_backend=self.dtype_backend
                 )
@@ -1075,7 +1075,7 @@ def __next__(self) -> DataFrame | Series:
             raise ex
 
         if self.dtype_backend is not lib.no_default:
-            with option_context("mode.nan_is_na", True):
+            with option_context("future.distinguish_nan_and_na", False):
                 return obj.convert_dtypes(
                     infer_objects=False, dtype_backend=self.dtype_backend
                 )
diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py
@@ -386,7 +386,7 @@ def parse_table_schema(json, precise_float: bool) -> DataFrame:
             'table="orient" can not yet read ISO-formatted Timedelta data'
         )
 
-    with option_context("mode.nan_is_na", True):
+    with option_context("future.distinguish_nan_and_na", False):
         df = df.astype(dtypes)
 
     if "primaryKey" in table["schema"]:

Original file line number	Diff line number	Diff line change
`@@ -386,7 +386,7 @@ def parse_table_schema(json, precise_float: bool) -> DataFrame:`
`386`	`386`	`'table="orient" can not yet read ISO-formatted Timedelta data'`
`387`	`387`	`)`
`388`	`388`
`389`		`- with option_context("mode.nan_is_na", True):`
	`389`	`+ with option_context("future.distinguish_nan_and_na", False):`
`390`	`390`	`df = df.astype(dtypes)`
`391`	`391`
`392`	`392`	`if "primaryKey" in table["schema"]:`