|
| 1 | +import numpy as np |
| 2 | +import pandas as pd |
| 3 | +import pandas._testing as tm |
| 4 | + |
def test_groupby_dataframe_dropna_false_preserves_nan_group():
    """DataFrame.groupby(..., dropna=False) keeps NA keys together as one group.

    Tests-only addition that locks the current behaviour of ``.indices``.
    TODO: replace the ``GHxxxx`` placeholder with the real issue number.
    """
    df = pd.DataFrame(
        {"group": ["g1", np.nan, "g1", "g2", np.nan], "val": [0, 1, 2, 3, 4]}
    )

    result = df.groupby("group", dropna=False).indices

    # NaN never compares equal to itself, so the NaN group cannot be looked up
    # reliably with ``result[np.nan]``; separate it from the labelled groups
    # with ``pd.isna`` instead.
    nan_groups = [positions for key, positions in result.items() if pd.isna(key)]
    labelled = {key: positions for key, positions in result.items() if not pd.isna(key)}

    # Pin the full set of groups so a missing or an unexpected extra group
    # fails the test (a positional ``zip`` over the values would truncate).
    assert set(labelled) == {"g1", "g2"}
    assert len(nan_groups) == 1

    # expected: g1 -> [0, 2], g2 -> [3], NaN -> [1, 4]
    expected = {
        "g1": np.array([0, 2], dtype=np.intp),
        "g2": np.array([3], dtype=np.intp),
    }
    for key, exp_positions in expected.items():
        tm.assert_numpy_array_equal(labelled[key], exp_positions)
    tm.assert_numpy_array_equal(nan_groups[0], np.array([1, 4], dtype=np.intp))
| 26 | + |
| 27 | + |
def test_groupby_series_dropna_false_preserves_nan_group():
    """Series.groupby(level=0, dropna=False) keeps NA index labels as one group."""
    ser = pd.Series([1, 2, 3, 4], index=["a", np.nan, "a", np.nan], name="s")

    result = ser.groupby(level=0, dropna=False).indices

    # NaN never compares equal to itself, so pick the NaN group out with
    # ``pd.isna`` rather than a ``result[np.nan]`` lookup.
    nan_groups = [positions for key, positions in result.items() if pd.isna(key)]
    labelled = {key: positions for key, positions in result.items() if not pd.isna(key)}

    # Pin the full set of groups so extra or missing groups are detected
    # (a positional ``zip`` over the values would truncate silently).
    assert set(labelled) == {"a"}
    assert len(nan_groups) == 1

    # expected: a -> [0, 2], NaN -> [1, 3]
    tm.assert_numpy_array_equal(labelled["a"], np.array([0, 2], dtype=np.intp))
    tm.assert_numpy_array_equal(nan_groups[0], np.array([1, 3], dtype=np.intp))
0 commit comments