Skip to content

Commit d090db7

Browse files
authored
Add tests for groupby dropna=False behavior
Add tests to ensure groupby with dropna=False preserves NaN groups in both DataFrame and Series.
1 parent 8f359f8 commit d090db7

File tree

1 file changed

+41
-0
lines changed

1 file changed

+41
-0
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import numpy as np
2+
import pandas as pd
3+
import pandas._testing as tm
4+
5+
def test_groupby_dataframe_dropna_false_preserves_nan_group():
    """Check DataFrame.groupby(..., dropna=False) keeps NA keys as one group.

    The frame has two NaN entries in the grouping column (rows 1 and 4).
    With ``dropna=False`` the ``indices`` mapping is expected to contain
    exactly three groups: ``"g1"``, ``"g2"`` and a single NaN group.
    """
    # Tests-only addition to lock current behavior.
    # TODO: replace with the real issue reference (was the placeholder "GHxxxx").
    data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "val": [0, 1, 2, 3, 4]}
    df = pd.DataFrame(data)

    gb_keepna = df.groupby("group", dropna=False)
    result = gb_keepna.indices

    # expected: g1 -> [0, 2], g2 -> [3], NaN -> [1, 4]
    expected = {
        "g1": np.array([0, 2], dtype=np.intp),
        "g2": np.array([3], dtype=np.intp),
        np.nan: np.array([1, 4], dtype=np.intp),
    }

    # Compare the number of groups explicitly: pairing values with zip()
    # would silently ignore a missing or an extra group.
    assert len(result) == len(expected)

    # NaN != NaN, so the NaN group cannot be looked up by equality; locate
    # the key actually used by the result and require it to be unique.
    nan_keys = [key for key in result if pd.isna(key)]
    assert len(nan_keys) == 1

    # Match groups by key rather than by dict iteration order.
    for exp_key, exp_vals in expected.items():
        res_key = nan_keys[0] if pd.isna(exp_key) else exp_key
        assert res_key in result
        tm.assert_numpy_array_equal(result[res_key], exp_vals)
26+
27+
28+
def test_groupby_series_dropna_false_preserves_nan_group():
    """Check Series.groupby(level=0, dropna=False) keeps NA index labels grouped.

    The series index holds two NaN labels (positions 1 and 3). Grouping on
    the index level with ``dropna=False`` is expected to yield exactly two
    groups in ``indices``: ``"a"`` and a single NaN group.
    """
    s = pd.Series([1, 2, 3, 4], index=["a", np.nan, "a", np.nan], name="s")
    gb = s.groupby(level=0, dropna=False)
    res = gb.indices

    # expected: a -> [0, 2], NaN -> [1, 3]
    expected = {
        "a": np.array([0, 2], dtype=np.intp),
        np.nan: np.array([1, 3], dtype=np.intp),
    }

    # Compare the number of groups explicitly: pairing values with zip()
    # would silently ignore a missing or an extra group.
    assert len(res) == len(expected)

    # NaN != NaN, so the NaN group cannot be looked up by equality; locate
    # the key actually used by the result and require it to be unique.
    nan_keys = [key for key in res if pd.isna(key)]
    assert len(nan_keys) == 1

    # Match groups by key rather than by dict iteration order.
    for exp_key, exp_vals in expected.items():
        res_key = nan_keys[0] if pd.isna(exp_key) else exp_key
        assert res_key in res
        tm.assert_numpy_array_equal(res[res_key], exp_vals)

0 commit comments

Comments
 (0)