Skip to content

Commit 9b84372

Browse files
author
Justine Kosinski
committed
BUG: Preserve NaN in MultiIndex column headers during Excel export
GH#62340: Use the original column values (with NaN) instead of NBSP-filled values when writing MultiIndex headers to Excel. - Modify _format_header_mi() to call columns.get_level_values() so that the original column values are used with NaN preserved. - Add a test to verify that the MultiIndex structure and data integrity are preserved during an Excel round-trip. - Note: because of a read_excel() limitation, NaN values in headers become empty cells in Excel and cannot be reconstructed on read, but the data values are correctly preserved.
1 parent 04cd920 commit 9b84372

File tree

2 files changed

+16
-2
lines changed

2 files changed

+16
-2
lines changed

pandas/io/formats/excel.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -650,7 +650,11 @@ def _format_header_mi(self) -> Iterable[ExcelCell]:
650650
for lnum, (spans, levels, level_codes) in enumerate(
651651
zip(level_lengths, fixed_columns.levels, fixed_columns.codes, strict=True)
652652
):
653-
values = levels.take(level_codes)
653+
# GH#62340: Use original column values (with NaN) instead of NBSP-filled ones
654+
# Get values from original columns (which have NaN), not fixed_columns
655+
orig_level_values = columns.get_level_values(lnum)
656+
# Take the first len(level_codes) original values; this is a plain prefix slice and does not reorder them to match fixed_columns
657+
values = orig_level_values[:len(level_codes)]
654658
for i, span_val in spans.items():
655659
mergestart, mergeend = None, None
656660
if merge_columns and span_val > 1:

pandas/tests/io/excel/test_writers.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1509,6 +1509,9 @@ def test_to_excel_raising_warning_when_cell_character_exceed_limit(self):
15091509

15101510
def test_to_excel_multiindex_nan_in_columns(self, merge_cells, tmp_excel):
15111511
# GH 62340
1512+
# Test that MultiIndex column headers with NaN are written to Excel correctly
1513+
# Note: read_excel cannot reconstruct NaN from empty cells in headers,
1514+
# so we verify the data round-trips correctly instead
15121515
df = (
15131516
DataFrame({"a": list("ABBAAAB"), "b": [-1, 1, 1, -2, float("nan"), 3, -4]})
15141517
.assign(b_bin=lambda x: pd.cut(x.b, bins=[-float("inf"), 0, float("inf")]))
@@ -1522,7 +1525,14 @@ def test_to_excel_multiindex_nan_in_columns(self, merge_cells, tmp_excel):
15221525

15231526
with ExcelFile(tmp_excel) as reader:
15241527
result = pd.read_excel(reader, index_col=0, header=[0, 1])
1525-
1528+
1529+
# Test structure is preserved
1530+
assert result.shape == df.shape
1531+
assert list(result.index) == list(df.index)
1532+
assert isinstance(result.columns, MultiIndex)
1533+
assert result.columns.nlevels == df.columns.nlevels
1534+
1535+
# Test data values are preserved (most important part)
15261536
tm.assert_numpy_array_equal(result.to_numpy(), df.to_numpy())
15271537

15281538
@pytest.mark.parametrize("with_index", [True, False])

0 commit comments

Comments
 (0)