From 90f75a54ade310f4e11065ffa441cf7d871b0c81 Mon Sep 17 00:00:00 2001 From: Jacek Arent Date: Wed, 5 Nov 2025 00:31:57 -0800 Subject: [PATCH 1/7] ENH: adding autofilter when writing to excel (pandas-dev#61194) --- doc/source/user_guide/io.rst | 1 + doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/generic.py | 2 ++ pandas/io/excel/_odswriter.py | 1 + pandas/io/excel/_openpyxl.py | 4 ++++ pandas/io/excel/_xlsxwriter.py | 4 ++++ pandas/io/formats/excel.py | 22 ++++++++++++++++++++++ pandas/tests/io/excel/test_style.py | 17 +++++++++++++++++ 8 files changed, 52 insertions(+), 1 deletion(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 7092a0f8fa8d8..1e3af64b73300 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3745,6 +3745,7 @@ The look and feel of Excel worksheets created from pandas can be modified using * ``float_format`` : Format string for floating point numbers (default ``None``). * ``freeze_panes`` : A tuple of two integers representing the bottommost row and rightmost column to freeze. Each of these parameters is one-based, so (1, 1) will freeze the first row and first column (default ``None``). +* ``autofilter`` : A boolean indicating whether to add automatic filters to all columns (default ``False``). .. 
note:: diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d11ab82294be1..62add9aceafaf 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -202,6 +202,7 @@ Other enhancements - :class:`Holiday` has gained the constructor argument and field ``exclude_dates`` to exclude specific datetimes from a custom holiday calendar (:issue:`54382`) - :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`) - :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`) +- :func:`DataFrame.to_excel` has a new ``autofilter`` parameter to add automatic filters to all columns (:issue:`61194`) - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) - :func:`to_numeric` on big integers converts to ``object`` datatype with python integers when not coercing. (:issue:`51295`) - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) @@ -232,7 +233,6 @@ Other enhancements - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) - Switched wheel upload to **PyPI Trusted Publishing** (OIDC) for release-tag pushes in ``wheels.yml``. (:issue:`61718`) -- .. --------------------------------------------------------------------------- .. 
_whatsnew_300.notable_bug_fixes: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e2ba7c312a213..44d34c25cbc7d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2180,6 +2180,7 @@ def to_excel( freeze_panes: tuple[int, int] | None = None, storage_options: StorageOptions | None = None, engine_kwargs: dict[str, Any] | None = None, + autofilter: bool = False, ) -> None: """ Write {klass} to an Excel sheet. @@ -2312,6 +2313,7 @@ def to_excel( index_label=index_label, merge_cells=merge_cells, inf_rep=inf_rep, + autofilter=autofilter, ) formatter.write( excel_writer, diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index e9a06076f3aff..5b8edafb1fc41 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -99,6 +99,7 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + autofilter_range: str | None = None, ) -> None: """ Write the frame cells using odf diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 867d11583dcc0..ea13ffa23cdfb 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -449,6 +449,7 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + autofilter_range: str | None = None, ) -> None: # Write the frame cells using openpyxl. 
sheet_name = self._get_sheet_name(sheet_name) @@ -532,6 +533,9 @@ def _write_cells( for k, v in style_kwargs.items(): setattr(xcell, k, v) + if autofilter_range: + wks.auto_filter.ref = autofilter_range + class OpenpyxlReader(BaseExcelReader["Workbook"]): @doc(storage_options=_shared_docs["storage_options"]) diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py index 4a7b8eee2bfce..9ebec51067672 100644 --- a/pandas/io/excel/_xlsxwriter.py +++ b/pandas/io/excel/_xlsxwriter.py @@ -245,6 +245,7 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + autofilter_range: str | None = None, ) -> None: # Write the frame cells using xlsxwriter. sheet_name = self._get_sheet_name(sheet_name) @@ -282,3 +283,6 @@ def _write_cells( ) else: wks.write(startrow + cell.row, startcol + cell.col, val, style) + + if autofilter_range: + wks.autofilter(autofilter_range) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index d4d47253a5f82..e11a58adddcba 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -532,6 +532,8 @@ class ExcelFormatter: Defaults to ``CSSToExcelConverter()``. It should have signature css_declarations string -> excel style. This is only called for body cells. 
+ autofilter : bool, default False + If True, add automatic filters to all columns """ max_rows = 2**20 @@ -549,6 +551,7 @@ def __init__( merge_cells: ExcelWriterMergeCells = False, inf_rep: str = "inf", style_converter: Callable | None = None, + autofilter: bool = False, ) -> None: self.rowcounter = 0 self.na_rep = na_rep @@ -584,6 +587,7 @@ def __init__( raise ValueError(f"Unexpected value for {merge_cells=}.") self.merge_cells = merge_cells self.inf_rep = inf_rep + self.autofilter = autofilter def _format_value(self, val): if is_scalar(val) and missing.isna(val): @@ -873,6 +877,16 @@ def get_formatted_cells(self) -> Iterable[ExcelCell]: cell.val = self._format_value(cell.val) yield cell + def _num2excel(self, index: int) -> str: + """ + Convert 0-based column index to Excel column name. + """ + column_name = "" + while index > 0 or not column_name: + index, remainder = divmod(index, 26) + column_name = chr(65 + remainder) + column_name + return column_name + @doc(storage_options=_shared_docs["storage_options"]) def write( self, @@ -916,6 +930,13 @@ def write( f"Max sheet size is: {self.max_rows}, {self.max_cols}" ) + if self.autofilter: + start = f"{self._num2excel(startcol)}{startrow + 1}" + end = f"{self._num2excel(startcol + num_cols)}{startrow + num_rows + 1}" + autofilter_range = f"{start}:{end}" + else: + autofilter_range = None + if engine_kwargs is None: engine_kwargs = {} @@ -938,6 +959,7 @@ def write( startrow=startrow, startcol=startcol, freeze_panes=freeze_panes, + autofilter_range=autofilter_range, ) finally: # make sure to close opened file handles diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py index 12f14589365ff..08bd9e292e850 100644 --- a/pandas/tests/io/excel/test_style.py +++ b/pandas/tests/io/excel/test_style.py @@ -350,3 +350,20 @@ def test_format_hierarchical_rows_periodindex(merge_cells): assert isinstance(cell.val, Timestamp), ( "Period should be converted to Timestamp" ) + + 
+@pytest.mark.parametrize("engine", ["xlsxwriter", "openpyxl"]) +def test_autofilter(engine, tmp_excel): + # GH 61194 + df = DataFrame.from_dict([{"A": 1, "B": 2, "C": 3}, {"A": 4, "B": 5, "C": 6}]) + + with ExcelWriter(tmp_excel, engine=engine) as writer: + df.to_excel(writer, autofilter=True, index=False) + + openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl + with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb: + ws = wb.active + + assert ws.auto_filter.ref is not None + print(ws.auto_filter.ref) + assert ws.auto_filter.ref == "A1:D3" From 6bc6952feab7aa0f1c0a487cbbb40ab3760edd17 Mon Sep 17 00:00:00 2001 From: Jacek Arent Date: Wed, 5 Nov 2025 09:30:55 -0800 Subject: [PATCH 2/7] chore: Added missing documentation --- pandas/core/generic.py | 2 ++ pandas/io/excel/_base.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 44d34c25cbc7d..ad488214abc26 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2240,6 +2240,8 @@ def to_excel( {storage_options} .. 
versionadded:: {storage_options_versionadded} + autofilter : bool, default False + If True, add automatic filters to all columns {extra_parameters} See Also -------- diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index d1ae59e0e5866..350c851fafe3a 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1209,6 +1209,7 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + autofilter_range: str | None = None, ) -> None: """ Write given formatted cells into Excel an excel sheet @@ -1223,6 +1224,8 @@ def _write_cells( startcol : upper left cell column to dump data frame freeze_panes: int tuple of length 2 contains the bottom-most row and right-most column to freeze + autofilter_range: str, default None + column ranges to add automatic filters to, for example "A1:D5" """ raise NotImplementedError From d1b05e85cb7f899eb534c36f9c25fecfda7114a7 Mon Sep 17 00:00:00 2001 From: Jacek Arent Date: Wed, 5 Nov 2025 10:22:03 -0800 Subject: [PATCH 3/7] chore: Fixed documentation --- pandas/core/generic.py | 5 +++-- pandas/io/formats/style.py | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ad488214abc26..a1df10d93cf05 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2240,9 +2240,10 @@ def to_excel( {storage_options} .. versionadded:: {storage_options_versionadded} - autofilter : bool, default False - If True, add automatic filters to all columns {extra_parameters} + autofilter : bool, default False + If True, add automatic filters to all columns. + See Also -------- to_csv : Write DataFrame to a comma-separated values (csv) file. 
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 1132981915177..302e9f1779b9f 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -593,6 +593,7 @@ def to_excel( verbose: bool = True, freeze_panes: tuple[int, int] | None = None, storage_options: StorageOptions | None = None, + autofilter: bool = False, ) -> None: from pandas.io.formats.excel import ExcelFormatter @@ -606,6 +607,7 @@ def to_excel( index_label=index_label, merge_cells=merge_cells, inf_rep=inf_rep, + autofilter=autofilter, ) formatter.write( excel_writer, From 1365c21c9914c72dc4368ee4a2be3d0b4a64f5bd Mon Sep 17 00:00:00 2001 From: Jacek Arent Date: Wed, 5 Nov 2025 11:34:03 -0800 Subject: [PATCH 4/7] chore: Added input validation to ensure that column index >= 0 --- pandas/io/formats/excel.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index e11a58adddcba..2fa6b58bdbf17 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -880,8 +880,26 @@ def get_formatted_cells(self) -> Iterable[ExcelCell]: def _num2excel(self, index: int) -> str: """ Convert 0-based column index to Excel column name. + + Parameters + ---------- + index : int + The numeric column index to convert to a Excel column name. + + Returns + ------- + column_name : str + The column name corresponding to the index. 
+ + Raises + ------ + ValueError + Index is negative """ + if index < 0: + raise ValueError(f"Index cannot be negative: {index}") column_name = "" + # while loop in case column name needs to be longer than 1 character while index > 0 or not column_name: index, remainder = divmod(index, 26) column_name = chr(65 + remainder) + column_name From f69c62f0bde9d8be7f06c07572616a619095a84e Mon Sep 17 00:00:00 2001 From: Jacek Arent Date: Thu, 6 Nov 2025 08:17:51 -0800 Subject: [PATCH 5/7] fix: Remove print statement --- pandas/tests/io/excel/test_style.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py index 08bd9e292e850..183b7d58a78fb 100644 --- a/pandas/tests/io/excel/test_style.py +++ b/pandas/tests/io/excel/test_style.py @@ -365,5 +365,4 @@ def test_autofilter(engine, tmp_excel): ws = wb.active assert ws.auto_filter.ref is not None - print(ws.auto_filter.ref) assert ws.auto_filter.ref == "A1:D3" From ecfd50994a16f9690403d0829c071957f42dc6b4 Mon Sep 17 00:00:00 2001 From: Jacek Arent Date: Wed, 12 Nov 2025 00:26:07 -0800 Subject: [PATCH 6/7] fix: Ensure that index size is considered when setting autofilter range. feat: Throw an error when autofilter is set for 'odf' engine. feat: Show a warning when autofilter is set together with merge_cells. chore: Added tests for index=True, startrow/startcol, MultiIndex with merge_cells. 
--- pandas/io/excel/_odswriter.py | 4 ++ pandas/io/formats/excel.py | 20 +++++++- pandas/tests/io/excel/test_style.py | 74 +++++++++++++++++++++++++++-- 3 files changed, 94 insertions(+), 4 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 5b8edafb1fc41..f49b4422ce13b 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -104,6 +104,10 @@ def _write_cells( """ Write the frame cells using odf """ + + if autofilter_range: + raise ValueError("Autofilter is not supported with odf!") + from odf.table import ( Table, TableCell, diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 2fa6b58bdbf17..753a731bd7790 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -949,8 +949,26 @@ def write( ) if self.autofilter: + if num_cols == 0: + indexoffset = 0 + elif self.index: + if isinstance(self.df.index, MultiIndex): + indexoffset = self.df.index.nlevels - 1 + if self.merge_cells: + warnings.warn( + "Excel filters merged cells by showing only the first row." 
+ "'autofiler' and 'merge_cells' should not " + "be used simultaneously.", + UserWarning, + stacklevel=find_stack_level(), + ) + else: + indexoffset = 0 + else: + indexoffset = -1 start = f"{self._num2excel(startcol)}{startrow + 1}" - end = f"{self._num2excel(startcol + num_cols)}{startrow + num_rows + 1}" + autofilter_end_column = self._num2excel(startcol + num_cols + indexoffset) + end = f"{autofilter_end_column}{startrow + num_rows + 1}" autofilter_range = f"{start}:{end}" else: autofilter_range = None diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py index 183b7d58a78fb..f5b15373210ec 100644 --- a/pandas/tests/io/excel/test_style.py +++ b/pandas/tests/io/excel/test_style.py @@ -353,16 +353,84 @@ def test_format_hierarchical_rows_periodindex(merge_cells): @pytest.mark.parametrize("engine", ["xlsxwriter", "openpyxl"]) -def test_autofilter(engine, tmp_excel): +@pytest.mark.parametrize("with_index", [True, False]) +def test_autofilter(engine, with_index, tmp_excel): # GH 61194 df = DataFrame.from_dict([{"A": 1, "B": 2, "C": 3}, {"A": 4, "B": 5, "C": 6}]) with ExcelWriter(tmp_excel, engine=engine) as writer: - df.to_excel(writer, autofilter=True, index=False) + df.to_excel(writer, autofilter=True, index=with_index) openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb: ws = wb.active assert ws.auto_filter.ref is not None - assert ws.auto_filter.ref == "A1:D3" + assert ws.auto_filter.ref == ("A1:D3" if with_index else "A1:C3") + + +@pytest.mark.parametrize("engine", ["xlsxwriter", "openpyxl"]) +def test_autofilter_with_startrow_startcol(engine, tmp_excel): + # GH 61194 + df = DataFrame.from_dict([{"A": 1, "B": 2, "C": 3}, {"A": 4, "B": 5, "C": 6}]) + with ExcelWriter(tmp_excel, engine=engine) as writer: + df.to_excel(writer, autofilter=True, startrow=10, startcol=10) + + openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl 
+ with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb: + ws = wb.active + assert ws.auto_filter.ref is not None + # Autofiler range moved by 10x10 cells + assert ws.auto_filter.ref == "K11:N13" + + +def test_autofilter_not_supported_by_odf(tmp_path): + # GH 61194 + # odf needs 'ods' extension + tmp_excel_ods = tmp_path / f"{uuid.uuid4()}.ods" + tmp_excel_ods.touch() + + with pytest.raises(ValueError, match="Autofilter is not supported with odf!"): + with ExcelWriter(str(tmp_excel_ods), engine="odf") as writer: + DataFrame().to_excel(writer, autofilter=True, index=False) + + +@pytest.mark.parametrize("engine", ["xlsxwriter", "openpyxl"]) +def test_autofilter_with_multiindex(engine, tmp_excel): + # GH 61194 + df = DataFrame( + { + "animal": ("horse", "horse", "dog", "dog"), + "color of fur": ("black", "white", "grey", "black"), + "name": ("Blacky", "Wendy", "Rufus", "Catchy"), + } + ) + mi_df = df.set_index(["animal", "color of fur"]) + with ExcelWriter(tmp_excel, engine=engine) as writer: + mi_df.to_excel(writer, autofilter=True, index=True, merge_cells=False) + + openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl + with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb: + ws = wb.active + + assert ws.auto_filter.ref is not None + assert ws.auto_filter.ref == "A1:C5" + + +def test_autofilter_with_multiindex_and_merge_cells_shows_warning(tmp_excel): + # GH 61194 + df = DataFrame( + { + "animal": ("horse", "horse", "dog", "dog"), + "color of fur": ("black", "white", "grey", "black"), + "name": ("Blacky", "Wendy", "Rufus", "Catchy"), + } + ) + mi_df = df.set_index(["animal", "color of fur"]) + with ExcelWriter(tmp_excel, engine="openpyxl") as writer: + with tm.assert_produces_warning( + UserWarning, + match="Excel filters merged cells by showing only the first row." 
+ "'autofiler' and 'merge_cells' should not be used simultaneously.", + ): + mi_df.to_excel(writer, autofilter=True, index=True, merge_cells=True) From 5160d243ab599025a6eb07a3a505df01ed99c29a Mon Sep 17 00:00:00 2001 From: Jacek Arent Date: Wed, 12 Nov 2025 08:56:00 -0800 Subject: [PATCH 7/7] chore: Check if numpy unit test failure is intermittent. --- pandas/tests/io/excel/test_style.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py index f5b15373210ec..85c22e7a13ed2 100644 --- a/pandas/tests/io/excel/test_style.py +++ b/pandas/tests/io/excel/test_style.py @@ -405,6 +405,7 @@ def test_autofilter_with_multiindex(engine, tmp_excel): "name": ("Blacky", "Wendy", "Rufus", "Catchy"), } ) + # setup hierarchical index mi_df = df.set_index(["animal", "color of fur"]) with ExcelWriter(tmp_excel, engine=engine) as writer: mi_df.to_excel(writer, autofilter=True, index=True, merge_cells=False) @@ -426,6 +427,7 @@ def test_autofilter_with_multiindex_and_merge_cells_shows_warning(tmp_excel): "name": ("Blacky", "Wendy", "Rufus", "Catchy"), } ) + # setup hierarchical index mi_df = df.set_index(["animal", "color of fur"]) with ExcelWriter(tmp_excel, engine="openpyxl") as writer: with tm.assert_produces_warning(