From fb230feeb6acc8b5da972133750eeb5278d0f7c2 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Sun, 8 Mar 2026 20:00:45 +0530
Subject: [PATCH 01/32] feat(CategoricalImputer): add errors param to handle
 multimodal variables (#904)

---
 docs/whats_new/v_190.rst                      |  1 +
 feature_engine/imputation/categorical.py      | 54 +++++++++++--
 .../test_categorical_imputer.py               | 77 ++++++++++++++++++-
 3 files changed, 122 insertions(+), 10 deletions(-)

diff --git a/docs/whats_new/v_190.rst b/docs/whats_new/v_190.rst
index 3ee3222fb..f1b6e22da 100644
--- a/docs/whats_new/v_190.rst
+++ b/docs/whats_new/v_190.rst
@@ -53,6 +53,7 @@ New transformers
 Enhancements
 ~~~~~~~~~~~~
 
+- Added `errors` parameter to `CategoricalImputer` to handle categorical variables with multiple frequent categories instead of automatically raising a `ValueError`. (`DirekKakkar <https://github.com/DirekKakkar>`_)
 - Our variable handling functions now return empty lists when no variables of the desired type are found. (`Soledad Galli <https://github.com/solegalli>`_)
 
 BUG
diff --git a/feature_engine/imputation/categorical.py b/feature_engine/imputation/categorical.py
index 8c4000a0c..40c0a1276 100644
--- a/feature_engine/imputation/categorical.py
+++ b/feature_engine/imputation/categorical.py
@@ -2,6 +2,7 @@
 # License: BSD 3 clause
 
 from typing import List, Optional, Union
+import warnings
 
 import pandas as pd
 
@@ -88,6 +89,18 @@ class CategoricalImputer(BaseImputer):
         type object or categorical. If True, the imputer will select all variables or
         accept all variables entered by the user, including those cast as numeric.
 
+    errors : str, default='raise'
+        Indicates what to do when the selected imputation_method='frequent'
+        and a variable has more than 1 mode.
+
+        If 'raise', raises a ValueError and stops the fit.
+
+        If 'warn', raises a UserWarning and continues, imputing using the
+        first most frequent category found.
+
+        If 'ignore', continues without warnings, imputing using the first
+        most frequent category found.
+
     Attributes
     ----------
     {imputer_dict_}
@@ -135,6 +148,7 @@ def __init__(
         variables: Union[None, int, str, List[Union[str, int]]] = None,
         return_object: bool = False,
         ignore_format: bool = False,
+        errors: str = "raise",
     ) -> None:
         if imputation_method not in ["missing", "frequent"]:
             raise ValueError(
@@ -144,11 +158,18 @@ def __init__(
         if not isinstance(ignore_format, bool):
             raise ValueError("ignore_format takes only booleans True and False")
 
+        if errors not in ("raise", "warn", "ignore"):
+            raise ValueError(
+                "errors takes only values 'raise', 'warn', or 'ignore'. "
+                f"Got {errors} instead."
+            )
+
         self.imputation_method = imputation_method
         self.fill_value = fill_value
         self.variables = _check_variables_input_value(variables)
         self.return_object = return_object
         self.ignore_format = ignore_format
+        self.errors = errors
 
     def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
         """
@@ -189,9 +210,19 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
 
                 # Some variables may contain more than 1 mode:
                 if len(mode_vals) > 1:
-                    raise ValueError(
-                        f"The variable {var} contains multiple frequent categories."
-                    )
+                    if self.errors == "raise":
+                        raise ValueError(
+                            f"The variable {var} contains multiple frequent categories. "
+                            f"Set errors='warn' or errors='ignore' to allow imputation "
+                            f"using the first most frequent category found."
+                        )
+                    elif self.errors == "warn":
+                        warnings.warn(
+                            f"Variable {var} has multiple frequent categories. "
+                            f"The first category found, {mode_vals[0]}, will be used "
+                            f"for imputation.",
+                            UserWarning,
+                        )
 
                 self.imputer_dict_ = {var: mode_vals[0]}
 
@@ -208,10 +239,19 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
                         varnames_str = ", ".join(varnames)
                     else:
                         varnames_str = varnames[0]
-                    raise ValueError(
-                        f"The variable(s) {varnames_str} contain(s) multiple frequent "
-                        f"categories."
-                    )
+
+                    if self.errors == "raise":
+                        raise ValueError(
+                            f"The variable(s) {varnames_str} contain(s) multiple frequent "
+                            f"categories. Set errors='warn' or errors='ignore' to allow "
+                            f"imputation using the first most frequent category found."
+                        )
+                    elif self.errors == "warn":
+                        warnings.warn(
+                            f"Variable(s) {varnames_str} have multiple frequent categories. "
+                            f"The first category found will be used for imputation.",
+                            UserWarning,
+                        )
 
                 self.imputer_dict_ = mode_vals.iloc[0].to_dict()
 
diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 182e8826b..1e55212d5 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -1,8 +1,19 @@
+import numpy as np
+import pandas as pd
 import pandas as pd
 import pytest
+import warnings
 
 from feature_engine.imputation import CategoricalImputer
 
+# --- Shared fixture: perfectly multimodal variable ---
+@pytest.fixture
+def multimodal_df():
+    return pd.DataFrame({
+        "city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"],
+        "country": ["UK", "UK", "FR", "FR", "DE", "DE"],
+    })
+
 
 def test_impute_with_string_missing_and_automatically_find_variables(df_na):
     # set up transformer
@@ -150,14 +161,22 @@ def test_error_when_imputation_method_not_frequent_or_missing():
 
 
 def test_error_when_variable_contains_multiple_modes(df_na):
-    msg = "The variable Name contains multiple frequent categories."
+    msg = (
+        "The variable Name contains multiple frequent categories. "
+        "Set errors='warn' or errors='ignore' to allow imputation "
+        "using the first most frequent category found."
+    )
     imputer = CategoricalImputer(imputation_method="frequent", variables="Name")
     with pytest.raises(ValueError) as record:
         imputer.fit(df_na)
     # check that error message matches
     assert str(record.value) == msg
 
-    msg = "The variable(s) Name contain(s) multiple frequent categories."
+    msg = (
+        "The variable(s) Name contain(s) multiple frequent categories. "
+        "Set errors='warn' or errors='ignore' to allow imputation "
+        "using the first most frequent category found."
+    )
     imputer = CategoricalImputer(imputation_method="frequent")
     with pytest.raises(ValueError) as record:
         imputer.fit(df_na)
@@ -166,7 +185,11 @@ def test_error_when_variable_contains_multiple_modes(df_na):
 
     df_ = df_na.copy()
     df_["Name_dup"] = df_["Name"]
-    msg = "The variable(s) Name, Name_dup contain(s) multiple frequent categories."
+    msg = (
+        "The variable(s) Name, Name_dup contain(s) multiple frequent categories. "
+        "Set errors='warn' or errors='ignore' to allow imputation "
+        "using the first most frequent category found."
+    )
     imputer = CategoricalImputer(imputation_method="frequent")
     with pytest.raises(ValueError) as record:
         imputer.fit(df_)
@@ -305,3 +328,51 @@ def test_error_when_ignore_format_is_not_boolean(ignore_format):
 
     # check that error message matches
     assert str(record.value) == msg
+
+
+def test_errors_raise_on_multimodal_is_default(multimodal_df):
+    """Default behaviour: raise ValueError on multimodal variable."""
+    imputer = CategoricalImputer(imputation_method="frequent")
+    with pytest.raises(ValueError, match="multiple frequent categories"):
+        imputer.fit(multimodal_df)
+
+
+def test_errors_warn_emits_userwarning(multimodal_df):
+    """errors='warn': UserWarning must be emitted."""
+    imputer = CategoricalImputer(imputation_method="frequent", errors="warn")
+    with pytest.warns(UserWarning, match="multiple frequent categories"):
+        imputer.fit(multimodal_df)
+
+
+def test_errors_warn_uses_first_mode(multimodal_df):
+    """errors='warn': imputer_dict_ should contain the first mode."""
+    imputer = CategoricalImputer(imputation_method="frequent", errors="warn")
+    with pytest.warns(UserWarning):
+        imputer.fit(multimodal_df)
+    expected = multimodal_df["city"].mode()[0]
+    assert imputer.imputer_dict_["city"] == expected
+
+
+def test_errors_ignore_no_warning_raised(multimodal_df):
+    """errors='ignore': no warnings should be emitted."""
+    imputer = CategoricalImputer(imputation_method="frequent", errors="ignore")
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")  # Promote all warnings to errors
+        imputer.fit(multimodal_df)  # Should NOT raise
+    assert imputer.imputer_dict_["city"] == multimodal_df["city"].mode()[0]
+
+
+def test_errors_invalid_value_raises():
+    """Passing an unsupported value for errors should raise ValueError at init."""
+    with pytest.raises(ValueError, match="errors takes only values"):
+        CategoricalImputer(imputation_method="frequent", errors="bad_value")
+
+
+def test_errors_param_ignored_when_imputation_method_is_missing():
+    """errors param has no effect for imputation_method='missing'."""
+    df = pd.DataFrame({"city": ["London", np.nan, "Paris"]})
+    imputer = CategoricalImputer(imputation_method="missing", errors="warn")
+    # Should fit without warnings since there's no mode computation
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        imputer.fit(df)

From 81be3489fb56fc80ab1f8906bc5d12111bb19858 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Sun, 8 Mar 2026 20:41:13 +0530
Subject: [PATCH 02/32] style: fix flake8 line length in CategoricalImputer

---
 feature_engine/imputation/categorical.py      | 28 +++++++++++--------
 .../test_categorical_imputer.py               | 11 +++++---
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/feature_engine/imputation/categorical.py b/feature_engine/imputation/categorical.py
index 40c0a1276..cc1c2e2d2 100644
--- a/feature_engine/imputation/categorical.py
+++ b/feature_engine/imputation/categorical.py
@@ -212,15 +212,16 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
                 if len(mode_vals) > 1:
                     if self.errors == "raise":
                         raise ValueError(
-                            f"The variable {var} contains multiple frequent categories. "
-                            f"Set errors='warn' or errors='ignore' to allow imputation "
-                            f"using the first most frequent category found."
+                            f"The variable {var} contains multiple "
+                            f"frequent categories. Set errors='warn' or "
+                            f"errors='ignore' to allow imputation using "
+                            f"the first most frequent category found."
                         )
                     elif self.errors == "warn":
                         warnings.warn(
-                            f"Variable {var} has multiple frequent categories. "
-                            f"The first category found, {mode_vals[0]}, will be used "
-                            f"for imputation.",
+                            f"Variable {var} has multiple frequent "
+                            f"categories. The first category found, "
+                            f"{mode_vals[0]}, will be used for imputation.",
                             UserWarning,
                         )
 
@@ -242,14 +243,17 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
 
                     if self.errors == "raise":
                         raise ValueError(
-                            f"The variable(s) {varnames_str} contain(s) multiple frequent "
-                            f"categories. Set errors='warn' or errors='ignore' to allow "
-                            f"imputation using the first most frequent category found."
+                            f"The variable(s) {varnames_str} contain(s) "
+                            f"multiple frequent categories. Set "
+                            f"errors='warn' or errors='ignore' to allow "
+                            f"imputation using the first most frequent "
+                            f"category found."
                         )
                     elif self.errors == "warn":
                         warnings.warn(
-                            f"Variable(s) {varnames_str} have multiple frequent categories. "
-                            f"The first category found will be used for imputation.",
+                            f"Variable(s) {varnames_str} have multiple "
+                            f"frequent categories. The first category "
+                            f"found will be used for imputation.",
                             UserWarning,
                         )
 
@@ -301,4 +305,4 @@ def _more_tags(self):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.allow_nan = True
-        return tags
+        return tags
\ No newline at end of file
diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 1e55212d5..c6ea41d89 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -6,13 +6,16 @@
 
 from feature_engine.imputation import CategoricalImputer
 
+
 # --- Shared fixture: perfectly multimodal variable ---
 @pytest.fixture
 def multimodal_df():
-    return pd.DataFrame({
-        "city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"],
-        "country": ["UK", "UK", "FR", "FR", "DE", "DE"],
-    })
+    return pd.DataFrame(
+        {
+            "city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"],
+            "country": ["UK", "UK", "FR", "FR", "DE", "DE"],
+        }
+    )
 
 
 def test_impute_with_string_missing_and_automatically_find_variables(df_na):

From 4fb5b7aa6cd37077cd91a046df8bf921e02e52b6 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Sun, 8 Mar 2026 20:48:01 +0530
Subject: [PATCH 03/32] style: fix import order and duplicate pandas import

---
 feature_engine/imputation/categorical.py      | 32 +++++++------------
 .../test_categorical_imputer.py               |  1 -
 2 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/feature_engine/imputation/categorical.py b/feature_engine/imputation/categorical.py
index cc1c2e2d2..2d1f48e97 100644
--- a/feature_engine/imputation/categorical.py
+++ b/feature_engine/imputation/categorical.py
@@ -1,34 +1,26 @@
 # Authors: Soledad Galli <solegalli@protonmail.com>
 # License: BSD 3 clause
 
-from typing import List, Optional, Union
 import warnings
+from typing import List, Optional, Union
 
 import pandas as pd
 
-from feature_engine._check_init_parameters.check_variables import (
-    _check_variables_input_value,
-)
+from feature_engine._check_init_parameters.check_variables import \
+    _check_variables_input_value
 from feature_engine._docstrings.fit_attributes import (
-    _feature_names_in_docstring,
-    _imputer_dict_docstring,
-    _n_features_in_docstring,
-    _variables_attribute_docstring,
-)
-from feature_engine._docstrings.methods import (
-    _fit_transform_docstring,
-    _transform_imputers_docstring,
-)
+    _feature_names_in_docstring, _imputer_dict_docstring,
+    _n_features_in_docstring, _variables_attribute_docstring)
+from feature_engine._docstrings.methods import (_fit_transform_docstring,
+                                                _transform_imputers_docstring)
 from feature_engine._docstrings.substitute import Substitution
 from feature_engine.dataframe_checks import check_X
 from feature_engine.imputation.base_imputer import BaseImputer
 from feature_engine.tags import _return_tags
-from feature_engine.variable_handling import (
-    check_all_variables,
-    check_categorical_variables,
-    find_all_variables,
-    find_categorical_variables,
-)
+from feature_engine.variable_handling import (check_all_variables,
+                                              check_categorical_variables,
+                                              find_all_variables,
+                                              find_categorical_variables)
 
 
 @Substitution(
@@ -305,4 +297,4 @@ def _more_tags(self):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.allow_nan = True
-        return tags
\ No newline at end of file
+        return tags
diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index c6ea41d89..788a7b924 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -1,6 +1,5 @@
 import numpy as np
 import pandas as pd
-import pandas as pd
 import pytest
 import warnings
 

From 835133f4c12b072f09310d6a17c4f81aaadbc11f Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Sun, 8 Mar 2026 22:49:48 +0530
Subject: [PATCH 04/32] test: add coverage for errors='ignore' branches

---
 .../test_categorical_imputer.py               | 27 ++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 788a7b924..995db0c69 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -1,7 +1,8 @@
+import warnings
+
 import numpy as np
 import pandas as pd
 import pytest
-import warnings
 
 from feature_engine.imputation import CategoricalImputer
 
@@ -378,3 +379,27 @@ def test_errors_param_ignored_when_imputation_method_is_missing():
     with warnings.catch_warnings():
         warnings.simplefilter("error")
         imputer.fit(df)
+
+
+def test_errors_ignore_single_variable():
+    """errors='ignore' on single multimodal variable — silent, uses first mode."""
+    X = pd.DataFrame(
+        {"city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"]}
+    )
+    imputer = CategoricalImputer(imputation_method="frequent", errors="ignore")
+    imputer.fit(X)
+    assert imputer.imputer_dict_["city"] == X["city"].mode()[0]
+
+
+def test_errors_ignore_multiple_variables():
+    """errors='ignore' on multiple multimodal variables — silent, uses first mode."""
+    X = pd.DataFrame(
+        {
+            "city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"],
+            "country": ["UK", "UK", "FR", "FR", "DE", "DE"],
+        }
+    )
+    imputer = CategoricalImputer(imputation_method="frequent", errors="ignore")
+    imputer.fit(X)
+    assert imputer.imputer_dict_["city"] == X["city"].mode()[0]
+    assert imputer.imputer_dict_["country"] == X["country"].mode()[0]
\ No newline at end of file

From 81f31d8af4613b2fbfd2b7ebbdbc6f3fa087c4b7 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Sun, 8 Mar 2026 22:53:33 +0530
Subject: [PATCH 05/32] style: add missing newline at end of test file

---
 tests/test_imputation/test_categorical_imputer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 995db0c69..de4ce0bc4 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -402,4 +402,4 @@ def test_errors_ignore_multiple_variables():
     imputer = CategoricalImputer(imputation_method="frequent", errors="ignore")
     imputer.fit(X)
     assert imputer.imputer_dict_["city"] == X["city"].mode()[0]
-    assert imputer.imputer_dict_["country"] == X["country"].mode()[0]
\ No newline at end of file
+    assert imputer.imputer_dict_["country"] == X["country"].mode()[0]

From 657de1f8468242f555b0a5fca602ad2e2374a8b6 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Mon, 9 Mar 2026 19:19:12 +0530
Subject: [PATCH 06/32] test: add tests for codecov patch coverage

---
 .../test_categorical_imputer.py               | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index de4ce0bc4..bc9d69a04 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -403,3 +403,50 @@ def test_errors_ignore_multiple_variables():
     imputer.fit(X)
     assert imputer.imputer_dict_["city"] == X["city"].mode()[0]
     assert imputer.imputer_dict_["country"] == X["country"].mode()[0]
+
+
+# =============================================================================
+# NEW TESTS — added to fix codecov patch coverage (1 missing + 1 partial line)
+# =============================================================================
+
+def test_errors_warn_single_variable_emits_userwarning():
+    """
+    Covers the warnings.warn() inside the SINGLE-VARIABLE block of fit().
+
+    The existing test_errors_warn_emits_userwarning uses multimodal_df (2 columns),
+    which goes through the multi-variable code path. This test uses variables='city'
+    (a single variable) to hit the separate single-variable warn branch.
+    """
+    X = pd.DataFrame(
+        {"city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"]}
+    )
+    imputer = CategoricalImputer(
+        imputation_method="frequent", variables="city", errors="warn"
+    )
+    with pytest.warns(UserWarning, match="multiple frequent categories"):
+        imputer.fit(X)
+    # First mode is used
+    assert imputer.imputer_dict_["city"] == X["city"].mode()[0]
+
+
+def test_errors_raise_one_multimodal_among_multiple_variables():
+    """
+    Covers the `varnames_str = varnames[0]` else-branch in the MULTI-VARIABLE block.
+
+    This branch is reached when multiple variables are selected but only ONE of them
+    turns out to have multiple modes. The existing tests either raise on all-multimodal
+    datasets (len(varnames) > 1) or use errors='ignore'/'warn' (skipping the raise).
+    Here we select two variables where only 'city' is multimodal, triggering the
+    singular else-branch before the ValueError is raised.
+    """
+    X = pd.DataFrame(
+        {
+            # 'city': 3 equally frequent values → multimodal
+            "city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"],
+            # 'country': clear single mode (UK appears 3×, others once)
+            "country": ["UK", "UK", "UK", "FR", "DE", "SE"],
+        }
+    )
+    imputer = CategoricalImputer(imputation_method="frequent", errors="raise")
+    with pytest.raises(ValueError, match="city"):
+        imputer.fit(X)
\ No newline at end of file

From a0ea71dc5f06afa68659efaf762823f5a7cf15d9 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Mon, 16 Mar 2026 19:02:34 +0530
Subject: [PATCH 07/32] style: add newline at end of test_categorical_imputer.py

---
 .gitignore                                        | 1 +
 tests/test_imputation/test_categorical_imputer.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 3ba72acd9..0096d1595 100644
--- a/.gitignore
+++ b/.gitignore
@@ -86,6 +86,7 @@ celerybeat-schedule
 # Environments
 .env
 .venv
+.venv_wsl
 env/
 venv/
 ENV/
diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index bc9d69a04..57fe62a3f 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -449,4 +449,4 @@ def test_errors_raise_one_multimodal_among_multiple_variables():
     )
     imputer = CategoricalImputer(imputation_method="frequent", errors="raise")
     with pytest.raises(ValueError, match="city"):
-        imputer.fit(X)
\ No newline at end of file
+        imputer.fit(X)

From 0cdcf03b018d2c8b181839922fb8298f213e7d13 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Thu, 26 Mar 2026 22:53:09 +0530
Subject: [PATCH 08/32] Revert docs/whats_new/v_190.rst to upstream version

---
 docs/whats_new/v_190.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/whats_new/v_190.rst b/docs/whats_new/v_190.rst
index f1b6e22da..3ee3222fb 100644
--- a/docs/whats_new/v_190.rst
+++ b/docs/whats_new/v_190.rst
@@ -53,7 +53,6 @@ New transformers
 Enhancements
 ~~~~~~~~~~~~
 
-- Added `errors` parameter to `CategoricalImputer` to handle categorical variables with multiple frequent categories instead of automatically raising a `ValueError`. (`DirekKakkar <https://github.com/DirekKakkar>`_)
 - Our variable handling functions now return empty lists when no variables of the desired type are found. (`Soledad Galli <https://github.com/solegalli>`_)
 
 BUG

From cf7670eb5431126c80e3febdeb103c0af4d08daa Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Thu, 26 Mar 2026 23:06:12 +0530
Subject: [PATCH 09/32] changes done to
 `feature_engine/imputation/categorical.py`

---
 feature_engine/imputation/categorical.py | 30 +++++++++++++++---------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/feature_engine/imputation/categorical.py b/feature_engine/imputation/categorical.py
index 2d1f48e97..6996e8bad 100644
--- a/feature_engine/imputation/categorical.py
+++ b/feature_engine/imputation/categorical.py
@@ -6,21 +6,29 @@
 
 import pandas as pd
 
-from feature_engine._check_init_parameters.check_variables import \
-    _check_variables_input_value
+from 
+feature_engine._check_init_parameters.check_variables
+import (
+    _check_variables_input_value,
+)
 from feature_engine._docstrings.fit_attributes import (
-    _feature_names_in_docstring, _imputer_dict_docstring,
-    _n_features_in_docstring, _variables_attribute_docstring)
+    _feature_names_in_docstring,
+    _imputer_dict_docstring,
+    _n_features_in_docstring,
+    _variables_attribute_docstring
+)
 from feature_engine._docstrings.methods import (_fit_transform_docstring,
                                                 _transform_imputers_docstring)
 from feature_engine._docstrings.substitute import Substitution
 from feature_engine.dataframe_checks import check_X
 from feature_engine.imputation.base_imputer import BaseImputer
 from feature_engine.tags import _return_tags
-from feature_engine.variable_handling import (check_all_variables,
-                                              check_categorical_variables,
-                                              find_all_variables,
-                                              find_categorical_variables)
+from feature_engine.variable_handling import (
+    check_all_variables,
+    check_categorical_variables,
+    find_all_variables,
+    find_categorical_variables
+)
 
 
 @Substitution(
@@ -81,8 +89,8 @@ class CategoricalImputer(BaseImputer):
         type object or categorical. If True, the imputer will select all variables or
         accept all variables entered by the user, including those cast as numeric.
 
-    errors : str, default='raise'
-        Indicates what to do when the selected imputation_method='frequent'
+    multimodal : str, default='raise'
+        Indicates what to do when imputation_method='frequent'
         and a variable has more than 1 mode.
 
         If 'raise', raises a ValueError and stops the fit.
@@ -150,7 +158,7 @@ def __init__(
         if not isinstance(ignore_format, bool):
             raise ValueError("ignore_format takes only booleans True and False")
 
-        if errors not in ("raise", "warn", "ignore"):
+        if not isinstance(errors, str):
             raise ValueError(
                 "errors takes only values 'raise', 'warn', or 'ignore'. "
                 f"Got {errors} instead."

From fb2f8db6191c659f392411f175f6633a5ef3634d Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Thu, 26 Mar 2026 23:07:48 +0530
Subject: [PATCH 10/32] changes made to
 `tests/test_imputation/test_categorical_imputer.py`

---
 tests/test_imputation/test_categorical_imputer.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 57fe62a3f..7874abd36 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -7,7 +7,6 @@
 from feature_engine.imputation import CategoricalImputer
 
 
-# --- Shared fixture: perfectly multimodal variable ---
 @pytest.fixture
 def multimodal_df():
     return pd.DataFrame(

From 97d6053b7eb1be7a16fe8a3be1693dc6c196a109 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Thu, 26 Mar 2026 23:30:08 +0530
Subject: [PATCH 11/32] resolved comment done on R15

---
 feature_engine/imputation/categorical.py      |  6 +--
 .../test_categorical_imputer.py               | 45 +++++++++++--------
 2 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/feature_engine/imputation/categorical.py b/feature_engine/imputation/categorical.py
index 6996e8bad..f4a4770c6 100644
--- a/feature_engine/imputation/categorical.py
+++ b/feature_engine/imputation/categorical.py
@@ -6,9 +6,7 @@
 
 import pandas as pd
 
-from 
-feature_engine._check_init_parameters.check_variables
-import (
+from feature_engine._check_init_parameters.check_variables import (
     _check_variables_input_value,
 )
 from feature_engine._docstrings.fit_attributes import (
@@ -158,7 +156,7 @@ def __init__(
         if not isinstance(ignore_format, bool):
             raise ValueError("ignore_format takes only booleans True and False")
 
-        if not isinstance(errors, str):
+        if errors not in ["raise", "warn", "ignore"]:
             raise ValueError(
                 "errors takes only values 'raise', 'warn', or 'ignore'. "
                 f"Got {errors} instead."
diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 7874abd36..1ea0661f0 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -11,8 +11,9 @@
 def multimodal_df():
     return pd.DataFrame(
         {
-            "city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"],
-            "country": ["UK", "UK", "FR", "FR", "DE", "DE"],
+            "city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin", "Madrid"],
+            "country": ["UK", "UK", "FR", "FR", "DE", "DE", "ES"],
+            "one_mode": ["London", "London", "London", "Paris", "Paris", "Berlin", "Berlin"],
         }
     )
 
@@ -332,18 +333,31 @@ def test_error_when_ignore_format_is_not_boolean(ignore_format):
     assert str(record.value) == msg
 
 
-def test_errors_raise_on_multimodal_is_default(multimodal_df):
-    """Default behaviour: raise ValueError on multimodal variable."""
+def test_multimodal_raises_errors(multimodal_df):
     imputer = CategoricalImputer(imputation_method="frequent")
-    with pytest.raises(ValueError, match="multiple frequent categories"):
+    msg = (
+        "The variable(s) city, country contain(s) multiple frequent categories. "
+        "Set errors='warn' or errors='ignore' to allow imputation "
+        "using the first most frequent category found."
+    )
+    with pytest.raises(ValueError) as record:
         imputer.fit(multimodal_df)
+    assert str(record.value) == msg
 
 
-def test_errors_warn_emits_userwarning(multimodal_df):
-    """errors='warn': UserWarning must be emitted."""
+def test_multimodal_raises_warning(multimodal_df):
     imputer = CategoricalImputer(imputation_method="frequent", errors="warn")
-    with pytest.warns(UserWarning, match="multiple frequent categories"):
+    msg = (
+        "Variable(s) city, country have multiple frequent categories. "
+        "The first category found will be used for imputation."
+    )
+    with pytest.warns(UserWarning, match="multiple frequent categories") as record:
         imputer.fit(multimodal_df)
+    # Filter for the specific warning message in case others were raised
+    matching_warnings = [
+        w for w in record if "multiple frequent categories" in str(w.message)
+    ]
+    assert str(matching_warnings[0].message) == msg
 
 
 def test_errors_warn_uses_first_mode(multimodal_df):
@@ -351,17 +365,19 @@ def test_errors_warn_uses_first_mode(multimodal_df):
     imputer = CategoricalImputer(imputation_method="frequent", errors="warn")
     with pytest.warns(UserWarning):
         imputer.fit(multimodal_df)
-    expected = multimodal_df["city"].mode()[0]
-    assert imputer.imputer_dict_["city"] == expected
+    assert imputer.imputer_dict_["city"] == multimodal_df["city"].mode()[0]
+    assert imputer.imputer_dict_["country"] == multimodal_df["country"].mode()[0]
+    assert imputer.imputer_dict_["one_mode"] == "London"
 
 
 def test_errors_ignore_no_warning_raised(multimodal_df):
-    """errors='ignore': no warnings should be emitted."""
     imputer = CategoricalImputer(imputation_method="frequent", errors="ignore")
     with warnings.catch_warnings():
         warnings.simplefilter("error")  # Promote all warnings to errors
         imputer.fit(multimodal_df)  # Should NOT raise
     assert imputer.imputer_dict_["city"] == multimodal_df["city"].mode()[0]
+    assert imputer.imputer_dict_["country"] == multimodal_df["country"].mode()[0]
+    assert imputer.imputer_dict_["one_mode"] == "London"
 
 
 def test_errors_invalid_value_raises():
@@ -409,13 +425,6 @@ def test_errors_ignore_multiple_variables():
 # =============================================================================
 
 def test_errors_warn_single_variable_emits_userwarning():
-    """
-    Covers the warnings.warn() inside the SINGLE-VARIABLE block of fit().
-
-    The existing test_errors_warn_emits_userwarning uses multimodal_df (2 columns),
-    which goes through the multi-variable code path. This test uses variables='city'
-    (a single variable) to hit the separate single-variable warn branch.
-    """
     X = pd.DataFrame(
         {"city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"]}
     )

From c454edd5ee786b2dac970fb89e72b0c693be0248 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Thu, 26 Mar 2026 23:40:15 +0530
Subject: [PATCH 12/32] reformatted the error tests to match the error from
 within pytest

---
 .../test_categorical_imputer.py               | 24 +++++++------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 1ea0661f0..3cd3658ab 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -1,3 +1,4 @@
+import re
 import warnings
 
 import numpy as np
@@ -159,7 +160,8 @@ def test_imputation_of_numerical_vars_cast_as_object_and_returned_as_object(df_n
 
 
 def test_error_when_imputation_method_not_frequent_or_missing():
-    with pytest.raises(ValueError):
+    msg = "imputation_method takes only values 'missing' or 'frequent'"
+    with pytest.raises(ValueError, match=msg):
         CategoricalImputer(imputation_method="arbitrary")
 
 
@@ -170,10 +172,8 @@ def test_error_when_variable_contains_multiple_modes(df_na):
         "using the first most frequent category found."
     )
     imputer = CategoricalImputer(imputation_method="frequent", variables="Name")
-    with pytest.raises(ValueError) as record:
+    with pytest.raises(ValueError, match=re.escape(msg)):
         imputer.fit(df_na)
-    # check that error message matches
-    assert str(record.value) == msg
 
     msg = (
         "The variable(s) Name contain(s) multiple frequent categories. "
@@ -181,10 +181,8 @@ def test_error_when_variable_contains_multiple_modes(df_na):
         "using the first most frequent category found."
     )
     imputer = CategoricalImputer(imputation_method="frequent")
-    with pytest.raises(ValueError) as record:
+    with pytest.raises(ValueError, match=re.escape(msg)):
         imputer.fit(df_na)
-    # check that error message matches
-    assert str(record.value) == msg
 
     df_ = df_na.copy()
     df_["Name_dup"] = df_["Name"]
@@ -194,10 +192,8 @@ def test_error_when_variable_contains_multiple_modes(df_na):
         "using the first most frequent category found."
     )
     imputer = CategoricalImputer(imputation_method="frequent")
-    with pytest.raises(ValueError) as record:
+    with pytest.raises(ValueError, match=re.escape(msg)):
         imputer.fit(df_)
-    # check that error message matches
-    assert str(record.value) == msg
 
 
 def test_impute_numerical_variables(df_na):
@@ -326,12 +322,9 @@ def test_variables_cast_as_category_frequent(df_na):
 )
 def test_error_when_ignore_format_is_not_boolean(ignore_format):
     msg = "ignore_format takes only booleans True and False"
-    with pytest.raises(ValueError) as record:
+    with pytest.raises(ValueError, match=msg):
         CategoricalImputer(imputation_method="missing", ignore_format=ignore_format)
 
-    # check that error message matches
-    assert str(record.value) == msg
-
 
 def test_multimodal_raises_errors(multimodal_df):
     imputer = CategoricalImputer(imputation_method="frequent")
@@ -340,9 +333,8 @@ def test_multimodal_raises_errors(multimodal_df):
         "Set errors='warn' or errors='ignore' to allow imputation "
         "using the first most frequent category found."
     )
-    with pytest.raises(ValueError) as record:
+    with pytest.raises(ValueError, match=re.escape(msg)):
         imputer.fit(multimodal_df)
-    assert str(record.value) == msg
 
 
 def test_multimodal_raises_warning(multimodal_df):

From 5992d09aa27554044e01859b7e6bab998cd121c8 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Thu, 26 Mar 2026 23:47:28 +0530
Subject: [PATCH 13/32] merged three tests into one test

---
 .../test_categorical_imputer.py               | 40 +++++--------------
 1 file changed, 11 insertions(+), 29 deletions(-)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 3cd3658ab..fa918ed90 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -337,36 +337,18 @@ def test_multimodal_raises_errors(multimodal_df):
         imputer.fit(multimodal_df)
 
 
-def test_multimodal_raises_warning(multimodal_df):
-    imputer = CategoricalImputer(imputation_method="frequent", errors="warn")
-    msg = (
-        "Variable(s) city, country have multiple frequent categories. "
-        "The first category found will be used for imputation."
-    )
-    with pytest.warns(UserWarning, match="multiple frequent categories") as record:
-        imputer.fit(multimodal_df)
-    # Filter for the specific warning message in case others were raised
-    matching_warnings = [
-        w for w in record if "multiple frequent categories" in str(w.message)
-    ]
-    assert str(matching_warnings[0].message) == msg
-
-
-def test_errors_warn_uses_first_mode(multimodal_df):
-    """errors='warn': imputer_dict_ should contain the first mode."""
-    imputer = CategoricalImputer(imputation_method="frequent", errors="warn")
-    with pytest.warns(UserWarning):
-        imputer.fit(multimodal_df)
-    assert imputer.imputer_dict_["city"] == multimodal_df["city"].mode()[0]
-    assert imputer.imputer_dict_["country"] == multimodal_df["country"].mode()[0]
-    assert imputer.imputer_dict_["one_mode"] == "London"
+@pytest.mark.parametrize("errors", ["warn", "ignore"])
+def test_multimodal_imputation_result(multimodal_df, errors):
+    """Check that result is the same when errors='warn' or 'ignore'."""
+    imputer = CategoricalImputer(imputation_method="frequent", errors=errors)
+    if errors == "warn":
+        with pytest.warns(UserWarning, match="multiple frequent categories"):
+            imputer.fit(multimodal_df)
+    else:
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
+            imputer.fit(multimodal_df)
 
-
-def test_errors_ignore_no_warning_raised(multimodal_df):
-    imputer = CategoricalImputer(imputation_method="frequent", errors="ignore")
-    with warnings.catch_warnings():
-        warnings.simplefilter("error")  # Promote all warnings to errors
-        imputer.fit(multimodal_df)  # Should NOT raise
     assert imputer.imputer_dict_["city"] == multimodal_df["city"].mode()[0]
     assert imputer.imputer_dict_["country"] == multimodal_df["country"].mode()[0]
     assert imputer.imputer_dict_["one_mode"] == "London"

From 85b1974bb813d7dd7cd3d76c217a1583833446f5 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Thu, 26 Mar 2026 23:49:09 +0530
Subject: [PATCH 14/32] left change

---
 tests/test_imputation/test_categorical_imputer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index fa918ed90..e86262554 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -345,9 +345,10 @@ def test_multimodal_imputation_result(multimodal_df, errors):
         with pytest.warns(UserWarning, match="multiple frequent categories"):
             imputer.fit(multimodal_df)
     else:
-        with warnings.catch_warnings():
-            warnings.simplefilter("error")
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
             imputer.fit(multimodal_df)
+            assert len(w) == 0
 
     assert imputer.imputer_dict_["city"] == multimodal_df["city"].mode()[0]
     assert imputer.imputer_dict_["country"] == multimodal_df["country"].mode()[0]

From 09429f3603962c2eefb8773232e32af685539dd3 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Thu, 26 Mar 2026 23:56:25 +0530
Subject: [PATCH 15/32] refactored the multimodal tests

---
 tests/test_imputation/test_categorical_imputer.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index e86262554..37e4a0be5 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -348,11 +348,11 @@ def test_multimodal_imputation_result(multimodal_df, errors):
         with warnings.catch_warnings(record=True) as w:
             warnings.simplefilter("always")
             imputer.fit(multimodal_df)
-            assert len(w) == 0
-
-    assert imputer.imputer_dict_["city"] == multimodal_df["city"].mode()[0]
-    assert imputer.imputer_dict_["country"] == multimodal_df["country"].mode()[0]
-    assert imputer.imputer_dict_["one_mode"] == "London"
+            # Check that no warnings with the specific message were raised
+            matching_warnings = [
+                msg for msg in w if "multiple frequent categories" in str(msg.message)
+            ]
+            assert len(matching_warnings) == 0
 
 
 def test_errors_invalid_value_raises():

From 0b86cfa702fe803f41706026f7c81794322202ed Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 27 Mar 2026 00:01:00 +0530
Subject: [PATCH 16/32] refactored test_errors_invalid_value_raises

---
 tests/test_imputation/test_categorical_imputer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 37e4a0be5..7929854b4 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -355,10 +355,11 @@ def test_multimodal_imputation_result(multimodal_df, errors):
             assert len(matching_warnings) == 0
 
 
-def test_errors_invalid_value_raises():
+@pytest.mark.parametrize("errors", ["bad_value", 1, True])
+def test_errors_invalid_value_raises(errors):
     """Passing an unsupported value for errors should raise ValueError at init."""
     with pytest.raises(ValueError, match="errors takes only values"):
-        CategoricalImputer(imputation_method="frequent", errors="bad_value")
+        CategoricalImputer(imputation_method="frequent", errors=errors)
 
 
 def test_errors_param_ignored_when_imputation_method_is_missing():

From 45f4e2f9749e5c1b55a9464fb4e67d1f09b30c91 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 27 Mar 2026 00:06:48 +0530
Subject: [PATCH 17/32] changed the function
 `test_errors_param_ignored_when_imputation_method_is_missing`

---
 tests/test_imputation/test_categorical_imputer.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 7929854b4..5d3ecb6ef 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -367,9 +367,13 @@ def test_errors_param_ignored_when_imputation_method_is_missing():
     df = pd.DataFrame({"city": ["London", np.nan, "Paris"]})
     imputer = CategoricalImputer(imputation_method="missing", errors="warn")
     # Should fit without warnings since there's no mode computation
-    with warnings.catch_warnings():
-        warnings.simplefilter("error")
+    with warnings.catch_warnings(record=True) as w:
+        warnings.simplefilter("always")
         imputer.fit(df)
+        matching_warnings = [
+            msg for msg in w if "multiple frequent categories" in str(msg.message)
+        ]
+        assert len(matching_warnings) == 0
 
 
 def test_errors_ignore_single_variable():

From cda93e70b49a57fc5f54f7463fda14cd3f92ba06 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 27 Mar 2026 00:10:51 +0530
Subject: [PATCH 18/32] removed `test_errors_ignore_single_variable`
 `test_errors_ignore_multiple_variables`

---
 .../test_categorical_imputer.py               | 24 -------------------
 1 file changed, 24 deletions(-)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 5d3ecb6ef..b2e3f9726 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -376,30 +376,6 @@ def test_errors_param_ignored_when_imputation_method_is_missing():
         assert len(matching_warnings) == 0
 
 
-def test_errors_ignore_single_variable():
-    """errors='ignore' on single multimodal variable — silent, uses first mode."""
-    X = pd.DataFrame(
-        {"city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"]}
-    )
-    imputer = CategoricalImputer(imputation_method="frequent", errors="ignore")
-    imputer.fit(X)
-    assert imputer.imputer_dict_["city"] == X["city"].mode()[0]
-
-
-def test_errors_ignore_multiple_variables():
-    """errors='ignore' on multiple multimodal variables — silent, uses first mode."""
-    X = pd.DataFrame(
-        {
-            "city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"],
-            "country": ["UK", "UK", "FR", "FR", "DE", "DE"],
-        }
-    )
-    imputer = CategoricalImputer(imputation_method="frequent", errors="ignore")
-    imputer.fit(X)
-    assert imputer.imputer_dict_["city"] == X["city"].mode()[0]
-    assert imputer.imputer_dict_["country"] == X["country"].mode()[0]
-
-
 # =============================================================================
 # NEW TESTS — added to fix codecov patch coverage (1 missing + 1 partial line)
 # =============================================================================

From 04be1a0255416b25c1a608e1fd481c9c7c89c876 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 27 Mar 2026 00:11:41 +0530
Subject: [PATCH 19/32] remove the commented block

---
 tests/test_imputation/test_categorical_imputer.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index b2e3f9726..011a8b2f8 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -376,10 +376,6 @@ def test_errors_param_ignored_when_imputation_method_is_missing():
         assert len(matching_warnings) == 0
 
 
-# =============================================================================
-# NEW TESTS — added to fix codecov patch coverage (1 missing + 1 partial line)
-# =============================================================================
-
 def test_errors_warn_single_variable_emits_userwarning():
     X = pd.DataFrame(
         {"city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"]}

From 94643d8d7fc27c6acbabc922dacb68d5ed18be17 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 27 Mar 2026 00:15:30 +0530
Subject: [PATCH 20/32] last few changes made

---
 .../test_categorical_imputer.py               | 31 +++++--------------
 1 file changed, 8 insertions(+), 23 deletions(-)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 011a8b2f8..c0819ea6b 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -376,37 +376,22 @@ def test_errors_param_ignored_when_imputation_method_is_missing():
         assert len(matching_warnings) == 0
 
 
-def test_errors_warn_single_variable_emits_userwarning():
-    X = pd.DataFrame(
-        {"city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"]}
-    )
+def test_warning_when_single_variable_is_multimodal(multimodal_df):
     imputer = CategoricalImputer(
         imputation_method="frequent", variables="city", errors="warn"
     )
     with pytest.warns(UserWarning, match="multiple frequent categories"):
-        imputer.fit(X)
-    # First mode is used
-    assert imputer.imputer_dict_["city"] == X["city"].mode()[0]
+        imputer.fit(multimodal_df)
+    assert imputer.imputer_dict_["city"] == multimodal_df["city"].mode()[0]
 
 
-def test_errors_raise_one_multimodal_among_multiple_variables():
+def test_errors_raise_when_only_one_variable_is_multimodal(multimodal_df):
     """
-    Covers the `varnames_str = varnames[0]` else-branch in the MULTI-VARIABLE block.
-
     This branch is reached when multiple variables are selected but only ONE of them
-    turns out to have multiple modes. The existing tests either raise on all-multimodal
-    datasets (len(varnames) > 1) or use errors='ignore'/'warn' (skipping the raise).
-    Here we select two variables where only 'city' is multimodal, triggering the
-    singular else-branch before the ValueError is raised.
+    turns out to have multiple modes.
     """
-    X = pd.DataFrame(
-        {
-            # 'city': 3 equally frequent values → multimodal
-            "city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin"],
-            # 'country': clear single mode (UK appears 3×, others once)
-            "country": ["UK", "UK", "UK", "FR", "DE", "SE"],
-        }
+    imputer = CategoricalImputer(
+        imputation_method="frequent", variables=["city", "one_mode"], errors="raise"
     )
-    imputer = CategoricalImputer(imputation_method="frequent", errors="raise")
     with pytest.raises(ValueError, match="city"):
-        imputer.fit(X)
+        imputer.fit(multimodal_df)

From ab6ba66033d979a882d2e3838905129c4a0d46e1 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 27 Mar 2026 00:37:12 +0530
Subject: [PATCH 21/32] test case style updated

---
 tests/test_imputation/test_categorical_imputer.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index c0819ea6b..f5d0b8de0 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -12,9 +12,13 @@
 def multimodal_df():
     return pd.DataFrame(
         {
-            "city": ["London", "London", "Paris", "Paris", "Berlin", "Berlin", "Madrid"],
+            "city": [
+                "London", "London", "Paris", "Paris", "Berlin", "Berlin", "Madrid"
+            ],
             "country": ["UK", "UK", "FR", "FR", "DE", "DE", "ES"],
-            "one_mode": ["London", "London", "London", "Paris", "Paris", "Berlin", "Berlin"],
+            "one_mode": [
+                "London", "London", "London", "Paris", "Paris", "Berlin", "Berlin"
+            ],
         }
     )
 

From 6ba7fceb7eee54f36c7ca5db821ca4c709029231 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 27 Mar 2026 19:49:34 +0530
Subject: [PATCH 22/32] Renamed `errors` to `multimodal` in CategoricalImputer
 and add missing test

---
 feature_engine/imputation/categorical.py      | 24 ++++----
 .../test_categorical_imputer.py               | 56 ++++++++++++-------
 2 files changed, 48 insertions(+), 32 deletions(-)

diff --git a/feature_engine/imputation/categorical.py b/feature_engine/imputation/categorical.py
index f4a4770c6..42e5002c3 100644
--- a/feature_engine/imputation/categorical.py
+++ b/feature_engine/imputation/categorical.py
@@ -146,7 +146,7 @@ def __init__(
         variables: Union[None, int, str, List[Union[str, int]]] = None,
         return_object: bool = False,
         ignore_format: bool = False,
-        errors: str = "raise",
+        multimodal: str = "raise",
     ) -> None:
         if imputation_method not in ["missing", "frequent"]:
             raise ValueError(
@@ -156,10 +156,10 @@ def __init__(
         if not isinstance(ignore_format, bool):
             raise ValueError("ignore_format takes only booleans True and False")
 
-        if errors not in ["raise", "warn", "ignore"]:
+        if multimodal not in ["raise", "warn", "ignore"]:
             raise ValueError(
-                "errors takes only values 'raise', 'warn', or 'ignore'. "
-                f"Got {errors} instead."
+                "multimodal takes only values 'raise', 'warn', or 'ignore'. "
+                f"Got {multimodal} instead."
             )
 
         self.imputation_method = imputation_method
@@ -167,7 +167,7 @@ def __init__(
         self.variables = _check_variables_input_value(variables)
         self.return_object = return_object
         self.ignore_format = ignore_format
-        self.errors = errors
+        self.multimodal = multimodal
 
     def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
         """
@@ -208,14 +208,14 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
 
                 # Some variables may contain more than 1 mode:
                 if len(mode_vals) > 1:
-                    if self.errors == "raise":
+                    if self.multimodal == "raise":
                         raise ValueError(
                             f"The variable {var} contains multiple "
-                            f"frequent categories. Set errors='warn' or "
-                            f"errors='ignore' to allow imputation using "
+                            f"frequent categories. Set multimodal='warn' or "
+                            f"multimodal='ignore' to allow imputation using "
                             f"the first most frequent category found."
                         )
-                    elif self.errors == "warn":
+                    elif self.multimodal == "warn":
                         warnings.warn(
                             f"Variable {var} has multiple frequent "
                             f"categories. The first category found, "
@@ -239,15 +239,15 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
                     else:
                         varnames_str = varnames[0]
 
-                    if self.errors == "raise":
+                    if self.multimodal == "raise":
                         raise ValueError(
                             f"The variable(s) {varnames_str} contain(s) "
                             f"multiple frequent categories. Set "
-                            f"errors='warn' or errors='ignore' to allow "
+                            f"multimodal='warn' or multimodal='ignore' to allow "
                             f"imputation using the first most frequent "
                             f"category found."
                         )
-                    elif self.errors == "warn":
+                    elif self.multimodal == "warn":
                         warnings.warn(
                             f"Variable(s) {varnames_str} have multiple "
                             f"frequent categories. The first category "
diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index f5d0b8de0..066baa0f7 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -172,7 +172,7 @@ def test_error_when_imputation_method_not_frequent_or_missing():
 def test_error_when_variable_contains_multiple_modes(df_na):
     msg = (
         "The variable Name contains multiple frequent categories. "
-        "Set errors='warn' or errors='ignore' to allow imputation "
+        "Set multimodal='warn' or multimodal='ignore' to allow imputation "
         "using the first most frequent category found."
     )
     imputer = CategoricalImputer(imputation_method="frequent", variables="Name")
@@ -181,7 +181,7 @@ def test_error_when_variable_contains_multiple_modes(df_na):
 
     msg = (
         "The variable(s) Name contain(s) multiple frequent categories. "
-        "Set errors='warn' or errors='ignore' to allow imputation "
+        "Set multimodal='warn' or multimodal='ignore' to allow imputation "
         "using the first most frequent category found."
     )
     imputer = CategoricalImputer(imputation_method="frequent")
@@ -192,7 +192,7 @@ def test_error_when_variable_contains_multiple_modes(df_na):
     df_["Name_dup"] = df_["Name"]
     msg = (
         "The variable(s) Name, Name_dup contain(s) multiple frequent categories. "
-        "Set errors='warn' or errors='ignore' to allow imputation "
+        "Set multimodal='warn' or multimodal='ignore' to allow imputation "
         "using the first most frequent category found."
     )
     imputer = CategoricalImputer(imputation_method="frequent")
@@ -334,18 +334,18 @@ def test_multimodal_raises_errors(multimodal_df):
     imputer = CategoricalImputer(imputation_method="frequent")
     msg = (
         "The variable(s) city, country contain(s) multiple frequent categories. "
-        "Set errors='warn' or errors='ignore' to allow imputation "
+        "Set multimodal='warn' or multimodal='ignore' to allow imputation "
         "using the first most frequent category found."
     )
     with pytest.raises(ValueError, match=re.escape(msg)):
         imputer.fit(multimodal_df)
 
 
-@pytest.mark.parametrize("errors", ["warn", "ignore"])
-def test_multimodal_imputation_result(multimodal_df, errors):
-    """Check that result is the same when errors='warn' or 'ignore'."""
-    imputer = CategoricalImputer(imputation_method="frequent", errors=errors)
-    if errors == "warn":
+@pytest.mark.parametrize("multimodal", ["warn", "ignore"])
+def test_multimodal_imputation_result(multimodal_df, multimodal):
+    """Check that result is the same when multimodal='warn' or 'ignore'."""
+    imputer = CategoricalImputer(imputation_method="frequent", multimodal=multimodal)
+    if multimodal == "warn":
         with pytest.warns(UserWarning, match="multiple frequent categories"):
             imputer.fit(multimodal_df)
     else:
@@ -359,17 +359,17 @@ def test_multimodal_imputation_result(multimodal_df, errors):
             assert len(matching_warnings) == 0
 
 
-@pytest.mark.parametrize("errors", ["bad_value", 1, True])
-def test_errors_invalid_value_raises(errors):
-    """Passing an unsupported value for errors should raise ValueError at init."""
-    with pytest.raises(ValueError, match="errors takes only values"):
-        CategoricalImputer(imputation_method="frequent", errors=errors)
+@pytest.mark.parametrize("multimodal", ["bad_value", 1, True])
+def test_multimodal_invalid_value_raises(multimodal):
+    """Passing an unsupported value for multimodal should raise ValueError at init."""
+    with pytest.raises(ValueError, match="multimodal takes only values"):
+        CategoricalImputer(imputation_method="frequent", multimodal=multimodal)
 
 
-def test_errors_param_ignored_when_imputation_method_is_missing():
-    """errors param has no effect for imputation_method='missing'."""
+def test_multimodal_param_ignored_when_imputation_method_is_missing():
+    """multimodal param has no effect for imputation_method='missing'."""
     df = pd.DataFrame({"city": ["London", np.nan, "Paris"]})
-    imputer = CategoricalImputer(imputation_method="missing", errors="warn")
+    imputer = CategoricalImputer(imputation_method="missing", multimodal="warn")
     # Should fit without warnings since there's no mode computation
     with warnings.catch_warnings(record=True) as w:
         warnings.simplefilter("always")
@@ -382,20 +382,36 @@ def test_errors_param_ignored_when_imputation_method_is_missing():
 
 def test_warning_when_single_variable_is_multimodal(multimodal_df):
     imputer = CategoricalImputer(
-        imputation_method="frequent", variables="city", errors="warn"
+        imputation_method="frequent", variables="city", multimodal="warn"
     )
     with pytest.warns(UserWarning, match="multiple frequent categories"):
         imputer.fit(multimodal_df)
     assert imputer.imputer_dict_["city"] == multimodal_df["city"].mode()[0]
 
 
-def test_errors_raise_when_only_one_variable_is_multimodal(multimodal_df):
+def test_warning_when_single_variable_in_list_is_multimodal(multimodal_df):
+    # Test for multimodal='warn' when passing 1 variable in a list
+    # to the variables parameter.
+    imputer = CategoricalImputer(
+        imputation_method="frequent", variables=["city"], multimodal="warn"
+    )
+    with pytest.warns(UserWarning) as record:
+        imputer.fit(multimodal_df)
+
+    # check that warning was raised exactly once
+    assert len(record) == 1
+    # check that warning message is as expected
+    assert "Variable city has multiple frequent categories" in str(record[0].message)
+    assert imputer.imputer_dict_["city"] == multimodal_df["city"].mode()[0]
+
+
+def test_multimodal_raise_when_only_one_variable_is_multimodal(multimodal_df):
     """
     This branch is reached when multiple variables are selected but only ONE of them
     turns out to have multiple modes.
     """
     imputer = CategoricalImputer(
-        imputation_method="frequent", variables=["city", "one_mode"], errors="raise"
+        imputation_method="frequent", variables=["city", "one_mode"], multimodal="raise"
     )
     with pytest.raises(ValueError, match="city"):
         imputer.fit(multimodal_df)

From 1a3fde2b0913d4b9d2467bcb5438fbdfa848c64b Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 27 Mar 2026 19:56:36 +0530
Subject: [PATCH 23/32] Apply suggestion from @solegalli

Co-authored-by: Soledad Galli <solegalli@protonmail.com>
---
 feature_engine/imputation/categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/feature_engine/imputation/categorical.py b/feature_engine/imputation/categorical.py
index 42e5002c3..db5a7be04 100644
--- a/feature_engine/imputation/categorical.py
+++ b/feature_engine/imputation/categorical.py
@@ -93,7 +93,7 @@ class CategoricalImputer(BaseImputer):
 
         If 'raise', raises a ValueError and stops the fit.
 
-        If 'warn', raises a UserWarning and continues, imputing using the
+        If 'warn', raises a UserWarning and continues the imputation using the
         first most frequent category found.
 
         If 'ignore', continues without warnings, imputing using the first

From 36eb1dcafb5273fb7e1a180d69fe06e435abcdc2 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 27 Mar 2026 19:56:46 +0530
Subject: [PATCH 24/32] Apply suggestion from @solegalli

Co-authored-by: Soledad Galli <solegalli@protonmail.com>
---
 feature_engine/imputation/categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/feature_engine/imputation/categorical.py b/feature_engine/imputation/categorical.py
index db5a7be04..4c15e8573 100644
--- a/feature_engine/imputation/categorical.py
+++ b/feature_engine/imputation/categorical.py
@@ -25,7 +25,7 @@
     check_all_variables,
     check_categorical_variables,
     find_all_variables,
-    find_categorical_variables
+    find_categorical_variables,
 )
 
 

From aa37d1915bbf4a383b9870b8cedacd1370a4c76f Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 27 Mar 2026 19:58:03 +0530
Subject: [PATCH 25/32] reformat _docstrings.methods import in categorical.py

---
 feature_engine/imputation/categorical.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/feature_engine/imputation/categorical.py b/feature_engine/imputation/categorical.py
index 4c15e8573..baaff30ea 100644
--- a/feature_engine/imputation/categorical.py
+++ b/feature_engine/imputation/categorical.py
@@ -15,8 +15,10 @@
     _n_features_in_docstring,
     _variables_attribute_docstring
 )
-from feature_engine._docstrings.methods import (_fit_transform_docstring,
-                                                _transform_imputers_docstring)
+from feature_engine._docstrings.methods import (
+    _fit_transform_docstring,
+    _transform_imputers_docstring,
+)
 from feature_engine._docstrings.substitute import Substitution
 from feature_engine.dataframe_checks import check_X
 from feature_engine.imputation.base_imputer import BaseImputer

From 3e58d8bc22f14322c306c28ddaac58451829e7e2 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 27 Mar 2026 21:08:33 +0530
Subject: [PATCH 26/32] removed comments and added tests

---
 feature_engine/imputation/categorical.py      | 17 +----
 .../test_categorical_imputer.py               | 63 +++++--------------
 2 files changed, 17 insertions(+), 63 deletions(-)

diff --git a/feature_engine/imputation/categorical.py b/feature_engine/imputation/categorical.py
index 42e5002c3..60e0ff75c 100644
--- a/feature_engine/imputation/categorical.py
+++ b/feature_engine/imputation/categorical.py
@@ -182,10 +182,8 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
             y is not needed in this imputation. You can pass None or y.
         """
 
-        # check input dataframe
         X = check_X(X)
 
-        # select variables to encode
         if self.ignore_format is True:
             if self.variables is None:
                 self.variables_ = find_all_variables(X)
@@ -201,12 +199,10 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
             self.imputer_dict_ = {var: self.fill_value for var in self.variables_}
 
         elif self.imputation_method == "frequent":
-            # if imputing only 1 variable:
             if len(self.variables_) == 1:
                 var = self.variables_[0]
                 mode_vals = X[var].mode()
 
-                # Some variables may contain more than 1 mode:
                 if len(mode_vals) > 1:
                     if self.multimodal == "raise":
                         raise ValueError(
@@ -225,13 +221,9 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
 
                 self.imputer_dict_ = {var: mode_vals[0]}
 
-            # imputing multiple variables:
             else:
-                # Returns a dataframe with 1 row if there is one mode per
-                # variable, or more rows if there are more modes:
                 mode_vals = X[self.variables_].mode()
 
-                # Careful: some variables contain multiple modes
                 if len(mode_vals) > 1:
                     varnames = mode_vals.dropna(axis=1).columns.to_list()
                     if len(varnames) > 1:
@@ -262,16 +254,14 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
         return self
 
     def transform(self, X: pd.DataFrame) -> pd.DataFrame:
-        # Frequent category imputation
         if self.imputation_method == "frequent":
             X = super().transform(X)
 
-        # Imputation with string
+
         else:
             X = self._transform(X)
 
-            # if variable is of type category, we need to add the new
-            # category, before filling in the nan
+
             add_cats = {}
             for variable in self.variables_:
                 if X[variable].dtype.name == "category":
@@ -285,13 +275,12 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
 
             X = X.assign(**add_cats).fillna(self.imputer_dict_)
 
-        # add additional step to return variables cast as object
+
         if self.return_object:
             X[self.variables_] = X[self.variables_].astype("O")
 
         return X
 
-    # Get docstring from BaseClass
     transform.__doc__ = BaseImputer.transform.__doc__
 
     def _more_tags(self):
diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 066baa0f7..bf216b4d0 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -56,24 +56,20 @@ def test_impute_with_string_missing_and_automatically_find_variables(df_na):
 
 
 def test_user_defined_string_and_automatically_find_variables(df_na):
-    # set up imputer
     imputer = CategoricalImputer(
         imputation_method="missing", fill_value="Unknown", variables=None
     )
     X_transformed = imputer.fit_transform(df_na)
 
-    # set up expected output
     X_reference = df_na.copy()
     X_reference["Name"] = X_reference["Name"].fillna("Unknown")
     X_reference["City"] = X_reference["City"].fillna("Unknown")
     X_reference["Studies"] = X_reference["Studies"].fillna("Unknown")
 
-    # test init params
     assert imputer.imputation_method == "missing"
     assert imputer.fill_value == "Unknown"
     assert imputer.variables is None
 
-    # tes fit attributes
     assert imputer.variables_ == ["Name", "City", "Studies"]
     assert imputer.n_features_in_ == 6
     assert imputer.imputer_dict_ == {
@@ -82,22 +78,18 @@ def test_user_defined_string_and_automatically_find_variables(df_na):
         "Studies": "Unknown",
     }
 
-    # test transform output:
     assert X_transformed[["Name", "City", "Studies"]].isnull().sum().sum() == 0
     assert X_transformed[["Age", "Marks"]].isnull().sum().sum() > 0
     pd.testing.assert_frame_equal(X_transformed, X_reference)
 
 
 def test_mode_imputation_and_single_variable(df_na):
-    # set up imputer
     imputer = CategoricalImputer(imputation_method="frequent", variables="City")
     X_transformed = imputer.fit_transform(df_na)
 
-    # set up expected result
     X_reference = df_na.copy()
     X_reference["City"] = X_reference["City"].fillna("London")
 
-    # test init, fit and transform params, attr and output
     assert imputer.imputation_method == "frequent"
     assert imputer.variables == "City"
     assert imputer.variables_ == ["City"]
@@ -109,24 +101,20 @@ def test_mode_imputation_and_single_variable(df_na):
 
 
 def test_mode_imputation_with_multiple_variables(df_na):
-    # set up imputer
     imputer = CategoricalImputer(
         imputation_method="frequent", variables=["Studies", "City"]
     )
     X_transformed = imputer.fit_transform(df_na)
 
-    # set up expected output
     X_reference = df_na.copy()
     X_reference["City"] = X_reference["City"].fillna("London")
     X_reference["Studies"] = X_reference["Studies"].fillna("Bachelor")
 
-    # test fit attr and transform output
     assert imputer.imputer_dict_ == {"Studies": "Bachelor", "City": "London"}
     pd.testing.assert_frame_equal(X_transformed, X_reference)
 
 
 def test_imputation_of_numerical_vars_cast_as_object_and_returned_as_numerical(df_na):
-    # test case: imputing of numerical variables cast as object + return numeric
     df_na = df_na.copy()
     df_na["Marks"] = df_na["Marks"].astype("O")
     imputer = CategoricalImputer(
@@ -150,8 +138,6 @@ def test_imputation_of_numerical_vars_cast_as_object_and_returned_as_numerical(d
 
 
 def test_imputation_of_numerical_vars_cast_as_object_and_returned_as_object(df_na):
-    # test case 6: imputing of numerical variables cast as object + return as object
-    # after imputation
     df_na = df_na.copy()
     df_na["Marks"] = df_na["Marks"].astype("O")
     imputer = CategoricalImputer(
@@ -201,7 +187,6 @@ def test_error_when_variable_contains_multiple_modes(df_na):
 
 
 def test_impute_numerical_variables(df_na):
-    # set up transformer
     imputer = CategoricalImputer(
         imputation_method="missing",
         fill_value=0,
@@ -210,24 +195,19 @@ def test_impute_numerical_variables(df_na):
     )
     X_transformed = imputer.fit_transform(df_na)
 
-    # set up expected output
     X_reference = df_na.copy()
     X_reference = X_reference.fillna(0)
 
-    # test init params
     assert imputer.imputation_method == "missing"
     assert imputer.variables == ["Name", "City", "Studies", "Age", "Marks"]
 
-    # test fit attributes
     assert imputer.variables_ == ["Name", "City", "Studies", "Age", "Marks"]
     assert imputer.n_features_in_ == 6
 
-    # test transform params
     pd.testing.assert_frame_equal(X_transformed, X_reference)
 
 
 def test_impute_numerical_variables_with_mode(df_na):
-    # set up transformer
     imputer = CategoricalImputer(
         imputation_method="frequent",
         variables=["City", "Studies", "Marks"],
@@ -235,16 +215,13 @@ def test_impute_numerical_variables_with_mode(df_na):
     )
     X_transformed = imputer.fit_transform(df_na)
 
-    # set up expected output
     X_reference = df_na.copy()
     X_reference["City"] = X_reference["City"].fillna("London")
     X_reference["Studies"] = X_reference["Studies"].fillna("Bachelor")
     X_reference["Marks"] = X_reference["Marks"].fillna(0.8)
 
-    # test init params
     assert imputer.variables == ["City", "Studies", "Marks"]
 
-    # test fit attributes
     assert imputer.variables_ == ["City", "Studies", "Marks"]
     assert imputer.n_features_in_ == 6
     assert imputer.imputer_dict_ == {
@@ -253,7 +230,6 @@ def test_impute_numerical_variables_with_mode(df_na):
         "Marks": 0.8,
     }
 
-    # test transform output
     pd.testing.assert_frame_equal(X_transformed, X_reference)
 
 
@@ -265,7 +241,6 @@ def test_variables_cast_as_category_missing(df_na):
     imputer = CategoricalImputer(imputation_method="missing", variables=None)
     X_transformed = imputer.fit_transform(df_na)
 
-    # set up expected output
     X_reference = df_na.copy()
     X_reference["Name"] = X_reference["Name"].fillna("Missing")
     X_reference["Studies"] = X_reference["Studies"].fillna("Missing")
@@ -274,7 +249,6 @@ def test_variables_cast_as_category_missing(df_na):
         X_reference["City"].cat.add_categories("Missing").fillna("Missing")
     )
 
-    # test fit attributes
     assert imputer.variables_ == ["Name", "City", "Studies"]
     assert imputer.imputer_dict_ == {
         "Name": "Missing",
@@ -282,9 +256,6 @@ def test_variables_cast_as_category_missing(df_na):
         "Studies": "Missing",
     }
 
-    # test transform output
-    # selected columns should have no NA
-    # non selected columns should still have NA
     assert X_transformed[["Name", "City", "Studies"]].isnull().sum().sum() == 0
     assert X_transformed[["Age", "Marks"]].isnull().sum().sum() > 0
     pd.testing.assert_frame_equal(X_transformed, X_reference)
@@ -294,27 +265,21 @@ def test_variables_cast_as_category_frequent(df_na):
     df_na = df_na.copy()
     df_na["City"] = df_na["City"].astype("category")
 
-    # this variable does not have a mode, so drop
     df_na.drop(labels=["Name"], axis=1, inplace=True)
 
     imputer = CategoricalImputer(imputation_method="frequent", variables=None)
     X_transformed = imputer.fit_transform(df_na)
 
-    # set up expected output
     X_reference = df_na.copy()
     X_reference["Studies"] = X_reference["Studies"].fillna("Bachelor")
     X_reference["City"] = X_reference["City"].fillna("London")
 
-    # test fit attributes
     assert imputer.variables_ == ["City", "Studies"]
     assert imputer.imputer_dict_ == {
         "City": "London",
         "Studies": "Bachelor",
     }
 
-    # test transform output
-    # selected columns should have no NA
-    # non selected columns should still have NA
     assert X_transformed[["City", "Studies"]].isnull().sum().sum() == 0
     assert X_transformed[["Age", "Marks"]].isnull().sum().sum() > 0
     pd.testing.assert_frame_equal(X_transformed, X_reference)
@@ -334,8 +299,6 @@ def test_multimodal_raises_errors(multimodal_df):
     imputer = CategoricalImputer(imputation_method="frequent")
     msg = (
         "The variable(s) city, country contain(s) multiple frequent categories. "
-        "Set multimodal='warn' or multimodal='ignore' to allow imputation "
-        "using the first most frequent category found."
     )
     with pytest.raises(ValueError, match=re.escape(msg)):
         imputer.fit(multimodal_df)
@@ -343,7 +306,6 @@ def test_multimodal_raises_errors(multimodal_df):
 
 @pytest.mark.parametrize("multimodal", ["warn", "ignore"])
 def test_multimodal_imputation_result(multimodal_df, multimodal):
-    """Check that result is the same when multimodal='warn' or 'ignore'."""
     imputer = CategoricalImputer(imputation_method="frequent", multimodal=multimodal)
     if multimodal == "warn":
         with pytest.warns(UserWarning, match="multiple frequent categories"):
@@ -352,7 +314,6 @@ def test_multimodal_imputation_result(multimodal_df, multimodal):
         with warnings.catch_warnings(record=True) as w:
             warnings.simplefilter("always")
             imputer.fit(multimodal_df)
-            # Check that no warnings with the specific message were raised
             matching_warnings = [
                 msg for msg in w if "multiple frequent categories" in str(msg.message)
             ]
@@ -361,13 +322,11 @@ def test_multimodal_imputation_result(multimodal_df, multimodal):
 
 @pytest.mark.parametrize("multimodal", ["bad_value", 1, True])
 def test_multimodal_invalid_value_raises(multimodal):
-    """Passing an unsupported value for multimodal should raise ValueError at init."""
     with pytest.raises(ValueError, match="multimodal takes only values"):
         CategoricalImputer(imputation_method="frequent", multimodal=multimodal)
 
 
 def test_multimodal_param_ignored_when_imputation_method_is_missing():
-    """multimodal param has no effect for imputation_method='missing'."""
     df = pd.DataFrame({"city": ["London", np.nan, "Paris"]})
     imputer = CategoricalImputer(imputation_method="missing", multimodal="warn")
     # Should fit without warnings since there's no mode computation
@@ -390,26 +349,32 @@ def test_warning_when_single_variable_is_multimodal(multimodal_df):
 
 
 def test_warning_when_single_variable_in_list_is_multimodal(multimodal_df):
-    # Test for multimodal='warn' when passing 1 variable in a list
-    # to the variables parameter.
     imputer = CategoricalImputer(
         imputation_method="frequent", variables=["city"], multimodal="warn"
     )
     with pytest.warns(UserWarning) as record:
         imputer.fit(multimodal_df)
 
-    # check that warning was raised exactly once
     assert len(record) == 1
-    # check that warning message is as expected
     assert "Variable city has multiple frequent categories" in str(record[0].message)
     assert imputer.imputer_dict_["city"] == multimodal_df["city"].mode()[0]
 
 
+def test_ignore_when_single_variable_is_multimodal(multimodal_df):
+    imputer = CategoricalImputer(
+        imputation_method="frequent", variables="city", multimodal="ignore"
+    )
+    with warnings.catch_warnings(record=True) as w:
+        warnings.simplefilter("always")
+        imputer.fit(multimodal_df)
+        matching_warnings = [
+            msg for msg in w if "multiple frequent categories" in str(msg.message)
+        ]
+        assert len(matching_warnings) == 0
+    assert imputer.imputer_dict_["city"] == multimodal_df["city"].mode()[0]
+
+
 def test_multimodal_raise_when_only_one_variable_is_multimodal(multimodal_df):
-    """
-    This branch is reached when multiple variables are selected but only ONE of them
-    turns out to have multiple modes.
-    """
     imputer = CategoricalImputer(
         imputation_method="frequent", variables=["city", "one_mode"], multimodal="raise"
     )

From c77e8f178b49258e62825fcf8fb9edabf98ec011 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 27 Mar 2026 21:10:33 +0530
Subject: [PATCH 27/32] Update .gitignore

---
 .gitignore | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 0096d1595..399a7473b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -86,7 +86,6 @@ celerybeat-schedule
 # Environments
 .env
 .venv
-.venv_wsl
 env/
 venv/
 ENV/
@@ -112,4 +111,4 @@ venv.bak/
 *.csv
 *.DS_Store
 *.db
-*.pptx
\ No newline at end of file
+*.pptx

From a22f586ab82f15c26162663a4ea5f950a8bfc889 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 27 Mar 2026 21:14:40 +0530
Subject: [PATCH 28/32] remove extra blank lines in transform and leftover test comments

---
 feature_engine/imputation/categorical.py          | 6 ------
 tests/test_imputation/test_categorical_imputer.py | 7 -------
 2 files changed, 13 deletions(-)

diff --git a/feature_engine/imputation/categorical.py b/feature_engine/imputation/categorical.py
index 6c927f6ce..a64b3a49b 100644
--- a/feature_engine/imputation/categorical.py
+++ b/feature_engine/imputation/categorical.py
@@ -258,12 +258,8 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
     def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         if self.imputation_method == "frequent":
             X = super().transform(X)
-
-
         else:
             X = self._transform(X)
-
-
             add_cats = {}
             for variable in self.variables_:
                 if X[variable].dtype.name == "category":
@@ -277,10 +273,8 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
 
             X = X.assign(**add_cats).fillna(self.imputer_dict_)
 
-
         if self.return_object:
             X[self.variables_] = X[self.variables_].astype("O")
-
         return X
 
     transform.__doc__ = BaseImputer.transform.__doc__
diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index bf216b4d0..a179d0e80 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -24,21 +24,17 @@ def multimodal_df():
 
 
 def test_impute_with_string_missing_and_automatically_find_variables(df_na):
-    # set up transformer
     imputer = CategoricalImputer(imputation_method="missing", variables=None)
     X_transformed = imputer.fit_transform(df_na)
 
-    # set up expected output
     X_reference = df_na.copy()
     X_reference["Name"] = X_reference["Name"].fillna("Missing")
     X_reference["City"] = X_reference["City"].fillna("Missing")
     X_reference["Studies"] = X_reference["Studies"].fillna("Missing")
 
-    # test init params
     assert imputer.imputation_method == "missing"
     assert imputer.variables is None
 
-    # test fit attributes
     assert imputer.variables_ == ["Name", "City", "Studies"]
     assert imputer.n_features_in_ == 6
     assert imputer.imputer_dict_ == {
@@ -47,9 +43,6 @@ def test_impute_with_string_missing_and_automatically_find_variables(df_na):
         "Studies": "Missing",
     }
 
-    # test transform output
-    # selected columns should have no NA
-    # non selected columns should still have NA
     assert X_transformed[["Name", "City", "Studies"]].isnull().sum().sum() == 0
     assert X_transformed[["Age", "Marks"]].isnull().sum().sum() > 0
     pd.testing.assert_frame_equal(X_transformed, X_reference)

From 7156d2806ad6be6630794f9e6beb72093e172637 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 27 Mar 2026 21:17:40 +0530
Subject: [PATCH 29/32] tidy blank lines in fit and transform

---
 feature_engine/imputation/categorical.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/feature_engine/imputation/categorical.py b/feature_engine/imputation/categorical.py
index a64b3a49b..5bc772e4c 100644
--- a/feature_engine/imputation/categorical.py
+++ b/feature_engine/imputation/categorical.py
@@ -252,7 +252,6 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
                 self.imputer_dict_ = mode_vals.iloc[0].to_dict()
 
         self._get_feature_names_in(X)
-
         return self
 
     def transform(self, X: pd.DataFrame) -> pd.DataFrame:
@@ -272,7 +271,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
                     )
 
             X = X.assign(**add_cats).fillna(self.imputer_dict_)
-
+            
         if self.return_object:
             X[self.variables_] = X[self.variables_].astype("O")
         return X

From 5d65fe8b425bb970d065ce9376170859423c1e9e Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 27 Mar 2026 21:25:15 +0530
Subject: [PATCH 30/32] simplify expected-error matching in multimodal tests

---
 .../test_categorical_imputer.py               | 27 +++----------------
 1 file changed, 4 insertions(+), 23 deletions(-)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index a179d0e80..7be36a754 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -1,4 +1,3 @@
-import re
 import warnings
 
 import numpy as np
@@ -149,33 +148,18 @@ def test_error_when_imputation_method_not_frequent_or_missing():
 
 
 def test_error_when_variable_contains_multiple_modes(df_na):
-    msg = (
-        "The variable Name contains multiple frequent categories. "
-        "Set multimodal='warn' or multimodal='ignore' to allow imputation "
-        "using the first most frequent category found."
-    )
     imputer = CategoricalImputer(imputation_method="frequent", variables="Name")
-    with pytest.raises(ValueError, match=re.escape(msg)):
+    with pytest.raises(ValueError, match="The variable Name contains multiple frequent categories"):
         imputer.fit(df_na)
 
-    msg = (
-        "The variable(s) Name contain(s) multiple frequent categories. "
-        "Set multimodal='warn' or multimodal='ignore' to allow imputation "
-        "using the first most frequent category found."
-    )
     imputer = CategoricalImputer(imputation_method="frequent")
-    with pytest.raises(ValueError, match=re.escape(msg)):
+    with pytest.raises(ValueError, match="The variable\(s\) Name contain\(s\) multiple frequent categories"):
         imputer.fit(df_na)
 
     df_ = df_na.copy()
     df_["Name_dup"] = df_["Name"]
-    msg = (
-        "The variable(s) Name, Name_dup contain(s) multiple frequent categories. "
-        "Set multimodal='warn' or multimodal='ignore' to allow imputation "
-        "using the first most frequent category found."
-    )
     imputer = CategoricalImputer(imputation_method="frequent")
-    with pytest.raises(ValueError, match=re.escape(msg)):
+    with pytest.raises(ValueError, match="The variable\(s\) Name, Name_dup contain\(s\) multiple frequent categories"):
         imputer.fit(df_)
 
 
@@ -290,10 +274,7 @@ def test_error_when_ignore_format_is_not_boolean(ignore_format):
 
 def test_multimodal_raises_errors(multimodal_df):
     imputer = CategoricalImputer(imputation_method="frequent")
-    msg = (
-        "The variable(s) city, country contain(s) multiple frequent categories. "
-    )
-    with pytest.raises(ValueError, match=re.escape(msg)):
+    with pytest.raises(ValueError, match="multiple frequent categories"):
         imputer.fit(multimodal_df)
 
 

From a95f5e03c041a4bb1d133ef2be8db8918ca66ff1 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 27 Mar 2026 21:28:50 +0530
Subject: [PATCH 31/32] use raw-string match patterns in tests and strip whitespace-only line

---
 feature_engine/imputation/categorical.py          | 2 +-
 tests/test_imputation/test_categorical_imputer.py | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/feature_engine/imputation/categorical.py b/feature_engine/imputation/categorical.py
index 5bc772e4c..e7200287c 100644
--- a/feature_engine/imputation/categorical.py
+++ b/feature_engine/imputation/categorical.py
@@ -271,7 +271,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
                     )
 
             X = X.assign(**add_cats).fillna(self.imputer_dict_)
-            
+
         if self.return_object:
             X[self.variables_] = X[self.variables_].astype("O")
         return X
diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 7be36a754..5735fdf46 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -153,13 +153,15 @@ def test_error_when_variable_contains_multiple_modes(df_na):
         imputer.fit(df_na)
 
     imputer = CategoricalImputer(imputation_method="frequent")
-    with pytest.raises(ValueError, match="The variable\(s\) Name contain\(s\) multiple frequent categories"):
+    msg = r"The variable\(s\) Name contain\(s\) multiple frequent categories"
+    with pytest.raises(ValueError, match=msg):
         imputer.fit(df_na)
 
     df_ = df_na.copy()
     df_["Name_dup"] = df_["Name"]
     imputer = CategoricalImputer(imputation_method="frequent")
-    with pytest.raises(ValueError, match="The variable\(s\) Name, Name_dup contain\(s\) multiple frequent categories"):
+    msg = r"The variable\(s\) Name, Name_dup contain\(s\) multiple frequent categories"
+    with pytest.raises(ValueError, match=msg):
         imputer.fit(df_)
 
 

From 6f5b4da923f3503b404907dcda1e1e3e54eab142 Mon Sep 17 00:00:00 2001
From: Direk Kakkar <millioniar1717@gmail.com>
Date: Fri, 27 Mar 2026 21:32:42 +0530
Subject: [PATCH 32/32] move over-long match string into msg variable

---
 tests/test_imputation/test_categorical_imputer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_imputation/test_categorical_imputer.py b/tests/test_imputation/test_categorical_imputer.py
index 5735fdf46..b1d36da8c 100644
--- a/tests/test_imputation/test_categorical_imputer.py
+++ b/tests/test_imputation/test_categorical_imputer.py
@@ -149,7 +149,8 @@ def test_error_when_imputation_method_not_frequent_or_missing():
 
 def test_error_when_variable_contains_multiple_modes(df_na):
     imputer = CategoricalImputer(imputation_method="frequent", variables="Name")
-    with pytest.raises(ValueError, match="The variable Name contains multiple frequent categories"):
+    msg = "The variable Name contains multiple frequent categories"
+    with pytest.raises(ValueError, match=msg):
         imputer.fit(df_na)
 
     imputer = CategoricalImputer(imputation_method="frequent")