diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4ab20623cc561..375f788cb12b7 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1150,6 +1150,7 @@ Indexing - Bug in :meth:`Series.__setitem__` when assigning boolean series with boolean indexer will raise ``LossySetitemError`` (:issue:`57338`) - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`) - Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`) +- Bug in :meth:`DataFrame.__setitem__` raising a ``ValueError`` when setting a column with a 2D object array of shape ``(n, 1)`` (:issue:`61026`) - Bug in :meth:`DataFrame.loc.__getitem__` and :meth:`DataFrame.iloc.__getitem__` with a :class:`CategoricalDtype` column with integer categories raising when trying to index a row containing a ``NaN`` entry (:issue:`58954`) - Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`) - Bug in :meth:`Index.get_indexer` not casting missing values correctly for new string datatype (:issue:`55833`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c8c246434f6d8..273e70140538b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5502,7 +5502,30 @@ def _sanitize_column(self, value) -> tuple[ArrayLike, BlockValuesRefs | None]: if is_list_like(value): com.require_length_match(value, self.index) - return sanitize_array(value, self.index, copy=True, allow_2d=True), None + + # GH#61026: special-case 2D inputs for single-column assignment.
+ # - accept shape (n, 1) by flattening to 1D + # - reject 2D *object* arrays of any other width, because the assignment + # targets a single column key + arr = value + + # np.matrix is always 2D; convert it to a regular ndarray first + if isinstance(arr, np.matrix): + arr = np.asarray(arr) + + if isinstance(arr, np.ndarray) and arr.ndim == 2: + if arr.shape[1] == 1: + # treat (n, 1) as a length-n 1D array + arr = arr[:, 0] + elif arr.dtype == object: + # single-column setitem with any other 2D object array is not allowed. + msg = ( + "Setting a DataFrame column with a 2D array requires " + f"shape (n, 1); got shape {arr.shape}." + ) + raise ValueError(msg) + subarr = sanitize_array(arr, self.index, copy=True, allow_2d=True) + return subarr, None @property def _series(self): diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 0de2455079027..9d631026fc074 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -816,6 +816,24 @@ def test_setitem_index_object_dtype_not_inferring(self): ) tm.assert_frame_equal(df, expected) + def test_setitem_2d_object_array(self): + # GH#61026 + df = DataFrame( + { + "c1": [1, 2, 3, 4, 5], + } + ) + + arr = np.array([["A"], ["B"], ["C"], ["D"], ["E"]], dtype=object) + df["c1"] = arr + + expected = DataFrame( + { + "c1": ["A", "B", "C", "D", "E"], + } + ) + tm.assert_frame_equal(df, expected) + class TestSetitemTZAwareValues: @pytest.fixture