From 59aaa8ec0a744ff17ad90646ad75ec578462554c Mon Sep 17 00:00:00 2001 From: antareepsarkar Date: Wed, 26 Nov 2025 21:08:01 +0530 Subject: [PATCH 1/5] BUG: raise TypeError when array not like 1D in pandas.array --- pandas/core/construction.py | 6 ++++++ pandas/tests/arrays/test_array.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 5868bdaa1225b..3b014eba99a5c 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -321,6 +321,12 @@ def array( return data.copy() return data + # to avoid returning an array of string representation of objects. + if dtype == StringDtype(): + ndarr = np.array(data) + if ndarr.ndim != 1: + raise TypeError("Values must be a 1D list-like") + if isinstance(dtype, ExtensionDtype): cls = dtype.construct_array_type() return cls._from_sequence(data, dtype=dtype, copy=copy) diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index c327d1b647bce..1f526a7916416 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -460,6 +460,12 @@ def test_nd_raises(data): pd.array(data, dtype="int64") +@pytest.mark.parametrize("data", [[["a"], ["b"]]]) +def test_not_1D_like_raises(data): + with pytest.raises(TypeError, match="Values must be a 1D list-like"): + pd.array(data, dtype=pd.StringDtype()) + + def test_scalar_raises(): with pytest.raises(ValueError, match="Cannot pass scalar '1'"): pd.array(1) From 4fd428fc0c81302ff65ef0ead2eb06b043c057eb Mon Sep 17 00:00:00 2001 From: antareepsarkar Date: Fri, 28 Nov 2025 21:08:22 +0530 Subject: [PATCH 2/5] BUG: raise TypeError when array is not all scalar in pandas.array --- pandas/core/construction.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 3b014eba99a5c..2fa0154184f62 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -323,8 +323,7 @@ def array( # to avoid returning an array of string representation of objects. if dtype == StringDtype(): - ndarr = np.array(data) - if ndarr.ndim != 1: + if any(not lib.is_scalar(i) for i in data): raise TypeError("Values must be a 1D list-like") if isinstance(dtype, ExtensionDtype): From 8d41928e4eeaf9b94e5bd319533022357cf193db Mon Sep 17 00:00:00 2001 From: antareepsarkar Date: Wed, 3 Dec 2025 15:46:19 +0530 Subject: [PATCH 3/5] BUG: add function to check whether data is all scalar in pandas.array --- pandas/_libs/lib.pyi | 1 + pandas/_libs/lib.pyx | 20 ++++++++++++++++++++ pandas/core/construction.py | 4 ++-- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index e50b301c34868..9563a448713ce 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -165,6 +165,7 @@ def maybe_indices_to_slice( indices: npt.NDArray[np.intp], max_len: int, ) -> slice | npt.NDArray[np.intp]: ... +def is_all_scalar(obj: list | tuple) -> bool: ... def is_all_arraylike(obj: list) -> bool: ... # ----------------------------------------------------------------- diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 7ed36419babf1..122002e67564e 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -862,6 +862,26 @@ cpdef ndarray[object] ensure_string_array( return result +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef bool is_all_scalar(obj: list | tuple): + # Optimized for lists and tuples + cdef: + Py_ssize_t i, n = len(obj) + bool all_scalars = True + object temp + + for i in range(n): + temp = obj[i] + if isinstance(temp, (bytes, str)): + continue + elif hasattr(temp, "__iter__"): + all_scalars = False + break + + return all_scalars + + def is_all_arraylike(obj: list) -> bool: """ Should we treat these as levels of a MultiIndex, as opposed to Index items? diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 2fa0154184f62..4c48412794734 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -322,8 +322,8 @@ def array( return data # to avoid returning an array of string representation of objects. - if dtype == StringDtype(): - if any(not lib.is_scalar(i) for i in data): + if isinstance(dtype, StringDtype) and isinstance(data, (list, tuple)): + if not lib.is_all_scalar(data): raise TypeError("Values must be a 1D list-like") if isinstance(dtype, ExtensionDtype): From c4e3ba0e742909f2d6cf75b21760f888f4a41353 Mon Sep 17 00:00:00 2001 From: antareepsarkar Date: Wed, 3 Dec 2025 16:55:09 +0530 Subject: [PATCH 4/5] BUG: add function to check whether data is all scalar in pandas.array --- pandas/_libs/lib.pyx | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 122002e67564e..d300d83e53292 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -862,10 +862,7 @@ cpdef ndarray[object] ensure_string_array( return result -@cython.wraparound(False) -@cython.boundscheck(False) -cpdef bool is_all_scalar(obj: list | tuple): - # Optimized for lists and tuples +def is_all_scalar(obj: list | tuple) -> bool: cdef: Py_ssize_t i, n = len(obj) bool all_scalars = True From 0a3846f90fe5808c0575a64d637e8748659ed07f Mon Sep 17 00:00:00 2001 From: antareepsarkar Date: Wed, 3 Dec 2025 17:14:57 +0530 Subject: [PATCH 5/5] BUG: add function to check whether data is all scalar in pandas.array --- pandas/_libs/lib.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index d300d83e53292..c074099685610 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -865,9 +865,10 @@ cpdef ndarray[object] ensure_string_array( def is_all_scalar(obj: list | tuple) -> bool: cdef: Py_ssize_t i, n = len(obj) - bool all_scalars = True object temp + all_scalars = True + for i in range(n): temp = obj[i] if isinstance(temp, (bytes, str)):