Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1117,6 +1117,7 @@ Conversion

Strings
^^^^^^^
- Bug in :meth:`Series.str.match` failing to raise when given a compiled ``re.Pattern`` object and conflicting ``case`` or ``flags`` arguments (:issue:`62240`)
- Bug in :meth:`Series.str.replace` raising an error on valid group references (``\1``, ``\2``, etc.) on series converted to PyArrow backend dtype (:issue:`62653`)
- Bug in :meth:`Series.str.zfill` raising ``AttributeError`` for :class:`ArrowDtype` (:issue:`61485`)
- Bug in :meth:`Series.value_counts` not respecting ``sort=False`` for Series with ``string`` dtype (:issue:`55224`)
Expand Down
41 changes: 40 additions & 1 deletion pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1351,7 +1351,13 @@ def contains(
return self._wrap_result(result, fill_value=na, returns_string=False)

@forbid_nonstring_types(["bytes"])
def match(self, pat: str, case: bool = True, flags: int = 0, na=lib.no_default):
def match(
self,
pat: str | re.Pattern,
case: bool | lib.NoDefault = lib.no_default,
flags: int | lib.NoDefault = lib.no_default,
na=lib.no_default,
):
"""
Determine if each string starts with a match of a regular expression.

Expand Down Expand Up @@ -1397,6 +1403,39 @@ def match(self, pat: str, case: bool = True, flags: int = 0, na=lib.no_default):
2 False
dtype: bool
"""
if flags is not lib.no_default:
# pat.flags will have re.U regardless, so we need to add it here
# before checking for a match
flags = flags | re.U
if is_re(pat):
if pat.flags != flags:
raise ValueError(
"Cannot both specify 'flags' and pass a compiled regexp "
"object with conflicting flags"
)
else:
pat = re.compile(pat, flags=flags)
# pat is a compiled pattern at this point and already carries the flags,
# so reset flags=0: re.compile(pat, flags=flags) raises ValueError when
# given a compiled pattern together with nonzero flags.
flags = 0
else:
flags = 0

if case is lib.no_default:
if is_re(pat):
case = not bool(pat.flags & re.IGNORECASE)
else:
# Case-sensitive default
case = True
elif is_re(pat):
implicit_case = not bool(pat.flags & re.IGNORECASE)
if implicit_case != case:
# GH#62240
raise ValueError(
"Cannot both specify 'case' and pass a compiled regexp "
"object with conflicting case-sensitivity"
)

result = self._data.array._str_match(pat, case=case, flags=flags, na=na)
return self._wrap_result(result, fill_value=na, returns_string=False)

Expand Down
12 changes: 11 additions & 1 deletion pandas/core/strings/object_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,17 @@ def _str_match(
if not case:
flags |= re.IGNORECASE

regex = re.compile(pat, flags=flags)
if isinstance(pat, re.Pattern):
# We need to check that flags matches pat.flags.
# pat.flags will have re.U regardless, so we need to add it here
# before checking for a match
flags = flags | re.U

if flags != pat.flags:
raise ValueError("Cannot pass flags that do not match pat.flags")
regex = pat
else:
regex = re.compile(pat, flags=flags)

f = lambda x: regex.match(x) is not None
return self._str_map(f, na_value=na, dtype=np.dtype(bool))
Expand Down
27 changes: 14 additions & 13 deletions pandas/tests/strings/test_find_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1004,26 +1004,27 @@ def test_match_compiled_regex(any_string_dtype):
expected = Series([True, False, True, False], dtype=expected_dtype)
tm.assert_series_equal(result, expected)

# TODO this currently works for pyarrow-backed dtypes but raises for python
if any_string_dtype == "string" and any_string_dtype.storage == "pyarrow":
result = values.str.match(re.compile("ab"), case=False)
expected = Series([True, True, True, True], dtype=expected_dtype)
tm.assert_series_equal(result, expected)
else:
with pytest.raises(
ValueError, match="cannot process flags argument with a compiled pattern"
):
values.str.match(re.compile("ab"), case=False)
msg = (
"Cannot both specify 'case' and pass a compiled "
"regexp object with conflicting case-sensitivity"
)
with pytest.raises(ValueError, match=msg):
values.str.match(re.compile("ab"), case=False)

result = values.str.match(re.compile("ab", flags=re.IGNORECASE))
expected = Series([True, True, True, True], dtype=expected_dtype)
tm.assert_series_equal(result, expected)

with pytest.raises(
ValueError, match="cannot process flags argument with a compiled pattern"
):
msg = (
"Cannot both specify 'flags' and pass a compiled "
"regexp object with conflicting flags"
)
with pytest.raises(ValueError, match=msg):
values.str.match(re.compile("ab"), flags=re.IGNORECASE)

# But if the flags match you're OK
values.str.match(re.compile("ab", flags=re.IGNORECASE), flags=re.IGNORECASE)


@pytest.mark.parametrize(
"pat, case, exp",
Expand Down
Loading