Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 41 additions & 1 deletion pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1351,7 +1351,13 @@ def contains(
return self._wrap_result(result, fill_value=na, returns_string=False)

@forbid_nonstring_types(["bytes"])
def match(self, pat: str, case: bool = True, flags: int = 0, na=lib.no_default):
def match(
self,
pat: str | re.Pattern,
case: bool | lib.NoDefault = lib.no_default,
flags: int | lib.NoDefault = lib.no_default,
na=lib.no_default,
):
"""
Determine if each string starts with a match of a regular expression.

Expand Down Expand Up @@ -1397,6 +1403,40 @@ def match(self, pat: str, case: bool = True, flags: int = 0, na=lib.no_default):
2 False
dtype: bool
"""
if flags is not lib.no_default:
# pat.flags will have re.U regardless, so we need to add it here
# before checking for a match
flags = flags | re.U
if is_re(pat):
if pat.flags != flags:
raise ValueError(
"Cannot both specify 'flags' and pass a compiled regexp "
"object with conflicting flags"
)
else:
pat = re.compile(pat, flags=flags)
# set flags=0 to ensure that when we call
# re.compile(pat, flags=flags) the constructor does not raise.
flags = 0
else:
flags = 0

if case is lib.no_default:
if is_re(pat):
implicit_case = not bool(pat.flags & re.IGNORECASE)
case = True
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

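Should this be the following instead? As written, `implicit_case` is assigned but never used in this branch, and `case` is set to `True` even when the compiled pattern was built with `re.IGNORECASE`: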

Suggested change
if case is lib.no_default:
if is_re(pat):
implicit_case = not bool(pat.flags & re.IGNORECASE)
case = True
if case is lib.no_default:
if is_re(pat):
case = not bool(pat.flags & re.IGNORECASE)

else:
# Case-sensitive default
case = True
elif is_re(pat):
implicit_case = not bool(pat.flags & re.IGNORECASE)
if implicit_case != case:
# GH#62240
raise ValueError(
"Cannot both specify 'case' and pass a compiled regexp "
"object with conflicting case-sensitivity"
)

result = self._data.array._str_match(pat, case=case, flags=flags, na=na)
return self._wrap_result(result, fill_value=na, returns_string=False)

Expand Down
27 changes: 14 additions & 13 deletions pandas/tests/strings/test_find_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1004,26 +1004,27 @@ def test_match_compiled_regex(any_string_dtype):
expected = Series([True, False, True, False], dtype=expected_dtype)
tm.assert_series_equal(result, expected)

# TODO this currently works for pyarrow-backed dtypes but raises for python
if any_string_dtype == "string" and any_string_dtype.storage == "pyarrow":
result = values.str.match(re.compile("ab"), case=False)
expected = Series([True, True, True, True], dtype=expected_dtype)
tm.assert_series_equal(result, expected)
else:
with pytest.raises(
ValueError, match="cannot process flags argument with a compiled pattern"
):
values.str.match(re.compile("ab"), case=False)
msg = (
"Cannot both specify 'case' and pass a compiled "
"regexp object with conflicting case-sensitivity"
)
with pytest.raises(ValueError, match=msg):
values.str.match(re.compile("ab"), case=False)

result = values.str.match(re.compile("ab", flags=re.IGNORECASE))
expected = Series([True, True, True, True], dtype=expected_dtype)
tm.assert_series_equal(result, expected)

with pytest.raises(
ValueError, match="cannot process flags argument with a compiled pattern"
):
msg = (
"Cannot both specify 'flags' and pass a compiled "
"regexp object with conflicting flags"
)
with pytest.raises(ValueError, match=msg):
values.str.match(re.compile("ab"), flags=re.IGNORECASE)

# But if the flags match you're OK
values.str.match(re.compile("ab", flags=re.IGNORECASE), flags=re.IGNORECASE)


@pytest.mark.parametrize(
"pat, case, exp",
Expand Down
Loading