Skip to content

Commit f896a82

Browse files
committed
gh-148729: use memchr in SRE prefix scan
For single-byte characters, use `memchr` instead of the equivalent hand-written while loop. This ensures that `re.search` is typically vectorized through libc for regexes starting with a `LITERAL`. In the no-match case this means scanning 16 or 32 bytes per iteration instead of a single byte (admittedly, the hand-written loop was unrolled, but not auto-vectorized). Signed-off-by: Harmen Stoppels <harmenstoppels@gmail.com>
1 parent 7ce737e commit f896a82

File tree

1 file changed

+18
-0
lines changed

1 file changed

+18
-0
lines changed

Modules/_sre/sre_lib.h

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1753,10 +1753,19 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
17531753
end = (SRE_CHAR *)state->end;
17541754
state->must_advance = 0;
17551755
while (ptr < end) {
1756+
#if SIZEOF_SRE_CHAR == 1
1757+
{
1758+
SRE_CHAR *found = memchr(ptr, c, end - ptr);
1759+
if (!found)
1760+
return 0;
1761+
ptr = found;
1762+
}
1763+
#else
17561764
while (*ptr != c) {
17571765
if (++ptr >= end)
17581766
return 0;
17591767
}
1768+
#endif
17601769
TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
17611770
state->start = ptr;
17621771
state->ptr = ptr + prefix_skip;
@@ -1786,10 +1795,19 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
17861795
#endif
17871796
while (ptr < end) {
17881797
SRE_CHAR c = (SRE_CHAR) prefix[0];
1798+
#if SIZEOF_SRE_CHAR == 1
1799+
{
1800+
SRE_CHAR *found = memchr(ptr, c, end - ptr);
1801+
if (!found)
1802+
return 0;
1803+
ptr = found + 1;
1804+
}
1805+
#else
17891806
while (*ptr++ != c) {
17901807
if (ptr >= end)
17911808
return 0;
17921809
}
1810+
#endif
17931811
if (ptr >= end)
17941812
return 0;
17951813

0 commit comments

Comments
 (0)