Skip to content

Commit 99eaf66

Browse files
committed
[GR-71253] Implement re.findall, re.split, re.sub and re.subn as Truffle Nodes.
PullRequest: graalpython/4083
2 parents 82cedff + 61e92fb commit 99eaf66

File tree

9 files changed

+1461
-357
lines changed

9 files changed

+1461
-357
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,10 +1043,12 @@ public Gcd2Node(boolean isRecursive) {
10431043
abstract Object execute(VirtualFrame frame, Object a, Object b);
10441044

10451045
// Diff hunk for Gcd2Node.count. A gutter line ending in '-' precedes a removed line, one
// ending in '+' precedes an added line. The removed form recursed with (b, a % b) until
// b == 0; the added form performs the same Euclidean remainder steps in a loop, so the
// call depth no longer grows with the number of steps.
private static long count(long a, long b) {
1046-
if (b == 0) {
1047-
return a;
1046+
while (b != 0) {
1047+
// remember the current b; it becomes the next a
long tmp = b;
1048+
b = a % b;
1049+
a = tmp;
10481050
}
1049-
return count(b, a % b);
1051+
// b is 0 here, a holds the last non-zero remainder
return a;
10501052
}
10511053

10521054
@Specialization

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/SREModuleBuiltins.java

Lines changed: 1179 additions & 156 deletions
Large diffs are not rendered by default.
graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/TRegexUtil.java

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
/*
2+
* Copyright (c) 2025, 2025, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* The Universal Permissive License (UPL), Version 1.0
6+
*
7+
* Subject to the condition set forth below, permission is hereby granted to any
8+
* person obtaining a copy of this software, associated documentation and/or
9+
* data (collectively the "Software"), free of charge and under any and all
10+
* copyright rights in the Software, and any and all patent rights owned or
11+
* freely licensable by each licensor hereunder covering either (i) the
12+
* unmodified Software as contributed to or provided by such licensor, or (ii)
13+
* the Larger Works (as defined below), to deal in both
14+
*
15+
* (a) the Software, and
16+
*
17+
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
18+
* one is included with the Software each a "Larger Work" to which the Software
19+
* is contributed by such licensors),
20+
*
21+
* without restriction, including without limitation the rights to copy, create
22+
* derivative works of, display, perform, and distribute the Software and make,
23+
* use, sell, offer for sale, import, export, have made, and have sold the
24+
* Software and the Larger Work(s), and to sublicense the foregoing rights on
25+
* either these or other terms.
26+
*
27+
* This license is subject to the following condition:
28+
*
29+
* The above copyright notice and either this complete permission notice or at a
30+
* minimum a reference to the UPL must be included in all copies or substantial
31+
* portions of the Software.
32+
*
33+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
39+
* SOFTWARE.
40+
*/
41+
package com.oracle.graal.python.builtins.modules;
42+
43+
import com.oracle.truffle.api.CompilerDirectives;
44+
import com.oracle.truffle.api.dsl.GenerateCached;
45+
import com.oracle.truffle.api.dsl.GenerateInline;
46+
import com.oracle.truffle.api.dsl.GenerateUncached;
47+
import com.oracle.truffle.api.dsl.Specialization;
48+
import com.oracle.truffle.api.interop.ArityException;
49+
import com.oracle.truffle.api.interop.InteropLibrary;
50+
import com.oracle.truffle.api.interop.UnknownIdentifierException;
51+
import com.oracle.truffle.api.interop.UnsupportedMessageException;
52+
import com.oracle.truffle.api.interop.UnsupportedTypeException;
53+
import com.oracle.truffle.api.library.CachedLibrary;
54+
import com.oracle.truffle.api.nodes.Node;
55+
import com.oracle.truffle.api.strings.TruffleString;
56+
57+
public final class TRegexUtil {
58+
59+
/** Private constructor: the class is final and only hosts static nested types and constants. */
private TRegexUtil() {
60+
// should not be constructed
61+
}
62+
63+
/**
 * Member-name constants used with {@link InteropLibrary} when reading from or invoking
 * members on compiled-regex and regex-result objects. Grouped by the kind of object the
 * member belongs to.
 */
public static final class Props {
64+
private Props() {
65+
// should not be constructed
66+
}
67+
68+
/**
 * Members of a compiled regex object. {@code PATTERN}, {@code FLAGS}, {@code GROUP_COUNT}
 * and {@code GROUPS} are read by {@code TRegexCompiledRegexAccessor}; {@code EXEC} is
 * invoked by the {@code InvokeExecMethod*} nodes.
 */
public static final class CompiledRegex {
69+
private CompiledRegex() {
70+
// should not be constructed
71+
}
72+
73+
public static final String PATTERN = "pattern";
74+
public static final String FLAGS = "flags";
75+
public static final String EXEC = "exec";
76+
public static final String GROUP_COUNT = "groupCount";
77+
public static final String GROUPS = "groups";
78+
}
79+
80+
/**
 * Members of a regex result object. {@code IS_MATCH} is read; {@code GET_START} and
 * {@code GET_END} are passed as the method name to
 * {@code InvokeGetGroupBoundariesMethodNode}.
 */
public static final class RegexResult {
81+
private RegexResult() {
82+
// should not be constructed
83+
}
84+
85+
public static final String IS_MATCH = "isMatch";
86+
public static final String GET_START = "getStart";
87+
public static final String GET_END = "getEnd";
88+
}
89+
}
90+
91+
// Value used as the `limit` of every @Specialization in this class; kept as a String
// because that is the type the annotation attribute is given here.
// NOTE(review): the name suggests only one receiver type is expected per call site - confirm.
private static final String NUMBER_OF_REGEX_RESULT_TYPES = "1";
92+
93+
/**
 * Generic interop member read: returns {@code objs.readMember(obj, key)} unchanged.
 * There is no readability guard; {@link UnsupportedMessageException} and
 * {@link UnknownIdentifierException} are treated as unreachable and rethrown through
 * {@link CompilerDirectives#shouldNotReachHere(Throwable)}.
 * Cached, inlined (by default) and uncached variants are all generated.
 */
@GenerateCached
94+
@GenerateInline(inlineByDefault = true)
95+
@GenerateUncached
96+
public abstract static class InteropReadMemberNode extends Node {
97+
98+
/**
 * @param node first argument of the inlinable node (presumably the inlining target - confirm)
 * @param obj receiver whose member is read
 * @param key member name
 * @return the value returned by the interop read
 */
public abstract Object execute(Node node, Object obj, String key);
99+
100+
@Specialization(limit = NUMBER_OF_REGEX_RESULT_TYPES)
101+
static Object read(Object obj, String key,
102+
@CachedLibrary("obj") InteropLibrary objs) {
103+
try {
104+
return objs.readMember(obj, key);
105+
} catch (UnsupportedMessageException | UnknownIdentifierException e) {
106+
// callers are expected to pass only objects that expose the requested member
throw CompilerDirectives.shouldNotReachHere(e);
107+
}
108+
}
109+
}
110+
111+
/**
 * Invokes the {@code "exec"} member ({@code Props.CompiledRegex.EXEC}) of a compiled regex
 * with the arguments {@code (input, fromIndex)} and returns whatever the call returns.
 * Any interop exception from the call is treated as unreachable.
 * Only inline and uncached variants are generated.
 */
@GenerateCached(false)
112+
@GenerateInline
113+
@GenerateUncached
114+
public abstract static class InvokeExecMethodNode extends Node {
115+
116+
/**
 * @param inliningTarget node this inlined node is attached to
 * @param compiledRegex receiver of the {@code exec} call
 * @param input string to match against
 * @param fromIndex second argument forwarded to {@code exec}
 * @return the object returned by {@code exec}
 */
public abstract Object execute(Node inliningTarget, Object compiledRegex, TruffleString input, int fromIndex);
117+
118+
@Specialization(limit = NUMBER_OF_REGEX_RESULT_TYPES)
119+
static Object exec(Object compiledRegex, TruffleString input, int fromIndex,
120+
@CachedLibrary("compiledRegex") InteropLibrary objs) {
121+
try {
122+
return objs.invokeMember(compiledRegex, Props.CompiledRegex.EXEC, input, fromIndex);
123+
} catch (UnsupportedMessageException | UnsupportedTypeException | ArityException | UnknownIdentifierException e) {
124+
throw CompilerDirectives.shouldNotReachHere(e);
125+
}
126+
}
127+
}
128+
129+
/**
 * Variant of {@code InvokeExecMethodNode} that invokes {@code "exec"} with five arguments:
 * {@code (input, fromIndex, toIndex, 0, toIndex)}.
 * NOTE(review): presumably these map to TRegex's (input, fromIndex, maxIndex, regionFrom,
 * regionTo) exec signature, i.e. the match may not extend past {@code toIndex} while
 * look-behind may still see the input from index 0 - confirm against the TRegex docs.
 * Any interop exception from the call is treated as unreachable.
 */
@GenerateCached(false)
130+
@GenerateInline
131+
@GenerateUncached
132+
public abstract static class InvokeExecMethodWithMaxIndexNode extends Node {
133+
134+
/**
 * @param inliningTarget node this inlined node is attached to
 * @param compiledRegex receiver of the {@code exec} call
 * @param input string to match against
 * @param fromIndex second argument forwarded to {@code exec}
 * @param toIndex forwarded as both the third and the fifth argument of {@code exec}
 * @return the object returned by {@code exec}
 */
public abstract Object execute(Node inliningTarget, Object compiledRegex, TruffleString input, int fromIndex, int toIndex);
135+
136+
@Specialization(limit = NUMBER_OF_REGEX_RESULT_TYPES)
137+
static Object exec(Object compiledRegex, TruffleString input, int fromIndex, int toIndex,
138+
@CachedLibrary("compiledRegex") InteropLibrary objs) {
139+
try {
140+
return objs.invokeMember(compiledRegex, Props.CompiledRegex.EXEC, input, fromIndex, toIndex, 0, toIndex);
141+
} catch (UnsupportedMessageException | UnsupportedTypeException | ArityException | UnknownIdentifierException e) {
142+
throw CompilerDirectives.shouldNotReachHere(e);
143+
}
144+
}
145+
}
146+
147+
/**
 * Reads the {@code "isMatch"} member of a regex result and returns it as a {@code boolean}.
 * Interop exceptions from the read are treated as unreachable.
 */
@GenerateCached(false)
148+
@GenerateInline
149+
@GenerateUncached
150+
public abstract static class ReadIsMatchNode extends Node {
151+
// Same literal as Props.RegexResult.IS_MATCH; kept as a separate constant in this node.
static final String IS_MATCH = "isMatch";
152+
153+
/**
 * @param inliningTarget node this inlined node is attached to
 * @param regexResult result object to query
 * @return the value of the {@code isMatch} member
 */
public abstract boolean execute(Node inliningTarget, Object regexResult);
154+
155+
@Specialization(limit = NUMBER_OF_REGEX_RESULT_TYPES)
156+
static boolean read(Object regexResult, @CachedLibrary("regexResult") InteropLibrary objs) {
157+
try {
158+
// the member value is cast to boolean directly; no interop boolean conversion is done
return (boolean) objs.readMember(regexResult, IS_MATCH);
159+
} catch (UnsupportedMessageException | UnknownIdentifierException e) {
160+
throw CompilerDirectives.shouldNotReachHere(e);
161+
}
162+
}
163+
}
164+
165+
/**
 * Invokes the member named {@code method} on a regex result with the single argument
 * {@code groupNumber} and returns the result as an {@code int}. Within this file the
 * method name is {@code Props.RegexResult.GET_START} or {@code Props.RegexResult.GET_END}.
 * Interop exceptions from the call are treated as unreachable.
 */
@GenerateCached(false)
166+
@GenerateInline
167+
@GenerateUncached
168+
public abstract static class InvokeGetGroupBoundariesMethodNode extends Node {
169+
170+
/**
 * @param inliningTarget node this inlined node is attached to
 * @param regexResult receiver of the call
 * @param method member name; declared as {@code Object} here, but the only specialization
 *            accepts a {@code String}
 * @param groupNumber argument forwarded to the invoked member
 * @return the invoked member's result cast to {@code int}
 */
public abstract int execute(Node inliningTarget, Object regexResult, Object method, int groupNumber);
171+
172+
@Specialization(limit = NUMBER_OF_REGEX_RESULT_TYPES)
173+
static int exec(Object regexResult, String method, int groupNumber,
174+
@CachedLibrary("regexResult") InteropLibrary objs) {
175+
try {
176+
return (int) objs.invokeMember(regexResult, method, groupNumber);
177+
} catch (UnsupportedMessageException | UnsupportedTypeException | ArityException | UnknownIdentifierException e) {
178+
throw CompilerDirectives.shouldNotReachHere(e);
179+
}
180+
}
181+
}
182+
183+
/**
 * Static helpers that read one named member each from a compiled regex object through a
 * caller-supplied {@code InteropReadMemberNode}. The member names come from
 * {@code Props.CompiledRegex}.
 */
public static final class TRegexCompiledRegexAccessor {
184+
185+
private TRegexCompiledRegexAccessor() {
186+
}
187+
188+
/** Reads the {@code "pattern"} member and casts it to a Java {@code String}. */
public static String pattern(Object compiledRegexObject, Node node, InteropReadMemberNode readPattern) {
189+
return (String) readPattern.execute(node, compiledRegexObject, Props.CompiledRegex.PATTERN);
190+
}
191+
192+
/** Reads the {@code "flags"} member and returns it without conversion. */
public static Object flags(Object compiledRegexObject, Node node, InteropReadMemberNode readFlags) {
193+
return readFlags.execute(node, compiledRegexObject, Props.CompiledRegex.FLAGS);
194+
}
195+
196+
/** Reads the {@code "groupCount"} member and casts it to {@code int}. */
public static int groupCount(Object compiledRegexObject, Node node, InteropReadMemberNode readGroupCount) {
197+
return (int) readGroupCount.execute(node, compiledRegexObject, Props.CompiledRegex.GROUP_COUNT);
198+
}
199+
200+
/**
 * Reads the {@code "groups"} member and returns it without conversion; the returned object
 * is what {@code TRegexNamedCaptureGroupsAccessor} takes as {@code namedCaptureGroupsMap}.
 */
public static Object namedCaptureGroups(Object compiledRegexObject, Node node, InteropReadMemberNode readGroups) {
201+
return readGroups.execute(node, compiledRegexObject, Props.CompiledRegex.GROUPS);
202+
}
203+
}
204+
205+
/**
 * Static helpers for the named-capture-groups object: group names are looked up as interop
 * members whose key is the group name converted to a Java {@code String}.
 */
public static final class TRegexNamedCaptureGroupsAccessor {
206+
207+
private TRegexNamedCaptureGroupsAccessor() {
208+
}
209+
210+
/** Returns whether a member named {@code name} is readable on {@code namedCaptureGroupsMap}. */
public static boolean hasGroup(Object namedCaptureGroupsMap, TruffleString name, InteropLibrary interop) {
211+
return interop.isMemberReadable(namedCaptureGroupsMap, name.toJavaStringUncached());
212+
}
213+
214+
/**
 * Reads the member named {@code name} and casts it to {@code int}. A missing member is
 * treated as unreachable, so callers should check {@code hasGroup} first.
 */
public static int getGroupNumber(Object namedCaptureGroupsMap, TruffleString name, InteropLibrary libMap) {
215+
try {
216+
return (int) libMap.readMember(namedCaptureGroupsMap, name.toJavaStringUncached());
217+
} catch (UnsupportedMessageException | UnknownIdentifierException e) {
218+
throw CompilerDirectives.shouldNotReachHere(e);
219+
}
220+
}
221+
}
222+
223+
/**
 * Static helpers that query a regex result object through caller-supplied nodes, using the
 * member names from {@code Props.RegexResult}.
 */
public static final class TRegexResultAccessor {
224+
225+
private TRegexResultAccessor() {
226+
}
227+
228+
/** Reads the {@code "isMatch"} member via the generic read node and casts it to {@code boolean}. */
public static boolean isMatch(Object result, Node node, InteropReadMemberNode readIsMatch) {
229+
return (boolean) readIsMatch.execute(node, result, Props.RegexResult.IS_MATCH);
230+
}
231+
232+
/** Invokes {@code "getStart"} on {@code result} with {@code groupNumber}. */
public static int captureGroupStart(Object result, int groupNumber, Node node, InvokeGetGroupBoundariesMethodNode getStart) {
233+
return getStart.execute(node, result, Props.RegexResult.GET_START, groupNumber);
234+
}
235+
236+
/** Invokes {@code "getEnd"} on {@code result} with {@code groupNumber}. */
public static int captureGroupEnd(Object result, int groupNumber, Node node, InvokeGetGroupBoundariesMethodNode getEnd) {
237+
return getEnd.execute(node, result, Props.RegexResult.GET_END, groupNumber);
238+
}
239+
}
240+
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextHashBuiltins.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.Py_hash_t;
4949
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.Py_ssize_t;
5050
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.Void;
51+
import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING_BINARY;
5152

5253
import com.oracle.graal.python.builtins.modules.SysModuleBuiltins;
5354
import com.oracle.graal.python.builtins.modules.cext.PythonCextBuiltins.CApiBinaryBuiltinNode;
@@ -62,7 +63,6 @@
6263
import com.oracle.truffle.api.dsl.Cached;
6364
import com.oracle.truffle.api.dsl.Specialization;
6465
import com.oracle.truffle.api.strings.TruffleString;
65-
import com.oracle.truffle.api.strings.TruffleString.Encoding;
6666
import com.oracle.truffle.api.strings.TruffleString.HashCodeNode;
6767

6868
public final class PythonCextHashBuiltins {
@@ -118,7 +118,7 @@ static long doI(Object value, long size,
118118
@Cached TruffleString.FromByteArrayNode toString,
119119
@Cached HashCodeNode hashNode) {
120120
byte[] array = readNode.readByteArray(value, (int) size);
121-
TruffleString string = toString.execute(array, Encoding.US_ASCII, false);
121+
TruffleString string = toString.execute(array, TS_ENCODING_BINARY, false);
122122
return PyObjectHashNode.hash(string, hashNode);
123123
}
124124
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringBuiltins.java

Lines changed: 7 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
import com.oracle.graal.python.builtins.PythonBuiltins;
7777
import com.oracle.graal.python.builtins.modules.BuiltinFunctions;
7878
import com.oracle.graal.python.builtins.modules.CodecsModuleBuiltins;
79+
import com.oracle.graal.python.builtins.modules.TRegexUtil;
7980
import com.oracle.graal.python.builtins.objects.PNone;
8081
import com.oracle.graal.python.builtins.objects.PNotImplemented;
8182
import com.oracle.graal.python.builtins.objects.buffer.PythonBufferAccessLibrary;
@@ -185,11 +186,6 @@
185186
import com.oracle.truffle.api.dsl.NeverDefault;
186187
import com.oracle.truffle.api.dsl.Specialization;
187188
import com.oracle.truffle.api.frame.VirtualFrame;
188-
import com.oracle.truffle.api.interop.ArityException;
189-
import com.oracle.truffle.api.interop.InteropLibrary;
190-
import com.oracle.truffle.api.interop.UnknownIdentifierException;
191-
import com.oracle.truffle.api.interop.UnsupportedMessageException;
192-
import com.oracle.truffle.api.interop.UnsupportedTypeException;
193189
import com.oracle.truffle.api.library.CachedLibrary;
194190
import com.oracle.truffle.api.nodes.Node;
195191
import com.oracle.truffle.api.profiles.InlinedBranchProfile;
@@ -1491,63 +1487,6 @@ static PList doGeneric(Object self, Object keepends,
14911487
return innerNode.execute(inliningTarget, selfStr, bKeepends);
14921488
}
14931489

1494-
// Removed from StringBuiltins by this commit (every line follows a '-' gutter marker); the
// splitlines code below now injects TRegexUtil.InvokeExecMethodNode instead. This removed
// version guarded the specialization with isMemberInvocable(compiledRegex, EXEC) and used
// limit "3"; the TRegexUtil version has no guard and uses limit "1".
@GenerateCached(false)
1495-
@GenerateInline
1496-
@GenerateUncached
1497-
public abstract static class InvokeExecMethodNode extends Node {
1498-
static final String EXEC = "exec";
1499-
1500-
public abstract Object execute(Node inliningTarget, Object compiledRegex, TruffleString input, int fromIndex);
1501-
1502-
@Specialization(guards = "objs.isMemberInvocable(compiledRegex, EXEC)", limit = "3")
1503-
static Object exec(Object compiledRegex, TruffleString input, int fromIndex,
1504-
@CachedLibrary("compiledRegex") InteropLibrary objs) {
1505-
try {
1506-
return objs.invokeMember(compiledRegex, EXEC, input, fromIndex);
1507-
} catch (UnsupportedMessageException | UnsupportedTypeException | ArityException | UnknownIdentifierException e) {
1508-
throw CompilerDirectives.shouldNotReachHere(e);
1509-
}
1510-
}
1511-
}
1512-
1513-
// Removed from StringBuiltins by this commit; superseded by TRegexUtil.ReadIsMatchNode.
// This removed version guarded the read with isMemberReadable(regexResult, IS_MATCH) and
// used limit "3"; the TRegexUtil version has no guard and uses limit "1".
@GenerateCached(false)
1514-
@GenerateInline
1515-
@GenerateUncached
1516-
public abstract static class ReadIsMatchNode extends Node {
1517-
static final String IS_MATCH = "isMatch";
1518-
1519-
public abstract boolean execute(Node inliningTarget, Object regexResult);
1520-
1521-
@Specialization(guards = "objs.isMemberReadable(regexResult, IS_MATCH)", limit = "3")
1522-
static boolean read(Object regexResult, @CachedLibrary("regexResult") InteropLibrary objs) {
1523-
try {
1524-
return (boolean) objs.readMember(regexResult, IS_MATCH);
1525-
} catch (UnsupportedMessageException | UnknownIdentifierException e) {
1526-
throw CompilerDirectives.shouldNotReachHere(e);
1527-
}
1528-
}
1529-
}
1530-
1531-
// Removed from StringBuiltins by this commit; superseded by
// TRegexUtil.InvokeGetGroupBoundariesMethodNode. The GET_START / GET_END names defined here
// now live in TRegexUtil.Props.RegexResult. This removed version guarded the call with
// isMemberInvocable(regexResult, method) and used limit "3"; the replacement has no guard
// and uses limit "1".
@GenerateCached(false)
1532-
@GenerateInline
1533-
@GenerateUncached
1534-
public abstract static class InvokeGetGroupBoundariesMethodNode extends Node {
1535-
static final String GET_START = "getStart";
1536-
static final String GET_END = "getEnd";
1537-
1538-
public abstract int execute(Node inliningTarget, Object regexResult, Object method, int groupNumber);
1539-
1540-
@Specialization(guards = "objs.isMemberInvocable(regexResult, method)", limit = "3")
1541-
static int exec(Object regexResult, String method, int groupNumber,
1542-
@CachedLibrary("regexResult") InteropLibrary objs) {
1543-
try {
1544-
return (int) objs.invokeMember(regexResult, method, groupNumber);
1545-
} catch (UnsupportedMessageException | UnsupportedTypeException | ArityException | UnknownIdentifierException e) {
1546-
throw CompilerDirectives.shouldNotReachHere(e);
1547-
}
1548-
}
1549-
}
1550-
15511490
@GenerateCached(false)
15521491
@GenerateInline
15531492
@GenerateUncached
@@ -1557,10 +1496,10 @@ abstract static class SplitLinesInnerNode extends Node {
15571496

15581497
@Specialization
15591498
static PList doStringKeepends(Node inliningTarget, TruffleString self, boolean keepends,
1560-
@Cached InvokeExecMethodNode invokeExecMethodNode,
1561-
@Cached ReadIsMatchNode readIsMatchNode,
1562-
@Cached InvokeGetGroupBoundariesMethodNode getStartNode,
1563-
@Cached InvokeGetGroupBoundariesMethodNode getEndNode,
1499+
@Cached TRegexUtil.InvokeExecMethodNode invokeExecMethodNode,
1500+
@Cached TRegexUtil.ReadIsMatchNode readIsMatchNode,
1501+
@Cached TRegexUtil.InvokeGetGroupBoundariesMethodNode getStartNode,
1502+
@Cached TRegexUtil.InvokeGetGroupBoundariesMethodNode getEndNode,
15641503
@Cached TruffleString.SubstringByteIndexNode substringNode,
15651504
@Cached AppendNode appendNode) {
15661505
Object lineBreakRegex = PythonLanguage.get(inliningTarget).getCachedTRegexLineBreakRegex(PythonContext.get(inliningTarget));
@@ -1575,11 +1514,11 @@ static PList doStringKeepends(Node inliningTarget, TruffleString self, boolean k
15751514
final int substringStartByteIndex = asByteIndex(lastEnd);
15761515
final int substringByteLength;
15771516
if (matchFound) {
1578-
int end = getEndNode.execute(inliningTarget, regexResult, InvokeGetGroupBoundariesMethodNode.GET_END, 0);
1517+
int end = TRegexUtil.TRegexResultAccessor.captureGroupEnd(regexResult, 0, inliningTarget, getEndNode);
15791518
if (keepends) {
15801519
substringByteLength = asByteIndex(end - lastEnd);
15811520
} else {
1582-
int start = getStartNode.execute(inliningTarget, regexResult, InvokeGetGroupBoundariesMethodNode.GET_START, 0);
1521+
int start = TRegexUtil.TRegexResultAccessor.captureGroupStart(regexResult, 0, inliningTarget, getStartNode);
15831522
substringByteLength = asByteIndex(start - lastEnd);
15841523
}
15851524
assert end > lastEnd : String.format("end: %d, lastEnd: %d", end, lastEnd);

0 commit comments

Comments
 (0)