Skip to content

Commit 072047b

Browse files
committed
[GR-70803] Improve pyc unmarshalling performance
PullRequest: graalpython/4058
2 parents 99eaf66 + 398c68f commit 072047b

File tree

10 files changed

+194
-151
lines changed

10 files changed

+194
-151
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/PythonLanguage.java

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import static com.oracle.graal.python.nodes.StringLiterals.J_PY_EXTENSION;
3131
import static com.oracle.graal.python.nodes.StringLiterals.T_PY_EXTENSION;
3232
import static com.oracle.graal.python.nodes.truffle.TruffleStringMigrationHelpers.isJavaString;
33+
import static com.oracle.graal.python.util.PythonUtils.ARRAY_ACCESSOR;
3334
import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING;
3435
import static com.oracle.graal.python.util.PythonUtils.tsLiteral;
3536

@@ -60,7 +61,7 @@
6061
import com.oracle.graal.python.annotations.PythonOS;
6162
import com.oracle.graal.python.builtins.Python3Core;
6263
import com.oracle.graal.python.builtins.PythonBuiltinClassType;
63-
import com.oracle.graal.python.builtins.modules.MarshalModuleBuiltins;
64+
import com.oracle.graal.python.builtins.modules.ImpModuleBuiltins;
6465
import com.oracle.graal.python.builtins.modules.SignalModuleBuiltins;
6566
import com.oracle.graal.python.builtins.objects.PNone;
6667
import com.oracle.graal.python.builtins.objects.PNotImplemented;
@@ -160,7 +161,6 @@
160161
"text/x-python-\2\u0100-eval", "text/x-python-\2\u0100-compile", "text/x-python-\0\u0040-eval", "text/x-python-\0\u0040-compile", "text/x-python-\1\u0040-eval",
161162
"text/x-python-\1\u0040-compile", "text/x-python-\2\u0040-eval", "text/x-python-\2\u0040-compile", "text/x-python-\0\u0140-eval", "text/x-python-\0\u0140-compile",
162163
"text/x-python-\1\u0140-eval", "text/x-python-\1\u0140-compile", "text/x-python-\2\u0140-eval", "text/x-python-\2\u0140-compile"}, //
163-
byteMimeTypes = {PythonLanguage.MIME_TYPE_BYTECODE}, //
164164
defaultMimeType = PythonLanguage.MIME_TYPE, //
165165
dependentLanguages = {"nfi", "llvm"}, //
166166
interactive = true, internal = false, //
@@ -312,8 +312,6 @@ private static boolean mimeTypesComplete(ArrayList<String> mimeJavaStrings) {
312312
assert mimeTypesComplete(mimeJavaStrings) : "Expected all of {" + String.join(", ", mimeJavaStrings) + "} in the PythonLanguage characterMimeTypes";
313313
}
314314

315-
public static final String MIME_TYPE_BYTECODE = "application/x-python-bytecode";
316-
317315
public static final TruffleString[] T_DEFAULT_PYTHON_EXTENSIONS = new TruffleString[]{T_PY_EXTENSION, tsLiteral(".pyc")};
318316

319317
public static final TruffleLogger LOGGER = TruffleLogger.getLogger(ID, PythonLanguage.class);
@@ -557,11 +555,6 @@ protected CallTarget parse(ParsingRequest request) {
557555
if (!request.getArgumentNames().isEmpty()) {
558556
throw new IllegalStateException("parse with arguments is only allowed for " + MIME_TYPE + " mime type");
559557
}
560-
if (MIME_TYPE_BYTECODE.equals(source.getMimeType())) {
561-
byte[] bytes = source.getBytes().toByteArray();
562-
CodeUnit code = MarshalModuleBuiltins.deserializeCodeUnit(null, context, bytes);
563-
return callTargetFromBytecode(context, source, code);
564-
}
565558

566559
String mime = source.getMimeType();
567560
String prefix = mime.substring(0, MIME_PREFIX.length());
@@ -586,7 +579,7 @@ protected CallTarget parse(ParsingRequest request) {
586579
return parse(context, source, type, false, optimize, false, null, FutureFeature.fromFlags(flags));
587580
}
588581

589-
public RootCallTarget callTargetFromBytecode(PythonContext context, Source source, CodeUnit code) {
582+
public static RootCallTarget callTargetFromBytecode(PythonContext context, Source source, CodeUnit code) {
590583
boolean internal = shouldMarkSourceInternal(context);
591584
SourceBuilder builder = null;
592585
// The original file path should be passed as the name
@@ -616,7 +609,7 @@ public RootCallTarget callTargetFromBytecode(PythonContext context, Source sourc
616609
// TODO lazily load source in bytecode DSL interpreter too
617610
rootNode = ((BytecodeDSLCodeUnit) code).createRootNode(context, lazySource.getSource());
618611
} else {
619-
rootNode = PBytecodeRootNode.create(this, (BytecodeCodeUnit) code, lazySource, internal);
612+
rootNode = PBytecodeRootNode.create(context.getLanguage(), (BytecodeCodeUnit) code, lazySource, internal);
620613
}
621614

622615
return PythonUtils.getOrCreateCallTarget(rootNode);
@@ -999,10 +992,17 @@ protected void initializeMultipleContexts() {
999992
singleContext = false;
1000993
}
1001994

1002-
private final ConcurrentHashMap<TruffleString, CallTarget> cachedCode = new ConcurrentHashMap<>();
995+
public record CodeCacheKey(TruffleString filename, long codeHash) {
996+
}
997+
998+
private final ConcurrentHashMap<CodeCacheKey, CallTarget> cachedCode = new ConcurrentHashMap<>();
1003999

1004-
@TruffleBoundary
10051000
public CallTarget cacheCode(TruffleString filename, Supplier<CallTarget> createCode) {
1001+
return cacheCode(new CodeCacheKey(filename, 0), createCode);
1002+
}
1003+
1004+
@TruffleBoundary
1005+
public CallTarget cacheCode(CodeCacheKey filename, Supplier<CallTarget> createCode) {
10061006
if (!singleContext) {
10071007
return cachedCode.computeIfAbsent(filename, f -> {
10081008
LOGGER.log(Level.FINEST, () -> "Caching CallTarget for " + filename);
@@ -1013,6 +1013,19 @@ public CallTarget cacheCode(TruffleString filename, Supplier<CallTarget> createC
10131013
}
10141014
}
10151015

1016+
public long cacheKeyForBytecode(byte[] code, int length) {
1017+
if (singleContext) {
1018+
// No caching in single context
1019+
return 0;
1020+
}
1021+
byte[] hashBytes = ImpModuleBuiltins.SourceHashNode.hashSource(0, code, length);
1022+
return ARRAY_ACCESSOR.getLong(hashBytes, 0);
1023+
}
1024+
1025+
public long cacheKeyForBytecode(byte[] code) {
1026+
return cacheKeyForBytecode(code, code.length);
1027+
}
1028+
10161029
private static final Source LINEBREAK_REGEX_SOURCE = Source.newBuilder("regex", "/\r\n|[\n\u000B\u000C\r\u0085\u2028\u2029]/", "re_linebreak") //
10171030
.option("regex.Flavor", "Python") //
10181031
.option("regex.Encoding", "UTF-32") //

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/CodecsModuleBuiltins.java

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,7 @@ static void doCustom(VirtualFrame frame, TruffleDecoder decoder, TruffleString e
490490
@Cached PyCodecLookupErrorNode lookupErrorNode,
491491
@Cached PRaiseNode raiseNode) {
492492
try {
493-
Object errorHandler = lookupErrorNode.execute(inliningTarget, errorAction);
493+
Object errorHandler = lookupErrorNode.execute(frame, inliningTarget, errorAction);
494494
if (errorHandler == null) {
495495
throw raiseNode.raise(inliningTarget, LookupError, UNKNOWN_ERROR_HANDLER, errorAction);
496496
}
@@ -975,6 +975,7 @@ public abstract static class PyCodecLookupNode extends PNodeWithContext {
975975

976976
@Specialization
977977
static PTuple lookup(VirtualFrame frame, Node inliningTarget, TruffleString encoding,
978+
@Cached CodecsRegistry.EnsureRegistryInitializedNode ensureRegistryInitializedNode,
978979
@Cached(inline = false) CallUnaryMethodNode callNode,
979980
@Cached PyObjectTypeCheck typeCheck,
980981
@Cached(inline = false) PyObjectSizeNode sizeNode,
@@ -986,7 +987,7 @@ static PTuple lookup(VirtualFrame frame, Node inliningTarget, TruffleString enco
986987
@Cached PRaiseNode raiseNode) {
987988
TruffleString normalizedEncoding = normalizeEncodingNameNode.execute(inliningTarget, encoding);
988989
PythonContext context = PythonContext.get(inliningTarget);
989-
ensureRegistryInitialized(context);
990+
ensureRegistryInitializedNode.execute(frame, inliningTarget, context);
990991
PTuple result = getSearchPath(context, normalizedEncoding);
991992
if (hasSearchPathProfile.profile(inliningTarget, result != null)) {
992993
return result;
@@ -1040,21 +1041,18 @@ private static boolean isTupleInstanceCheck(VirtualFrame frame, Node inliningTar
10401041
return typeCheck.execute(inliningTarget, result, PythonBuiltinClassType.PTuple) && sizeNode.execute(frame, inliningTarget, result) == len;
10411042
}
10421043

1043-
private static void ensureRegistryInitialized(PythonContext context) {
1044-
CodecsRegistry.ensureRegistryInitialized(context);
1045-
}
1046-
10471044
@Builtin(name = "register", minNumOfPositionalArgs = 1)
10481045
@GenerateNodeFactory
10491046
abstract static class RegisterNode extends PythonUnaryBuiltinNode {
10501047
@Specialization
1051-
static Object lookup(Object searchFunction,
1048+
static Object lookup(VirtualFrame frame, Object searchFunction,
10521049
@Bind Node inliningTarget,
1050+
@Cached CodecsRegistry.EnsureRegistryInitializedNode ensureRegistryInitializedNode,
10531051
@Cached PyCallableCheckNode callableCheckNode,
10541052
@Cached PRaiseNode raiseNode) {
10551053
if (callableCheckNode.execute(inliningTarget, searchFunction)) {
10561054
PythonContext context = PythonContext.get(inliningTarget);
1057-
ensureRegistryInitialized(context);
1055+
ensureRegistryInitializedNode.execute(frame, inliningTarget, context);
10581056
add(context, searchFunction);
10591057
return PNone.NONE;
10601058
} else {
@@ -1116,10 +1114,10 @@ protected ArgumentClinicProvider getArgumentClinic() {
11161114
}
11171115

11181116
@Specialization
1119-
Object register(TruffleString name, Object handler,
1117+
Object register(VirtualFrame frame, TruffleString name, Object handler,
11201118
@Bind Node inliningTarget,
11211119
@Cached PyCodecRegisterErrorNode registerErrorNode) {
1122-
registerErrorNode.execute(inliningTarget, name, handler);
1120+
registerErrorNode.execute(frame, inliningTarget, name, handler);
11231121
return PNone.NONE;
11241122
}
11251123
}
@@ -1135,10 +1133,10 @@ protected ArgumentClinicProvider getArgumentClinic() {
11351133
}
11361134

11371135
@Specialization
1138-
Object lookup(TruffleString name,
1136+
Object lookup(VirtualFrame frame, TruffleString name,
11391137
@Bind Node inliningTarget,
11401138
@Cached PyCodecLookupErrorNode errorNode) {
1141-
return errorNode.execute(inliningTarget, name);
1139+
return errorNode.execute(frame, inliningTarget, name);
11421140
}
11431141
}
11441142

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ImpModuleBuiltins.java

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
import static com.oracle.graal.python.nodes.StringLiterals.T_EXT_SO;
5454
import static com.oracle.graal.python.nodes.StringLiterals.T_NAME;
5555
import static com.oracle.graal.python.runtime.exception.PythonErrorType.NotImplementedError;
56+
import static com.oracle.graal.python.util.PythonUtils.ARRAY_ACCESSOR;
5657
import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING;
5758
import static com.oracle.graal.python.util.PythonUtils.toTruffleStringUncached;
5859
import static com.oracle.graal.python.util.PythonUtils.tsLiteral;
@@ -61,6 +62,9 @@
6162
import java.util.List;
6263
import java.util.concurrent.locks.ReentrantLock;
6364

65+
import org.bouncycastle.crypto.macs.SipHash;
66+
import org.bouncycastle.crypto.params.KeyParameter;
67+
6468
import com.oracle.graal.python.PythonLanguage;
6569
import com.oracle.graal.python.annotations.ArgumentClinic;
6670
import com.oracle.graal.python.annotations.ArgumentClinic.ClinicConversion;
@@ -72,7 +76,6 @@
7276
import com.oracle.graal.python.builtins.modules.MarshalModuleBuiltins.Marshal.MarshalError;
7377
import com.oracle.graal.python.builtins.objects.PNone;
7478
import com.oracle.graal.python.builtins.objects.buffer.PythonBufferAccessLibrary;
75-
import com.oracle.graal.python.builtins.objects.bytes.BytesNodes;
7679
import com.oracle.graal.python.builtins.objects.bytes.PBytes;
7780
import com.oracle.graal.python.builtins.objects.cext.capi.CApiContext;
7881
import com.oracle.graal.python.builtins.objects.cext.capi.CApiContext.ModuleSpec;
@@ -125,6 +128,7 @@
125128
import com.oracle.truffle.api.dsl.Specialization;
126129
import com.oracle.truffle.api.frame.VirtualFrame;
127130
import com.oracle.truffle.api.interop.InteropLibrary;
131+
import com.oracle.truffle.api.library.CachedLibrary;
128132
import com.oracle.truffle.api.memory.ByteArraySupport;
129133
import com.oracle.truffle.api.nodes.Node;
130134
import com.oracle.truffle.api.source.Source;
@@ -480,7 +484,7 @@ static Object run(TruffleString name, Object dataObj,
480484
Object code = null;
481485

482486
try {
483-
code = MarshalModuleBuiltins.Marshal.load(context, bytes, size);
487+
code = MarshalModuleBuiltins.Marshal.load(context, bytes, size, 0);
484488
} catch (MarshalError | NumberFormatException e) {
485489
raiseFrozenError(inliningTarget, raiseNode, FROZEN_INVALID, name);
486490
}
@@ -646,9 +650,11 @@ public static PythonModule importFrozenModuleObject(Node inliningTarget, PConstr
646650
}
647651

648652
private static RootCallTarget createCallTarget(PythonContext context, FrozenInfo info) {
649-
String name = PythonLanguage.FROZEN_FILENAME_PREFIX + info.name + PythonLanguage.FROZEN_FILENAME_SUFFIX;
650-
Source source = Source.newBuilder("python", "", name).content(Source.CONTENT_NONE).build();
651-
return context.getLanguage().callTargetFromBytecode(context, source, info.code);
653+
return (RootCallTarget) context.getLanguage().cacheCode(new PythonLanguage.CodeCacheKey(info.origName, System.identityHashCode(info.code)), () -> {
654+
String name = PythonLanguage.FROZEN_FILENAME_PREFIX + info.name + PythonLanguage.FROZEN_FILENAME_SUFFIX;
655+
Source source = Source.newBuilder("python", "", name).content(Source.CONTENT_NONE).build();
656+
return PythonLanguage.callTargetFromBytecode(context, source, info.code);
657+
});
652658
}
653659

654660
/*
@@ -702,22 +708,28 @@ private static void raiseFrozenError(Node inliningTarget, PConstructAndRaiseNode
702708
@ArgumentClinic(name = "source", conversion = ArgumentClinic.ClinicConversion.ReadableBuffer)
703709
@GenerateNodeFactory
704710
public abstract static class SourceHashNode extends PythonBinaryClinicBuiltinNode {
705-
@TruffleBoundary
706-
@Specialization
711+
@Specialization(limit = "2")
707712
static PBytes run(long magicNumber, Object sourceBuffer,
713+
@CachedLibrary("sourceBuffer") PythonBufferAccessLibrary bufferLib,
708714
@Bind PythonLanguage language) {
709-
long sourceHash = BytesNodes.HashBufferNode.executeUncached(sourceBuffer);
710-
return PFactory.createBytes(language, computeHash(magicNumber, sourceHash));
715+
try {
716+
byte[] hash = hashSource(magicNumber, bufferLib.getInternalOrCopiedByteArray(sourceBuffer), bufferLib.getBufferLength(sourceBuffer));
717+
return PFactory.createBytes(language, hash);
718+
} finally {
719+
bufferLib.release(sourceBuffer);
720+
}
711721
}
712722

713723
@TruffleBoundary
714-
private static byte[] computeHash(long magicNumber, long sourceHash) {
715-
byte[] hash = new byte[Long.BYTES];
716-
long hashCode = magicNumber ^ sourceHash;
717-
for (int i = 0; i < hash.length; i++) {
718-
hash[i] = (byte) (hashCode << (8 * i));
719-
}
720-
return hash;
724+
public static byte[] hashSource(long magicNumber, byte[] bytes, int length) {
725+
SipHash sipHash = new SipHash(1, 3);
726+
byte[] key = new byte[16];
727+
ARRAY_ACCESSOR.putLong(key, 0, magicNumber);
728+
sipHash.init(new KeyParameter(key));
729+
sipHash.update(bytes, 0, length);
730+
byte[] out = new byte[sipHash.getMacSize()];
731+
sipHash.doFinal(out, 0);
732+
return out;
721733
}
722734

723735
@Override

0 commit comments

Comments
 (0)