From ec1dd5d34ef07d3fc285d48d319f25a05def2ce0 Mon Sep 17 00:00:00 2001 From: Florian Angerer Date: Thu, 30 Apr 2026 14:23:41 +0200 Subject: [PATCH 1/8] Optimize PyUnicode_New native representation --- .../com.oracle.graal.python.cext/src/capi.h | 15 ++ .../src/unicodeobject.c | 89 ++++++++--- .../modules/cext/PythonCextSlotBuiltins.java | 146 ------------------ .../cext/PythonCextUnicodeBuiltins.java | 60 ++++--- .../capi/transitions/CApiTransitions.java | 102 ++++++++++-- .../capi/transitions/ToNativeTypeNode.java | 10 ++ .../objects/cext/structs/CFields.java | 7 + .../objects/cext/structs/CStructs.java | 1 + .../builtins/objects/str/StringNodes.java | 50 +++++- 9 files changed, 282 insertions(+), 198 deletions(-) diff --git a/graalpython/com.oracle.graal.python.cext/src/capi.h b/graalpython/com.oracle.graal.python.cext/src/capi.h index 124141c065..5cceb32794 100644 --- a/graalpython/com.oracle.graal.python.cext/src/capi.h +++ b/graalpython/com.oracle.graal.python.cext/src/capi.h @@ -157,6 +157,21 @@ typedef struct { double ob_fval; } GraalPyFloatObject; +#define GRAALPY_UNICODE_INTERN_STATE_UNDETERMINED 0 +#define GRAALPY_UNICODE_INTERN_STATE_INTERNED 1 +#define GRAALPY_UNICODE_INTERN_STATE_NOT_INTERNED 2 + +typedef struct { + GraalPyObject ob_base; + Py_ssize_t length; + Py_ssize_t byte_length; + Py_hash_t hash; + int32_t kind; + int32_t is_ascii; + int32_t interned; + void *data; +} GraalPyUnicodeObject; + typedef struct gc_generation GCGeneration; // {{start CAPI_BUILTINS}} diff --git a/graalpython/com.oracle.graal.python.cext/src/unicodeobject.c b/graalpython/com.oracle.graal.python.cext/src/unicodeobject.c index 0ee3cb46a5..496b07de40 100644 --- a/graalpython/com.oracle.graal.python.cext/src/unicodeobject.c +++ b/graalpython/com.oracle.graal.python.cext/src/unicodeobject.c @@ -127,6 +127,33 @@ extern "C" { # define _PyUnicode_CHECK(op) PyUnicode_Check(op) #endif +static inline Py_ssize_t * +GraalPyPrivate_Unicode_LengthPtr(PyObject *op) +{ + if (points_to_py_handle_space(op)) { + return &((GraalPyUnicodeObject *) pointer_to_stub(op))->length; + } + return &_PyASCIIObject_CAST(op)->length; +} + +static inline Py_hash_t * +GraalPyPrivate_Unicode_HashPtr(PyObject *op) +{ + if (points_to_py_handle_space(op)) { + return &((GraalPyUnicodeObject *) pointer_to_stub(op))->hash; + } + return &_PyASCIIObject_CAST(op)->hash; +} + +static inline void ** +GraalPyPrivate_Unicode_DataPtr(PyObject *op) +{ + if (points_to_py_handle_space(op)) { + return &((GraalPyUnicodeObject *) pointer_to_stub(op))->data; + } + return &_PyUnicodeObject_CAST(op)->data.any; +} + #define _PyUnicode_UTF8(op) \ (_PyCompactUnicodeObject_CAST(op)->utf8) #define PyUnicode_UTF8(op) \ @@ -143,19 +170,19 @@ extern "C" { _PyUnicode_UTF8_LENGTH(op)) #define _PyUnicode_LENGTH(op) \ - (_PyASCIIObject_CAST(op)->length) + (*GraalPyPrivate_Unicode_LengthPtr(_PyObject_CAST(op))) #define _PyUnicode_STATE(op) \ (_PyASCIIObject_CAST(op)->state) #define _PyUnicode_HASH(op) \ - (_PyASCIIObject_CAST(op)->hash) + (*GraalPyPrivate_Unicode_HashPtr(_PyObject_CAST(op))) #define _PyUnicode_KIND(op) \ (assert(_PyUnicode_CHECK(op)), \ - _PyASCIIObject_CAST(op)->state.kind) + PyUnicode_KIND(op)) #define _PyUnicode_GET_LENGTH(op) \ (assert(_PyUnicode_CHECK(op)), \ - _PyASCIIObject_CAST(op)->length) + PyUnicode_GET_LENGTH(op)) #define _PyUnicode_DATA_ANY(op) \ - (_PyUnicodeObject_CAST(op)->data.any) + (*GraalPyPrivate_Unicode_DataPtr(_PyObject_CAST(op))) #define _PyUnicode_SHARE_UTF8(op) \ (assert(_PyUnicode_CHECK(op)), \ @@ -1358,22 +1385,24 @@ PyObject * PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) { // GraalPy change: different implementation - /* add one to size for the null character */ + if (size < 0) { + PyErr_SetString(PyExc_SystemError, + "Negative size passed to PyUnicode_New"); + return NULL; + } if (maxchar < 128) { - /* We intentionally use 'size' (which is one element less than the allocated array) - * because interop users should not see the null character. */ - return GraalPyPrivate_Unicode_New((Py_UCS1 *) calloc(size + 1, PyUnicode_1BYTE_KIND), size, PyUnicode_1BYTE_KIND, 1); + return GraalPyPrivate_Unicode_New(size, PyUnicode_1BYTE_KIND, 1); } else if (maxchar < 256) { - return GraalPyPrivate_Unicode_New((Py_UCS1 *) calloc(size + 1, PyUnicode_1BYTE_KIND), size, PyUnicode_1BYTE_KIND, 0); + return GraalPyPrivate_Unicode_New(size, PyUnicode_1BYTE_KIND, 0); } else if (maxchar < 65536) { - return GraalPyPrivate_Unicode_New((Py_UCS2 *) calloc(size + 1, PyUnicode_2BYTE_KIND), size, PyUnicode_2BYTE_KIND, 0); + return GraalPyPrivate_Unicode_New(size, PyUnicode_2BYTE_KIND, 0); } else { if (maxchar > MAX_UNICODE) { PyErr_SetString(PyExc_SystemError, "invalid maximum character passed to PyUnicode_New"); return NULL; } - return GraalPyPrivate_Unicode_New((Py_UCS4 *) calloc(size + 1, PyUnicode_4BYTE_KIND), size, PyUnicode_4BYTE_KIND, 0); + return GraalPyPrivate_Unicode_New(size, PyUnicode_4BYTE_KIND, 0); } /* should never be reached */ return NULL; @@ -14194,7 +14223,7 @@ GraalPyPrivate_Unicode_SubtypeNew(PyTypeObject *type, PyObject *unicode) _PyUnicode_STATE(self).kind = kind; _PyUnicode_STATE(self).compact = 0; // GraalPy change - _PyUnicode_STATE(self).ascii = GET_SLOT_SPECIAL(unicode, PyASCIIObject, state_ascii, state.ascii); + _PyUnicode_STATE(self).ascii = PyUnicode_IS_ASCII(unicode); _PyUnicode_STATE(self).statically_allocated = 0; _PyUnicode_UTF8_LENGTH(self) = 0; _PyUnicode_UTF8(self) = NULL; @@ -15243,27 +15272,49 @@ PyInit__string(void) // GraalPy additions unsigned int GraalPyUnicode_CHECK_INTERNED(PyObject *op) { - return GET_SLOT_SPECIAL(op, PyASCIIObject, state_interned, state.interned); + if (points_to_py_handle_space(op)) { + GraalPyUnicodeObject *unicode = (GraalPyUnicodeObject *) pointer_to_stub(op); + if (unicode->interned == GRAALPY_UNICODE_INTERN_STATE_UNDETERMINED) { + unicode->interned = GraalPyPrivate_Unicode_CheckInterned(op) ? GRAALPY_UNICODE_INTERN_STATE_INTERNED : GRAALPY_UNICODE_INTERN_STATE_NOT_INTERNED; + } + return unicode->interned == GRAALPY_UNICODE_INTERN_STATE_INTERNED ? SSTATE_INTERNED_MORTAL : SSTATE_NOT_INTERNED; + } + return _PyASCIIObject_CAST(op)->state.interned; } Py_ssize_t GraalPyUnicode_GET_LENGTH(PyObject* op) { - return GraalPyPrivate_GET_PyASCIIObject_length(op); + if (points_to_py_handle_space(op)) { + return ((GraalPyUnicodeObject *) pointer_to_stub(op))->length; + } + return _PyASCIIObject_CAST(op)->length; } unsigned int GraalPyUnicode_IS_ASCII(PyObject* op) { - return GET_SLOT_SPECIAL(op, PyASCIIObject, state_ascii, state.ascii); + if (points_to_py_handle_space(op)) { + return ((GraalPyUnicodeObject *) pointer_to_stub(op))->is_ascii; + } + return _PyASCIIObject_CAST(op)->state.ascii; } unsigned int GraalPyUnicode_IS_COMPACT(PyObject* op) { - return GET_SLOT_SPECIAL(op, PyASCIIObject, state_compact, state.compact); + if (points_to_py_handle_space(op)) { + return 0; + } + return _PyASCIIObject_CAST(op)->state.compact; } int GraalPyUnicode_KIND(PyObject* op) { - return GET_SLOT_SPECIAL(op, PyASCIIObject, state_kind, state.kind); + if (points_to_py_handle_space(op)) { + return ((GraalPyUnicodeObject *) pointer_to_stub(op))->kind; + } + return _PyASCIIObject_CAST(op)->state.kind; } void* GraalPyUnicode_NONCOMPACT_DATA(PyObject* op) { - return GET_SLOT_SPECIAL(op, PyUnicodeObject, data, data.any); + if (points_to_py_handle_space(op)) { + return ((GraalPyUnicodeObject *) pointer_to_stub(op))->data; + } + return _PyUnicodeObject_CAST(op)->data.any; } #ifdef __cplusplus diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextSlotBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextSlotBuiltins.java index 2900e991fb..76de75839e 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextSlotBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextSlotBuiltins.java @@ -44,7 +44,6 @@ import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.ConstCharPtrAsTruffleString; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.Int; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.Pointer; -import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PyASCIIObject; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PyByteArrayObject; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PyCFunctionObject; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PyCMethodObject; @@ -66,24 +65,20 @@ import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PySliceObject; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PyTupleObject; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PyTypeObjectBorrowed; -import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PyUnicodeObject; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PyVarObject; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.Py_ssize_t; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.UINTPTR_T; -import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.UNSIGNED_INT; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.Void; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.getter; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.setter; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.vectorcallfunc; import static com.oracle.graal.python.runtime.nativeaccess.NativeMemory.NULLPTR; -import static com.oracle.graal.python.runtime.nativeaccess.NativeMemory.calloc; import static com.oracle.graal.python.nodes.HiddenAttr.METHOD_DEF_PTR; import static com.oracle.graal.python.nodes.HiddenAttr.PROMOTED_START; import static com.oracle.graal.python.nodes.HiddenAttr.PROMOTED_STEP; import static com.oracle.graal.python.nodes.HiddenAttr.PROMOTED_STOP; import static com.oracle.graal.python.nodes.SpecialAttributeNames.T___MODULE__; import static com.oracle.graal.python.runtime.PythonContext.NATIVE_NULL; -import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; import com.oracle.graal.python.builtins.PythonBuiltinClassType; import com.oracle.graal.python.builtins.modules.cext.PythonCextBuiltins.CApiBinaryBuiltinNode; @@ -99,13 +94,11 @@ import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions; import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.HandlePointerConverter; import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.NativeToPythonInternalNode; -import com.oracle.graal.python.builtins.objects.cext.structs.CStructAccess; import com.oracle.graal.python.builtins.objects.common.HashingStorageNodes.HashingStorageLen; import com.oracle.graal.python.builtins.objects.frame.FrameBuiltins; import com.oracle.graal.python.builtins.objects.frame.PFrame; import com.oracle.graal.python.builtins.objects.function.PBuiltinFunction; import com.oracle.graal.python.builtins.objects.getsetdescriptor.GetSetDescriptor; -import com.oracle.graal.python.builtins.objects.ints.PInt; import com.oracle.graal.python.builtins.objects.list.PList; import com.oracle.graal.python.builtins.objects.method.PBuiltinMethod; import com.oracle.graal.python.builtins.objects.method.PDecoratedMethod; @@ -114,10 +107,6 @@ import com.oracle.graal.python.builtins.objects.object.PythonBuiltinObject; import com.oracle.graal.python.builtins.objects.set.PBaseSet; import com.oracle.graal.python.builtins.objects.slice.PSlice; -import com.oracle.graal.python.builtins.objects.str.NativeStringData; -import com.oracle.graal.python.builtins.objects.str.PString; -import com.oracle.graal.python.builtins.objects.str.StringNodes; -import com.oracle.graal.python.builtins.objects.str.StringNodes.StringLenNode; import com.oracle.graal.python.builtins.objects.tuple.PTuple; import com.oracle.graal.python.builtins.objects.type.TypeNodes; import com.oracle.graal.python.lib.PyObjectLookupAttr; @@ -136,8 +125,6 @@ import com.oracle.truffle.api.dsl.GenerateInline; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.nodes.Node; -import com.oracle.truffle.api.profiles.InlinedConditionProfile; -import com.oracle.truffle.api.strings.TruffleString; public final class PythonCextSlotBuiltins { @@ -155,91 +142,6 @@ public static long GraalPyPrivate_Get_PyTupleObject_ob_item(long ptr) { return PySequenceArrayWrapper.ensureNativeSequence(object); } - @CApiBuiltin(ret = Py_ssize_t, args = {PyASCIIObject}, call = Ignored) - abstract static class GraalPyPrivate_Get_PyASCIIObject_length extends CApiUnaryBuiltinNode { - - @Specialization - static long get(Object object, - @Cached StringLenNode stringLenNode) { - return stringLenNode.execute(object); - } - } - - @CApiBuiltin(ret = UNSIGNED_INT, args = {PyASCIIObject}, call = Ignored) - abstract static class GraalPyPrivate_Get_PyASCIIObject_state_ascii extends CApiUnaryBuiltinNode { - - @Specialization - int get(PString object, - @Bind Node inliningTarget, - @Cached InlinedConditionProfile storageProfile, - @Cached HiddenAttr.ReadNode readAttrNode, - @Cached TruffleString.GetCodeRangeNode getCodeRangeNode) { - // important: avoid materialization of native sequences - NativeStringData nativeData = object.getNativeStringData(inliningTarget, readAttrNode); - if (storageProfile.profile(inliningTarget, nativeData != null)) { - return nativeData.isAscii() ? 1 : 0; - } - - TruffleString string = object.getMaterialized(); - return PInt.intValue(getCodeRangeNode.execute(string, TS_ENCODING) == TruffleString.CodeRange.ASCII); - } - } - - @CApiBuiltin(ret = UNSIGNED_INT, args = {PyASCIIObject}, call = Ignored) - abstract static class GraalPyPrivate_Get_PyASCIIObject_state_compact extends CApiUnaryBuiltinNode { - - @Specialization - static int get(@SuppressWarnings("unused") Object object) { - return 0; - } - } - - @CApiBuiltin(ret = UNSIGNED_INT, args = {PyASCIIObject}, call = Ignored) - abstract static class GraalPyPrivate_Get_PyASCIIObject_state_interned extends CApiUnaryBuiltinNode { - - @Specialization - static int get(PString object, - @Bind Node inliningTarget, - @Cached StringNodes.IsInternedStringNode isInternedStringNode) { - return isInternedStringNode.execute(inliningTarget, object) ? 1 : 0; - } - } - - @CApiBuiltin(ret = UNSIGNED_INT, args = {PyASCIIObject}, call = Ignored) - abstract static class GraalPyPrivate_Get_PyASCIIObject_state_kind extends CApiUnaryBuiltinNode { - - @Specialization - static int get(PString object, - @Bind Node inliningTarget, - @Cached InlinedConditionProfile storageProfile, - @Cached HiddenAttr.ReadNode readAttrNode, - @Cached TruffleString.GetCodeRangeNode getCodeRangeNode) { - // important: avoid materialization of native sequences - NativeStringData nativeData = object.getNativeStringData(inliningTarget, readAttrNode); - if (storageProfile.profile(inliningTarget, nativeData != null)) { - return nativeData.getCharSize(); - } - TruffleString string = object.getMaterialized(); - TruffleString.CodeRange range = getCodeRangeNode.execute(string, TS_ENCODING); - if (range.isSubsetOf(TruffleString.CodeRange.LATIN_1)) { - return 1; - } else if (range.isSubsetOf(TruffleString.CodeRange.BMP)) { - return 2; - } else { - return 4; - } - } - } - - @CApiBuiltin(ret = UNSIGNED_INT, args = {PyASCIIObject}, call = Ignored) - abstract static class GraalPyPrivate_Get_PyASCIIObject_state_ready extends CApiUnaryBuiltinNode { - - @Specialization - static int get(@SuppressWarnings("unused") Object object) { - return 1; - } - } - @CApiBuiltin(ret = PyTypeObjectBorrowed, args = {PyCMethodObject}, call = Ignored) abstract static class GraalPyPrivate_Get_PyCMethodObject_mm_class extends CApiUnaryBuiltinNode { @Specialization @@ -662,54 +564,6 @@ static Object doStop(PSlice object, } } - @CApiBuiltin(ret = Pointer, args = {PyUnicodeObject}, call = Ignored) - abstract static class GraalPyPrivate_Get_PyUnicodeObject_data extends CApiUnaryBuiltinNode { - - @Specialization - static long get(PString object, - @Bind Node inliningTarget, - @Cached TruffleString.GetCodeRangeNode getCodeRangeNode, - @Cached TruffleString.SwitchEncodingNode switchEncodingNode, - @Cached CStructAccess.WriteTruffleStringNode writeTruffleStringNode, - @Cached HiddenAttr.ReadNode readAttrNode, - @Cached HiddenAttr.WriteNode writeAttrNode) { - NativeStringData nativeData = object.getNativeStringData(inliningTarget, readAttrNode); - if (nativeData != null) { - // in this case, we can just return the pointer - return nativeData.getPtr(); - } - TruffleString string = object.getMaterialized(); - TruffleString.CodeRange range = getCodeRangeNode.execute(string, TS_ENCODING); - TruffleString.Encoding encoding; - int charSize; - boolean isAscii = false; - if (range == TruffleString.CodeRange.ASCII) { - isAscii = true; - charSize = 1; - encoding = TruffleString.Encoding.US_ASCII; - } else if (range.isSubsetOf(TruffleString.CodeRange.LATIN_1)) { - charSize = 1; - encoding = TruffleString.Encoding.ISO_8859_1; - } else if (range.isSubsetOf(TruffleString.CodeRange.BMP)) { - charSize = 2; - encoding = TruffleString.Encoding.UTF_16; - } else { - charSize = 4; - encoding = TruffleString.Encoding.UTF_32; - } - string = switchEncodingNode.execute(string, encoding); - int byteLength = string.byteLength(encoding); - long ptr = calloc(byteLength + /* null terminator */ charSize); - writeTruffleStringNode.write(ptr, string, encoding); - /* - * Set native data, so we can just return the pointer the next time. - */ - NativeStringData data = NativeStringData.create(charSize, isAscii, ptr, byteLength); - object.setNativeStringData(inliningTarget, writeAttrNode, data); - return ptr; - } - } - @CApiBuiltin(ret = Py_ssize_t, args = {PyVarObject}, call = Ignored) abstract static class GraalPyPrivate_Get_PyVarObject_ob_size extends CApiUnaryBuiltinNode { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextUnicodeBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextUnicodeBuiltins.java index 4aeaa8643a..bb5630a5dd 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextUnicodeBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextUnicodeBuiltins.java @@ -55,6 +55,8 @@ import static com.oracle.graal.python.builtins.modules.CodecsModuleBuiltins.T_UTF_32_LE; import static com.oracle.graal.python.builtins.modules.cext.PythonCextBuiltins.CApiCallPath.Direct; import static com.oracle.graal.python.builtins.modules.cext.PythonCextBuiltins.CApiCallPath.Ignored; +import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.GRAALPY_UNICODE_INTERN_STATE_INTERNED; +import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.GRAALPY_UNICODE_INTERN_STATE_NOT_INTERNED; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.CONST_WCHAR_PTR; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.ConstCharPtr; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.ConstCharPtrAsTruffleString; @@ -114,6 +116,10 @@ import com.oracle.graal.python.builtins.objects.cext.capi.PySequenceArrayWrapper; import com.oracle.graal.python.builtins.objects.cext.capi.UnicodeObjectNodes.UnicodeAsWideCharNode; import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTiming; +import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions; +import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.AllocateNativeObjectStubNode; +import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.FirstToNativeNode; +import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.HandlePointerConverter; import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.NativeToPythonInternalNode; import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes.EncodeNativeStringNode; import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes.ReadUnicodeArrayNode; @@ -122,7 +128,6 @@ import com.oracle.graal.python.builtins.objects.cext.structs.CStructs; import com.oracle.graal.python.builtins.objects.ints.PInt; import com.oracle.graal.python.builtins.objects.memoryview.PMemoryView; -import com.oracle.graal.python.builtins.objects.str.NativeStringData; import com.oracle.graal.python.builtins.objects.str.PString; import com.oracle.graal.python.builtins.objects.str.StringBuiltins; import com.oracle.graal.python.builtins.objects.str.StringBuiltins.EncodeNode; @@ -159,6 +164,7 @@ import com.oracle.graal.python.runtime.object.PFactory; import com.oracle.graal.python.util.ConcurrentWeakSet; import com.oracle.graal.python.util.OverflowException; +import com.oracle.graal.python.util.PythonUtils; import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.Bind; @@ -324,6 +330,10 @@ static Object withPString(PString str, } str.intern(); + if (str.isNative()) { + long ptr = HandlePointerConverter.pointerToStub(str.getNativePointer()); + CStructAccess.writeIntField(ptr, CFields.GraalPyUnicodeObject__interned, GRAALPY_UNICODE_INTERN_STATE_INTERNED); + } /* * TODO this is not integrated with str.intern, pointer comparisons of two str.intern'ed * string may still yield failse @@ -342,6 +352,19 @@ Object nil(@SuppressWarnings("unused") Object obj) { } } + @CApiBuiltin(ret = Int, args = {PyObject}, call = Ignored) + abstract static class GraalPyPrivate_Unicode_CheckInterned extends CApiUnaryBuiltinNode { + @Specialization + static int withPString(PString str) { + return PythonUtils.isInterned(str.getValueUncached()) ? 1 : 0; + } + + @Fallback + static int nil(@SuppressWarnings("unused") Object obj) { + return 0; + } + } + @CApiBuiltin(ret = PyObjectTransfer, args = {PyObject, Int, Int, Int}, call = Direct) @ImportStatic(PythonCextUnicodeBuiltins.class) abstract static class _PyUnicode_FormatLong extends CApiQuaternaryBuiltinNode { @@ -826,23 +849,24 @@ static int doGeneric(Object type, long lindex, } } - @CApiBuiltin(ret = PyObjectTransfer, args = {Pointer, Py_ssize_t, Int, Int}, call = Ignored) - abstract static class GraalPyPrivate_Unicode_New extends CApiQuaternaryBuiltinNode { - @Specialization - static Object doGeneric(long ptr, long elements, int charSize, int isAscii, - @Bind Node inliningTarget, - @Bind PythonLanguage language, - @Cached HiddenAttr.WriteNode writeNode, - @Cached PRaiseNode raiseNode) { - long size = elements * charSize; - if (!PInt.isIntRange(size)) { - throw raiseNode.raise(inliningTarget, MemoryError); - } - PString s = PFactory.createString(language, null); - NativeStringData data = NativeStringData.create(charSize, isAscii != 0, ptr, (int) size); - s.setNativeStringData(inliningTarget, writeNode, data); - return s; - } + @CApiBuiltin(ret = PyObject, args = {Py_ssize_t, Int, Int}, call = Ignored) + static long GraalPyPrivate_Unicode_New(long nChars, int charSize, int isAscii) { + if (nChars < 0 || nChars > Long.MAX_VALUE / charSize) { + throw PRaiseNode.raiseStatic(null, MemoryError); + } + long size = nChars * charSize; + if (!PInt.isIntRange(size)) { + throw PRaiseNode.raiseStatic(null, MemoryError); + } + // the extra 'charSize' accounts for the NUL char + long extraSize = size + charSize; + PString s = PFactory.createString(PythonLanguage.get(null), null); + long initialRefCount = FirstToNativeNode.getInitialRefcnt(true, false); + long taggedPointer = AllocateNativeObjectStubNode.executeUncached(s, PythonBuiltinClassType.PString, CStructs.GraalPyUnicodeObject, initialRefCount, false, extraSize); + s.setNativePointer(taggedPointer); + long realPointer = HandlePointerConverter.pointerToStub(taggedPointer); + CApiTransitions.initializeGraalPyUnicodeObject(realPointer, nChars, size, charSize, isAscii != 0, GRAALPY_UNICODE_INTERN_STATE_NOT_INTERNED); + return taggedPointer; } @CApiBuiltin(ret = PyObjectTransfer, args = {Pointer, Py_ssize_t, Int}, call = Ignored) diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/CApiTransitions.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/CApiTransitions.java index 871903525f..484b222a92 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/CApiTransitions.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/CApiTransitions.java @@ -54,7 +54,6 @@ import static com.oracle.graal.python.builtins.objects.object.PythonObject.IMMORTAL_REFCNT; import static com.oracle.graal.python.builtins.objects.object.PythonObject.MANAGED_REFCNT; import static com.oracle.graal.python.runtime.nativeaccess.NativeMemory.NULLPTR; -import static com.oracle.graal.python.runtime.nativeaccess.NativeMemory.calloc; import static com.oracle.graal.python.runtime.nativeaccess.NativeMemory.free; import static com.oracle.graal.python.runtime.nativeaccess.NativeMemory.mallocPtrArray; import static com.oracle.graal.python.runtime.nativeaccess.NativeMemory.writePtrArrayElements; @@ -113,6 +112,8 @@ import com.oracle.graal.python.builtins.objects.ints.PInt; import com.oracle.graal.python.builtins.objects.memoryview.PMemoryView; import com.oracle.graal.python.builtins.objects.object.PythonObject; +import com.oracle.graal.python.builtins.objects.str.PString; +import com.oracle.graal.python.builtins.objects.str.StringNodes.StringMaterializeNode; import com.oracle.graal.python.builtins.objects.tuple.PTuple; import com.oracle.graal.python.builtins.objects.type.PythonAbstractClass; import com.oracle.graal.python.builtins.objects.type.PythonBuiltinClass; @@ -157,6 +158,7 @@ import com.oracle.truffle.api.profiles.InlinedBranchProfile; import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.profiles.InlinedExactClassProfile; +import com.oracle.truffle.api.strings.TruffleString; import sun.misc.Unsafe; @@ -171,6 +173,10 @@ public abstract class CApiTransitions { private static final TruffleLogger LOGGER = CApiContext.getLogger(CApiTransitions.class); + public static final int GRAALPY_UNICODE_INTERN_STATE_UNDETERMINED = 0; + public static final int GRAALPY_UNICODE_INTERN_STATE_INTERNED = 1; + public static final int GRAALPY_UNICODE_INTERN_STATE_NOT_INTERNED = 2; + enum PollingState { /** startup barrier not finished yet, polling must not run */ RQ_UNINITIALIZED, @@ -445,6 +451,22 @@ public static void registerNativeSequenceStorage(NativeSequenceStorage storage) handleContext.nativeStorageReferences.add(ref); } + public static long initializeGraalPyUnicodeObject(long rawPointer, long elements, long byteLength, int charSize, boolean isAscii, int interned) { + assert charSize == 1 || charSize == 2 || charSize == 4; + assert byteLength == elements * charSize; + assert interned == GRAALPY_UNICODE_INTERN_STATE_UNDETERMINED || interned == GRAALPY_UNICODE_INTERN_STATE_INTERNED || interned == GRAALPY_UNICODE_INTERN_STATE_NOT_INTERNED; + long data = rawPointer + CStructs.GraalPyUnicodeObject.size(); + writeLongField(rawPointer, CFields.GraalPyUnicodeObject__length, elements); + writeLongField(rawPointer, CFields.GraalPyUnicodeObject__byte_length, byteLength); + writeLongField(rawPointer, CFields.GraalPyUnicodeObject__hash, -1); + writeIntField(rawPointer, CFields.GraalPyUnicodeObject__kind, charSize); + writeIntField(rawPointer, CFields.GraalPyUnicodeObject__is_ascii, isAscii ? 1 : 0); + writeIntField(rawPointer, CFields.GraalPyUnicodeObject__interned, interned); + writePtrField(rawPointer, CFields.GraalPyUnicodeObject__data, data); + NativeMemory.memset(data + byteLength, (byte) 0, charSize); + return data; + } + public static final class PyCapsuleReference extends IdReference { private final PyCapsule.CapsuleData data; @@ -1288,7 +1310,7 @@ static long doSpecialSingleton(@SuppressWarnings("unused") Node inliningTarget, Object type = GetClassNode.executeUncached(singletonObject); assert (GetTypeFlagsNode.executeUncached(type) & TypeFlags.HAVE_GC) == 0; - return AllocateNativeObjectStubNodeGen.getUncached().execute(inliningTarget, singletonObject, type, CStructs.GraalPyObject, IMMORTAL_REFCNT, false); + return AllocateNativeObjectStubNodeGen.getUncached().execute(inliningTarget, singletonObject, type, CStructs.GraalPyObject, IMMORTAL_REFCNT, false, 0); } @Specialization(guards = {"!isManagedClass(pythonObject)", "!isMemoryView(pythonObject)"}) @@ -1296,8 +1318,12 @@ static long doOther(Node inliningTarget, PythonObject pythonObject, long initial @Exclusive @Cached InlinedConditionProfile isVarObjectProfile, @Exclusive @Cached InlinedConditionProfile isGcProfile, @Exclusive @Cached InlinedConditionProfile isFloatObjectProfile, + @Exclusive @Cached InlinedConditionProfile isStringObjectProfile, @Cached GetPythonObjectClassNode getClassNode, @Cached(inline = false) GetTypeFlagsNode getTypeFlagsNode, + @Cached TruffleString.GetCodeRangeNode getCodeRangeNode, + @Cached TruffleString.SwitchEncodingNode switchEncodingNode, + @Cached CStructAccess.WriteTruffleStringNode writeTruffleStringNode, @Exclusive @Cached AllocateNativeObjectStubNode allocateNativeObjectStubNode) { // for types, we always need to allocate the full PyTypeObject @@ -1309,16 +1335,47 @@ static long doOther(Node inliningTarget, PythonObject pythonObject, long initial Object type = getClassNode.execute(inliningTarget, pythonObject); CStructs ctype; + TruffleString unicodeString = null; + TruffleString.Encoding unicodeEncoding = null; + int unicodeCharSize = 0; + boolean unicodeIsAscii = false; + long unicodeByteLength = 0; + long extraSize = 0; if (isVarObjectProfile.profile(inliningTarget, pythonObject instanceof PTuple)) { ctype = CStructs.GraalPyVarObject; } else if (isFloatObjectProfile.profile(inliningTarget, pythonObject instanceof PFloat)) { ctype = CStructs.GraalPyFloatObject; + } else if (isStringObjectProfile.profile(inliningTarget, pythonObject instanceof PString)) { + ctype = CStructs.GraalPyUnicodeObject; + PString stringObject = (PString) pythonObject; + if (!stringObject.isMaterialized()) { + throw CompilerDirectives.shouldNotReachHere("unmaterialized PString should already have a native unicode stub"); + } + unicodeString = stringObject.getMaterialized(); + TruffleString.CodeRange range = getCodeRangeNode.execute(unicodeString, PythonUtils.TS_ENCODING); + if (range == TruffleString.CodeRange.ASCII) { + unicodeIsAscii = true; + unicodeCharSize = 1; + unicodeEncoding = TruffleString.Encoding.US_ASCII; + } else if (range.isSubsetOf(TruffleString.CodeRange.LATIN_1)) { + unicodeCharSize = 1; + unicodeEncoding = TruffleString.Encoding.ISO_8859_1; + } else if (range.isSubsetOf(TruffleString.CodeRange.BMP)) { + unicodeCharSize = 2; + unicodeEncoding = TruffleString.Encoding.UTF_16; + } else { + unicodeCharSize = 4; + unicodeEncoding = TruffleString.Encoding.UTF_32; + } + unicodeString = switchEncodingNode.execute(unicodeString, unicodeEncoding); + unicodeByteLength = unicodeString.byteLength(unicodeEncoding); + extraSize = unicodeByteLength + unicodeCharSize; } else { ctype = CStructs.GraalPyObject; } boolean gc = isGcProfile.profile(inliningTarget, (getTypeFlagsNode.execute(type) & TypeFlags.HAVE_GC) != 0); - long taggedPointer = allocateNativeObjectStubNode.execute(inliningTarget, pythonObject, type, ctype, initialRefCount, gc); + long taggedPointer = allocateNativeObjectStubNode.execute(inliningTarget, pythonObject, type, ctype, initialRefCount, gc, extraSize); // allocate a native stub object (C type: GraalPy*Object) if (ctype == CStructs.GraalPyVarObject) { @@ -1335,6 +1392,12 @@ static long doOther(Node inliningTarget, PythonObject pythonObject, long initial assert pythonObject instanceof PFloat; long realPointer = HandlePointerConverter.pointerToStub(taggedPointer); writeDoubleField(realPointer, CFields.GraalPyFloatObject__ob_fval, ((PFloat) pythonObject).getValue()); + } else if (ctype == CStructs.GraalPyUnicodeObject) { + assert pythonObject instanceof PString; + long realPointer = HandlePointerConverter.pointerToStub(taggedPointer); + long data = initializeGraalPyUnicodeObject(realPointer, unicodeByteLength / unicodeCharSize, unicodeByteLength, unicodeCharSize, unicodeIsAscii, + GRAALPY_UNICODE_INTERN_STATE_UNDETERMINED); + writeTruffleStringNode.write(data, unicodeString, unicodeEncoding); } return taggedPointer; @@ -1351,12 +1414,17 @@ public static long getInitialRefcnt(boolean newRef, boolean immortal) { @GenerateUncached @GenerateInline @GenerateCached(false) - abstract static class AllocateNativeObjectStubNode extends Node { + public abstract static class AllocateNativeObjectStubNode extends Node { - abstract long execute(Node inliningTarget, PythonAbstractObject object, Object type, CStructs ctype, long initialRefCount, boolean gc); + @TruffleBoundary + public static long executeUncached(PythonAbstractObject object, Object type, CStructs ctype, long initialRefCount, boolean gc, long extraSize) { + return AllocateNativeObjectStubNodeGen.getUncached().execute(null, object, type, ctype, initialRefCount, gc, extraSize); + } + + public abstract long execute(Node inliningTarget, PythonAbstractObject object, Object type, CStructs ctype, long initialRefCount, boolean gc, long extraSize); @Specialization - static long doGeneric(Node inliningTarget, PythonAbstractObject object, Object type, CStructs ctype, long initialRefCount, boolean gc, + static long doGeneric(Node inliningTarget, PythonAbstractObject object, Object type, CStructs ctype, long initialRefCount, boolean gc, long extraSize, @Cached(inline = false) GilNode gil, @Cached(inline = false) CStructAccess.WriteObjectNewRefNode writeObjectNode, @Cached PyObjectGCTrackNode gcTrackNode) { @@ -1369,7 +1437,9 @@ static long doGeneric(Node inliningTarget, PythonAbstractObject object, Object t * Python's GC, we will also allocate space for 'PyGC_Head'. */ long presize = gc ? CStructs.PyGC_Head.size() : 0; - long stubPointer = calloc(ctype.size() + presize); + long allocationSize = ctype.size() + presize + extraSize; + long stubPointer = NativeMemory.malloc(allocationSize); + NativeMemory.memset(stubPointer, (byte) 0, ctype.size() + presize); PythonContext pythonContext = PythonContext.get(inliningTarget); HandleContext handleContext = pythonContext.handleContext; @@ -2625,7 +2695,8 @@ static void doGeneric(Node inliningTarget, PythonObject pythonObject, boolean se @Cached GCListRemoveNode gcListRemoveNode, @Cached InlinedConditionProfile isGcProfile, @Cached GetPythonObjectClassNode getClassNode, - @Cached(inline = false) GetTypeFlagsNode getTypeFlagsNode) { + @Cached(inline = false) GetTypeFlagsNode getTypeFlagsNode, + @Cached StringMaterializeNode stringMaterializeNode) { assert CompilerDirectives.isPartialEvaluationConstant(release); assert CompilerDirectives.isPartialEvaluationConstant(keepInGcList); @@ -2654,7 +2725,7 @@ static void doGeneric(Node inliningTarget, PythonObject pythonObject, boolean se UpdateHandleTableReferenceNode.makeDirectReferenceWeak(inliningTarget, handleContext, pythonObject, taggedPointer, handleTableIndex, release, keepInGcList, isLoggable, - gcListRemoveNode, isGcProfile, getClassNode, getTypeFlagsNode); + gcListRemoveNode, isGcProfile, getClassNode, getTypeFlagsNode, stringMaterializeNode); } } } @@ -2736,9 +2807,10 @@ static void doGeneric(Node inliningTarget, HandleContext handleContext, long tag PythonObject pythonObject = (PythonObject) ref; assert pythonObject.ref == null; + // note: parameter 'stringMaterializeNode' can be null because 'release' is false makeDirectReferenceWeak(inliningTarget, handleContext, pythonObject, taggedPointer, handleTableIndex, false, keepInGcList, isLoggable, - gcListRemoveNode, isGcProfile, getClassNode, getTypeFlagsNode); + gcListRemoveNode, isGcProfile, getClassNode, getTypeFlagsNode, null); } } @@ -2777,7 +2849,8 @@ static void makeDirectReferenceWeak(Node inliningTarget, HandleContext handleCon GCListRemoveNode gcListRemoveNode, InlinedConditionProfile isGcProfile, GetPythonObjectClassNode getClassNode, - GetTypeFlagsNode getTypeFlagsNode) { + GetTypeFlagsNode getTypeFlagsNode, + StringMaterializeNode stringMaterializeNode) { long untaggedPointer = HandlePointerConverter.pointerToStub(taggedPointer); if (isLoggable) { @@ -2796,6 +2869,13 @@ static void makeDirectReferenceWeak(Node inliningTarget, HandleContext handleCon assert gc || pythonObject.getRefCount() == MANAGED_REFCNT; if (release) { + if (pythonObject instanceof PString stringObject && !stringObject.isMaterialized()) { + /* + * The native companion is about to be released, so materialization must not + * keep a view of GraalPyUnicodeObject.data. + */ + stringMaterializeNode.execute(inliningTarget, stringObject, true); + } // clear all links between the managed and the native companion object writeIntField(untaggedPointer, CFields.GraalPyObject__handle_table_index, 0); pythonObject.clearNativePointer(); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/ToNativeTypeNode.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/ToNativeTypeNode.java index 055d00fe0f..06fa5c9beb 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/ToNativeTypeNode.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/ToNativeTypeNode.java @@ -201,6 +201,16 @@ static void initializeType(PythonManagedClass clazz, long mem, boolean heaptype, // free'd if the type is free'd. TruffleString nativeUncached = nameUtf8.asNativeUncached(ctx::allocateContextMemory, Encoding.UTF_8, false, true); Object internalNativePointerUncached = nativeUncached.getInternalNativePointerUncached(Encoding.UTF_8); + if (internalNativePointerUncached == null) { + /* + * Native TruffleStrings created from raw long pointers do not have a pointer object for + * lifetime tracking. Force a managed copy before allocating the tp_name with context + * lifetime. See GR-75283. + */ + TruffleString managedName = TruffleString.AsManagedNode.getUncached().execute(nameUtf8, Encoding.UTF_8); + nativeUncached = managedName.asNativeUncached(ctx::allocateContextMemory, Encoding.UTF_8, false, true); + internalNativePointerUncached = nativeUncached.getInternalNativePointerUncached(Encoding.UTF_8); + } long namePointer = PythonUtils.coerceToLong(internalNativePointerUncached, InteropLibrary.getUncached()); writePtrField(mem, CFields.PyTypeObject__tp_name, namePointer); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/structs/CFields.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/structs/CFields.java index 8804568e1b..bdf1892d98 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/structs/CFields.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/structs/CFields.java @@ -123,6 +123,13 @@ public enum CFields { GraalPyVarObject__ob_size(Py_ssize_t), GraalPyVarObject__ob_item(PyObjectPtr), GraalPyFloatObject__ob_fval(Double), + GraalPyUnicodeObject__length(Py_ssize_t), + GraalPyUnicodeObject__byte_length(Py_ssize_t), + GraalPyUnicodeObject__hash(Py_hash_t), + GraalPyUnicodeObject__kind(Int), + GraalPyUnicodeObject__is_ascii(Int), + GraalPyUnicodeObject__interned(Int), + GraalPyUnicodeObject__data(Pointer), PyModuleDef__m_name(ConstCharPtr), PyModuleDef__m_doc(ConstCharPtr), diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/structs/CStructs.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/structs/CStructs.java index efaa969e25..7e10aecb00 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/structs/CStructs.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/structs/CStructs.java @@ -75,6 +75,7 @@ public enum CStructs { PyTupleObject, PyFloatObject, GraalPyFloatObject, + GraalPyUnicodeObject, _PyLongValue, PyLongObject, PyModuleDef_Base, diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringNodes.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringNodes.java index b288cf18bb..a3275a9656 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringNodes.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringNodes.java @@ -58,7 +58,10 @@ import com.oracle.graal.python.builtins.objects.cext.capi.ExternalFunctionInvoker; import com.oracle.graal.python.builtins.objects.cext.capi.NativeCAPISymbol; import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTiming; +import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.HandlePointerConverter; import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.PythonToNativeNode; +import com.oracle.graal.python.builtins.objects.cext.structs.CFields; +import com.oracle.graal.python.builtins.objects.cext.structs.CStructAccess; import com.oracle.graal.python.builtins.objects.common.SequenceNodes; import com.oracle.graal.python.builtins.objects.common.SequenceStorageNodes; import com.oracle.graal.python.builtins.objects.ints.PInt; @@ -87,6 +90,7 @@ import com.oracle.graal.python.runtime.sequence.storage.SequenceStorage; import com.oracle.graal.python.util.OverflowException; import com.oracle.graal.python.util.PythonUtils; +import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.HostCompilerDirectives.InliningCutoff; import com.oracle.truffle.api.dsl.Bind; @@ -116,21 +120,51 @@ public abstract class StringNodes { public abstract static class StringMaterializeNode extends Node { public static TruffleString executeUncached(PString s) { - return StringMaterializeNodeGen.getUncached().execute(null, s); + return executeUncached(s, false); } - public abstract TruffleString execute(Node inliningTarget, PString materialize); + public static TruffleString executeUncached(PString s, boolean copyNativeData) { + return StringMaterializeNodeGen.getUncached().execute(null, s, copyNativeData); + } + + public final TruffleString execute(Node inliningTarget, PString materialize) { + return execute(inliningTarget, materialize, false); + } + + public abstract TruffleString execute(Node inliningTarget, PString materialize, boolean copyNativeData); @Specialization(guards = "x.isMaterialized()") - static TruffleString doMaterialized(PString x) { + static TruffleString doMaterialized(PString x, @SuppressWarnings("unused") boolean copyNativeData) { return x.getMaterialized(); } @Fallback @InliningCutoff - static TruffleString doNative(Node inliningTarget, PString x, + static TruffleString doNative(Node inliningTarget, PString x, boolean copyNativeData, @Cached HiddenAttr.ReadNode readAttrNode, @Cached TruffleString.FromNativePointerWithCompactionUTF32Node fromNativePointerNode) { + if (x.isNative()) { + long ptr = HandlePointerConverter.pointerToStub(x.getNativePointer()); + long data = CStructAccess.readPtrField(ptr, CFields.GraalPyUnicodeObject__data); + if (data != 0) { + int byteLength; + try { + byteLength = PInt.intValueExact(CStructAccess.readLongField(ptr, CFields.GraalPyUnicodeObject__byte_length)); + } catch (OverflowException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + int kind = CStructAccess.readIntField(ptr, CFields.GraalPyUnicodeObject__kind); + TruffleString.CompactionLevel compactionLevel = switch (kind) { + case 1 -> TruffleString.CompactionLevel.S1; + case 2 -> TruffleString.CompactionLevel.S2; + case 4 -> TruffleString.CompactionLevel.S4; + default -> throw CompilerDirectives.shouldNotReachHere(); + }; + TruffleString materialized = fromNativePointerNode.execute(data, 0, byteLength, compactionLevel, copyNativeData); + x.setMaterialized(materialized); + return materialized; + } + } NativeStringData nativeData = x.getNativeStringData(inliningTarget, readAttrNode); TruffleString materialized = nativeData.toTruffleString(fromNativePointerNode); x.setMaterialized(materialized); @@ -166,6 +200,14 @@ static int doNative(PString x, @Cached StringMaterializeNode materializeNode, @Shared @Cached TruffleString.CodePointLengthNode codePointLengthNode) { NativeStringData nativeData = x.getNativeStringData(inliningTarget, readAttrNode); + if (nativeData == null && x.isNative()) { + long ptr = HandlePointerConverter.pointerToStub(x.getNativePointer()); + try { + return PInt.intValueExact(CStructAccess.readLongField(ptr, CFields.GraalPyUnicodeObject__length)); + } catch (OverflowException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + } int charSize = nativeData.getCharSize(); assert charSize == 1 || charSize == 2 || charSize == 4; /* From 264f120711f48c951c2d3e580571b3a46099ade2 Mon Sep 17 00:00:00 2001 From: Florian Angerer Date: Thu, 7 May 2026 15:34:24 +0200 Subject: [PATCH 2/8] Pack unicode state into one field --- .../com.oracle.graal.python.cext/src/capi.h | 10 ++- .../src/unicodeobject.c | 89 +++++++++++++++++-- .../cext/PythonCextUnicodeBuiltins.java | 2 +- .../capi/transitions/CApiTransitions.java | 57 +++++++++++- .../objects/cext/structs/CFields.java | 5 +- .../builtins/objects/str/StringNodes.java | 3 +- 6 files changed, 150 insertions(+), 16 deletions(-) diff --git a/graalpython/com.oracle.graal.python.cext/src/capi.h b/graalpython/com.oracle.graal.python.cext/src/capi.h index 5cceb32794..7840f540ad 100644 --- a/graalpython/com.oracle.graal.python.cext/src/capi.h +++ b/graalpython/com.oracle.graal.python.cext/src/capi.h @@ -161,14 +161,18 @@ typedef struct { #define GRAALPY_UNICODE_INTERN_STATE_INTERNED 1 #define GRAALPY_UNICODE_INTERN_STATE_NOT_INTERNED 2 +#define GRAALPY_UNICODE_KIND_MASK 0x7 +#define GRAALPY_UNICODE_IS_ASCII_FLAG (1ULL << 3) +#define GRAALPY_UNICODE_INTERN_STATE_SHIFT 4 +#define GRAALPY_UNICODE_INTERN_STATE_MASK (0x3ULL << GRAALPY_UNICODE_INTERN_STATE_SHIFT) + typedef struct { GraalPyObject ob_base; Py_ssize_t length; Py_ssize_t byte_length; Py_hash_t hash; - int32_t kind; - int32_t is_ascii; - int32_t interned; + /* Bits 0-2: kind; bit 3: is_ascii; bits 4-5: interned state. */ + uint64_t state; void *data; } GraalPyUnicodeObject; diff --git a/graalpython/com.oracle.graal.python.cext/src/unicodeobject.c b/graalpython/com.oracle.graal.python.cext/src/unicodeobject.c index 496b07de40..7c7bf59205 100644 --- a/graalpython/com.oracle.graal.python.cext/src/unicodeobject.c +++ b/graalpython/com.oracle.graal.python.cext/src/unicodeobject.c @@ -15271,13 +15271,92 @@ PyInit__string(void) #endif // GraalPy change // GraalPy additions +/* Keep in sync with CApiTransitions.encodeGraalPyUnicodeObjectAscii. */ +static inline uint64_t +GraalPyUnicodeObject_EncodeAscii(unsigned int is_ascii) +{ + return is_ascii ? GRAALPY_UNICODE_IS_ASCII_FLAG : 0; +} + +/* Keep in sync with CApiTransitions.encodeGraalPyUnicodeObjectInterned. */ +static inline uint64_t +GraalPyUnicodeObject_EncodeInterned(unsigned int interned) +{ + return (uint64_t) interned << GRAALPY_UNICODE_INTERN_STATE_SHIFT; +} + +/* Keep in sync with CApiTransitions.createGraalPyUnicodeObjectState. */ +static inline uint64_t +GraalPyUnicodeObject_CreateState(int kind, unsigned int is_ascii, unsigned int interned) +{ + return kind | GraalPyUnicodeObject_EncodeAscii(is_ascii) | GraalPyUnicodeObject_EncodeInterned(interned); +} + +/* Keep in sync with CApiTransitions.getGraalPyUnicodeObjectInternedFromState. */ +static inline unsigned int +GraalPyUnicodeObject_GetInternedFromState(uint64_t state) +{ + return (state & GRAALPY_UNICODE_INTERN_STATE_MASK) >> GRAALPY_UNICODE_INTERN_STATE_SHIFT; +} + +/* Keep in sync with CApiTransitions.updateGraalPyUnicodeObjectInterned. */ +static inline uint64_t +GraalPyUnicodeObject_UpdateInterned(uint64_t state, unsigned int interned) +{ + return (state & ~GRAALPY_UNICODE_INTERN_STATE_MASK) | GraalPyUnicodeObject_EncodeInterned(interned); +} + +/* Keep in sync with CApiTransitions.isGraalPyUnicodeObjectAsciiFromState. */ +static inline unsigned int +GraalPyUnicodeObject_IsAsciiFromState(uint64_t state) +{ + return (state & GRAALPY_UNICODE_IS_ASCII_FLAG) != 0; +} + +/* Keep in sync with CApiTransitions.getGraalPyUnicodeObjectKindFromState. */ +static inline int +GraalPyUnicodeObject_GetKindFromState(uint64_t state) +{ + return state & GRAALPY_UNICODE_KIND_MASK; +} + +/* Keep in sync with CApiTransitions.getGraalPyUnicodeObjectInternedFromState. */ +static inline unsigned int +GraalPyUnicodeObject_GetInterned(GraalPyUnicodeObject *unicode) +{ + return GraalPyUnicodeObject_GetInternedFromState(unicode->state); +} + +/* Keep in sync with CApiTransitions.setGraalPyUnicodeObjectInterned. */ +static inline void +GraalPyUnicodeObject_SetInterned(GraalPyUnicodeObject *unicode, unsigned int interned) +{ + unicode->state = GraalPyUnicodeObject_UpdateInterned(unicode->state, interned); +} + +/* Keep in sync with CApiTransitions.isGraalPyUnicodeObjectAsciiFromState. */ +static inline unsigned int +GraalPyUnicodeObject_IsAscii(GraalPyUnicodeObject *unicode) +{ + return GraalPyUnicodeObject_IsAsciiFromState(unicode->state); +} + +/* Keep in sync with CApiTransitions.getGraalPyUnicodeObjectKind. */ +static inline int +GraalPyUnicodeObject_GetKind(GraalPyUnicodeObject *unicode) +{ + return GraalPyUnicodeObject_GetKindFromState(unicode->state); +} + unsigned int GraalPyUnicode_CHECK_INTERNED(PyObject *op) { if (points_to_py_handle_space(op)) { GraalPyUnicodeObject *unicode = (GraalPyUnicodeObject *) pointer_to_stub(op); - if (unicode->interned == GRAALPY_UNICODE_INTERN_STATE_UNDETERMINED) { - unicode->interned = GraalPyPrivate_Unicode_CheckInterned(op) ? GRAALPY_UNICODE_INTERN_STATE_INTERNED : GRAALPY_UNICODE_INTERN_STATE_NOT_INTERNED; + unsigned int interned = GraalPyUnicodeObject_GetInterned(unicode); + if (interned == GRAALPY_UNICODE_INTERN_STATE_UNDETERMINED) { + interned = GraalPyPrivate_Unicode_CheckInterned(op) ? GRAALPY_UNICODE_INTERN_STATE_INTERNED : GRAALPY_UNICODE_INTERN_STATE_NOT_INTERNED; + GraalPyUnicodeObject_SetInterned(unicode, interned); } - return unicode->interned == GRAALPY_UNICODE_INTERN_STATE_INTERNED ? SSTATE_INTERNED_MORTAL : SSTATE_NOT_INTERNED; + return interned == GRAALPY_UNICODE_INTERN_STATE_INTERNED ? SSTATE_INTERNED_MORTAL : SSTATE_NOT_INTERNED; } return _PyASCIIObject_CAST(op)->state.interned; } @@ -15291,7 +15370,7 @@ Py_ssize_t GraalPyUnicode_GET_LENGTH(PyObject* op) { unsigned int GraalPyUnicode_IS_ASCII(PyObject* op) { if (points_to_py_handle_space(op)) { - return ((GraalPyUnicodeObject *) pointer_to_stub(op))->is_ascii; + return GraalPyUnicodeObject_IsAscii((GraalPyUnicodeObject *) pointer_to_stub(op)); } return _PyASCIIObject_CAST(op)->state.ascii; } @@ -15305,7 +15384,7 @@ unsigned int GraalPyUnicode_IS_COMPACT(PyObject* op) { int GraalPyUnicode_KIND(PyObject* op) { if (points_to_py_handle_space(op)) { - return ((GraalPyUnicodeObject *) pointer_to_stub(op))->kind; + return GraalPyUnicodeObject_GetKind((GraalPyUnicodeObject *) pointer_to_stub(op)); } return _PyASCIIObject_CAST(op)->state.kind; } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextUnicodeBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextUnicodeBuiltins.java index bb5630a5dd..b265ae1c6d 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextUnicodeBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextUnicodeBuiltins.java @@ -332,7 +332,7 @@ static Object withPString(PString str, str.intern(); if (str.isNative()) { long ptr = HandlePointerConverter.pointerToStub(str.getNativePointer()); - CStructAccess.writeIntField(ptr, CFields.GraalPyUnicodeObject__interned, GRAALPY_UNICODE_INTERN_STATE_INTERNED); + CApiTransitions.setGraalPyUnicodeObjectInterned(ptr, GRAALPY_UNICODE_INTERN_STATE_INTERNED); } /* * TODO this is not integrated with str.intern, pointer comparisons of two str.intern'ed diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/CApiTransitions.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/CApiTransitions.java index 484b222a92..83312f1027 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/CApiTransitions.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/CApiTransitions.java @@ -47,6 +47,7 @@ import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.PollingState.RQ_READY; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.PollingState.RQ_UNINITIALIZED; import static com.oracle.graal.python.builtins.objects.cext.structs.CStructAccess.readIntField; +import static com.oracle.graal.python.builtins.objects.cext.structs.CStructAccess.readLongField; import static com.oracle.graal.python.builtins.objects.cext.structs.CStructAccess.writeDoubleField; import static com.oracle.graal.python.builtins.objects.cext.structs.CStructAccess.writeIntField; import static com.oracle.graal.python.builtins.objects.cext.structs.CStructAccess.writeLongField; @@ -176,6 +177,10 @@ public abstract class CApiTransitions { public static final int GRAALPY_UNICODE_INTERN_STATE_UNDETERMINED = 0; public static final int GRAALPY_UNICODE_INTERN_STATE_INTERNED = 1; public static final int GRAALPY_UNICODE_INTERN_STATE_NOT_INTERNED = 2; + private static final int GRAALPY_UNICODE_KIND_MASK = 0x7; + private static final long GRAALPY_UNICODE_IS_ASCII_FLAG = 1L << 3; + private static final int GRAALPY_UNICODE_INTERN_STATE_SHIFT = 4; + private static final long GRAALPY_UNICODE_INTERN_STATE_MASK = 0x3L << GRAALPY_UNICODE_INTERN_STATE_SHIFT; enum PollingState { /** startup barrier not finished yet, polling must not run */ @@ -459,14 +464,60 @@ public static long initializeGraalPyUnicodeObject(long rawPointer, long elements writeLongField(rawPointer, CFields.GraalPyUnicodeObject__length, elements); writeLongField(rawPointer, CFields.GraalPyUnicodeObject__byte_length, byteLength); writeLongField(rawPointer, CFields.GraalPyUnicodeObject__hash, -1); - writeIntField(rawPointer, CFields.GraalPyUnicodeObject__kind, charSize); - writeIntField(rawPointer, CFields.GraalPyUnicodeObject__is_ascii, isAscii ? 1 : 0); - writeIntField(rawPointer, CFields.GraalPyUnicodeObject__interned, interned); + writeLongField(rawPointer, CFields.GraalPyUnicodeObject__state, createGraalPyUnicodeObjectState(charSize, isAscii, interned)); writePtrField(rawPointer, CFields.GraalPyUnicodeObject__data, data); NativeMemory.memset(data + byteLength, (byte) 0, charSize); return data; } + // Keep in sync with unicodeobject.c:GraalPyUnicodeObject_CreateState. + private static long createGraalPyUnicodeObjectState(int charSize, boolean isAscii, int interned) { + assert (charSize & ~GRAALPY_UNICODE_KIND_MASK) == 0; + return charSize | encodeGraalPyUnicodeObjectAscii(isAscii) | encodeGraalPyUnicodeObjectInterned(interned); + } + + // Keep in sync with unicodeobject.c:GraalPyUnicodeObject_EncodeAscii. + private static long encodeGraalPyUnicodeObjectAscii(boolean isAscii) { + return isAscii ? GRAALPY_UNICODE_IS_ASCII_FLAG : 0; + } + + // Keep in sync with unicodeobject.c:GraalPyUnicodeObject_EncodeInterned. + private static long encodeGraalPyUnicodeObjectInterned(int interned) { + return (long) interned << GRAALPY_UNICODE_INTERN_STATE_SHIFT; + } + + // Keep in sync with unicodeobject.c:GraalPyUnicodeObject_GetInternedFromState. + private static int getGraalPyUnicodeObjectInternedFromState(long state) { + return (int) ((state & GRAALPY_UNICODE_INTERN_STATE_MASK) >> GRAALPY_UNICODE_INTERN_STATE_SHIFT); + } + + // Keep in sync with unicodeobject.c:GraalPyUnicodeObject_IsAsciiFromState. + private static boolean isGraalPyUnicodeObjectAsciiFromState(long state) { + return (state & GRAALPY_UNICODE_IS_ASCII_FLAG) != 0; + } + + // Keep in sync with unicodeobject.c:GraalPyUnicodeObject_GetKindFromState. + private static int getGraalPyUnicodeObjectKindFromState(long state) { + return (int) (state & GRAALPY_UNICODE_KIND_MASK); + } + + // Keep in sync with unicodeobject.c:GraalPyUnicodeObject_UpdateInterned. + private static long updateGraalPyUnicodeObjectInterned(long state, int interned) { + return (state & ~GRAALPY_UNICODE_INTERN_STATE_MASK) | encodeGraalPyUnicodeObjectInterned(interned); + } + + // Keep in sync with unicodeobject.c:GraalPyUnicodeObject_GetKind. + public static int getGraalPyUnicodeObjectKind(long rawPointer) { + return getGraalPyUnicodeObjectKindFromState(readLongField(rawPointer, CFields.GraalPyUnicodeObject__state)); + } + + // Keep in sync with unicodeobject.c:GraalPyUnicodeObject_SetInterned. + public static void setGraalPyUnicodeObjectInterned(long rawPointer, int interned) { + assert interned == GRAALPY_UNICODE_INTERN_STATE_UNDETERMINED || interned == GRAALPY_UNICODE_INTERN_STATE_INTERNED || interned == GRAALPY_UNICODE_INTERN_STATE_NOT_INTERNED; + long state = readLongField(rawPointer, CFields.GraalPyUnicodeObject__state); + writeLongField(rawPointer, CFields.GraalPyUnicodeObject__state, updateGraalPyUnicodeObjectInterned(state, interned)); + } + public static final class PyCapsuleReference extends IdReference { private final PyCapsule.CapsuleData data; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/structs/CFields.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/structs/CFields.java index bdf1892d98..8104dc8067 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/structs/CFields.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/structs/CFields.java @@ -67,6 +67,7 @@ import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.Py_ssize_t; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.UCHAR; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.UINTPTR_T; +import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.UINT64_T; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.UNSIGNED_INT; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.UNSIGNED_LONG; import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.allocfunc; @@ -126,9 +127,7 @@ public enum CFields { GraalPyUnicodeObject__length(Py_ssize_t), GraalPyUnicodeObject__byte_length(Py_ssize_t), GraalPyUnicodeObject__hash(Py_hash_t), - GraalPyUnicodeObject__kind(Int), - GraalPyUnicodeObject__is_ascii(Int), - GraalPyUnicodeObject__interned(Int), + GraalPyUnicodeObject__state(UINT64_T), GraalPyUnicodeObject__data(Pointer), PyModuleDef__m_name(ConstCharPtr), diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringNodes.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringNodes.java index a3275a9656..8d1743991f 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringNodes.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringNodes.java @@ -58,6 +58,7 @@ import com.oracle.graal.python.builtins.objects.cext.capi.ExternalFunctionInvoker; import com.oracle.graal.python.builtins.objects.cext.capi.NativeCAPISymbol; import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTiming; +import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions; import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.HandlePointerConverter; import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.PythonToNativeNode; import com.oracle.graal.python.builtins.objects.cext.structs.CFields; @@ -153,7 +154,7 @@ static TruffleString doNative(Node inliningTarget, PString x, boolean copyNative } catch (OverflowException e) { throw CompilerDirectives.shouldNotReachHere(e); } - int kind = CStructAccess.readIntField(ptr, CFields.GraalPyUnicodeObject__kind); + int kind = CApiTransitions.getGraalPyUnicodeObjectKind(ptr); TruffleString.CompactionLevel compactionLevel = switch (kind) { case 1 -> TruffleString.CompactionLevel.S1; case 2 -> TruffleString.CompactionLevel.S2; From a28a6e5a2e9c82a821659899f52ad90b08fadb97 Mon Sep 17 00:00:00 2001 From: Florian Angerer Date: Thu, 7 May 2026 16:01:26 +0200 Subject: [PATCH 3/8] Introduce annotation @CApiConstant to mirror Java constants to C --- .../python/annotations/CApiConstant.java | 56 ++++++++++++ .../processor/CApiBuiltinsProcessor.java | 91 +++++++++++++++++-- 2 files changed, 141 insertions(+), 6 deletions(-) create mode 100644 graalpython/com.oracle.graal.python.annotations/src/com/oracle/graal/python/annotations/CApiConstant.java diff --git a/graalpython/com.oracle.graal.python.annotations/src/com/oracle/graal/python/annotations/CApiConstant.java b/graalpython/com.oracle.graal.python.annotations/src/com/oracle/graal/python/annotations/CApiConstant.java new file mode 100644 index 0000000000..6147b5378b --- /dev/null +++ b/graalpython/com.oracle.graal.python.annotations/src/com/oracle/graal/python/annotations/CApiConstant.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * The Universal Permissive License (UPL), Version 1.0 + * + * Subject to the condition set forth below, permission is hereby granted to any + * person obtaining a copy of this software, associated documentation and/or + * data (collectively the "Software"), free of charge and under any and all + * copyright rights in the Software, and any and all patent rights owned or + * freely licensable by each licensor hereunder covering either (i) the + * unmodified Software as contributed to or provided by such licensor, or (ii) + * the Larger Works (as defined below), to deal in both + * + * (a) the Software, and + * + * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if + * one is included with the Software each a "Larger Work" to which the Software + * is contributed by such licensors), + * + * without restriction, including without limitation the rights to copy, create + * derivative works of, display, perform, and distribute the Software and make, + * use, sell, offer for sale, import, export, have made, and have sold the + * Software and the Larger Work(s), and to sublicense the foregoing rights on + * either these or other terms. + * + * This license is subject to the following condition: + * + * The above copyright notice and either this complete permission notice or at a + * minimum a reference to the UPL must be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +package com.oracle.graal.python.annotations; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Marks a Java primitive constant as the source of truth for a C {@code #define} generated into + * {@code capi.gen.h}. This is the opposite direction of {@link CApiConstants}, which marks C + * constants that should be mirrored to Java. + */ +@Retention(RetentionPolicy.SOURCE) +@Target(ElementType.FIELD) +public @interface CApiConstant { +} diff --git a/graalpython/com.oracle.graal.python.processor/src/com/oracle/graal/python/processor/CApiBuiltinsProcessor.java b/graalpython/com.oracle.graal.python.processor/src/com/oracle/graal/python/processor/CApiBuiltinsProcessor.java index f5622e81a4..fd18b0b0bf 100644 --- a/graalpython/com.oracle.graal.python.processor/src/com/oracle/graal/python/processor/CApiBuiltinsProcessor.java +++ b/graalpython/com.oracle.graal.python.processor/src/com/oracle/graal/python/processor/CApiBuiltinsProcessor.java @@ -80,6 +80,7 @@ import javax.tools.Diagnostic.Kind; import javax.tools.StandardLocation; +import com.oracle.graal.python.annotations.CApiConstant; import com.oracle.graal.python.annotations.CApiConstants; import com.oracle.graal.python.annotations.CApiExternalFunctionSignatures; import com.oracle.graal.python.annotations.CApiFields; @@ -193,6 +194,25 @@ private static String name(Element obj) { return obj.getSimpleName().toString(); } + private static String formatCConstantValue(VariableElement constant, Object value) { + TypeKind kind = constant.asType().getKind(); + return switch (kind) { + case BOOLEAN -> ((Boolean) value) ? "1" : "0"; + case BYTE, SHORT, INT -> value.toString(); + case LONG -> value + "LL"; + case CHAR -> Integer.toString((Character) value); + case FLOAT -> { + float f = (Float) value; + yield Float.isFinite(f) ? Float.toString(f) + "f" : null; + } + case DOUBLE -> { + double d = (Double) value; + yield Double.isFinite(d) ? Double.toString(d) : null; + } + default -> null; + }; + } + private static final class CApiBuiltinDesc { public final Element origin; public final String name; @@ -216,6 +236,18 @@ public CApiBuiltinDesc(Element origin, String name, VariableElement returnType, } } + private static final class CApiConstantDesc { + public final Element origin; + public final String name; + public final String value; + + public CApiConstantDesc(Element origin, String name, String value) { + this.origin = origin; + this.name = name; + this.value = value; + } + } + private String capiTypeToLogicalType(VariableElement element) { var init = getFieldInitializer(element); if (init.contains("Void")) { @@ -270,7 +302,9 @@ private static String argName(int i) { @Override public Set getSupportedAnnotationTypes() { - return Set.of(CAPI_BUILTIN, CAPI_BUILTINS, CApiFields.class.getName(), CApiConstants.class.getName(), CApiStructs.class.getName(), CApiExternalFunctionSignatures.class.getName()); + return Set.of(CAPI_BUILTIN, CAPI_BUILTINS, CApiFields.class.getName(), + CApiConstant.class.getName(), CApiConstants.class.getName(), CApiStructs.class.getName(), + CApiExternalFunctionSignatures.class.getName()); } @Override @@ -632,8 +666,15 @@ private void generateCApiSource(List javaBuiltins, List updateResource("capi.gen.c.h", javaBuiltins, lines); } - private void updateResource(String name, List javaBuiltins, List lines, StandardLocation loc) throws IOException { - var origins = javaBuiltins.stream().map((jb) -> jb.origin).toArray(Element[]::new); + private void updateResource(String name, List javaBuiltins, List additionalOrigins, + List lines, StandardLocation loc) + throws IOException { + var originsList = new ArrayList(); + for (var javaBuiltin : javaBuiltins) { + originsList.add(javaBuiltin.origin); + } + originsList.addAll(additionalOrigins); + var origins = originsList.toArray(Element[]::new); String oldContents = ""; String newContents = String.join(System.lineSeparator(), lines); try { @@ -651,6 +692,11 @@ private void updateResource(String name, List javaBuiltins, Lis } } + private void updateResource(String name, List javaBuiltins, List lines, + StandardLocation loc) throws IOException { + updateResource(name, javaBuiltins, List.of(), lines, loc); + } + private void updateResource(String name, List javaBuiltins, List lines) throws IOException { updateResource(name, javaBuiltins, lines, StandardLocation.NATIVE_HEADER_OUTPUT); } @@ -660,8 +706,14 @@ private void updateResource(String name, List javaBuiltins, Lis * in Java code. Additionally, it generates helpers for all "GraalPyPrivate_Get_" and * "GraalPyPrivate_Set_" builtins. */ - private void generateCApiHeader(List javaBuiltins) throws IOException { + private void generateCApiHeader(List javaBuiltins, List javaConstants) throws IOException { List lines = new ArrayList<>(); + for (var javaConstant : javaConstants) { + lines.add("#define " + javaConstant.name + " " + javaConstant.value); + } + if (!javaConstants.isEmpty()) { + lines.add(""); + } lines.add("#define CAPI_BUILTINS \\"); int id = 0; for (var entry : javaBuiltins) { @@ -705,7 +757,8 @@ private void generateCApiHeader(List javaBuiltins) throws IOExc } } - updateResource("capi.gen.h", javaBuiltins, lines); + updateResource("capi.gen.h", javaBuiltins, javaConstants.stream().map((c) -> c.origin).toList(), lines, + StandardLocation.NATIVE_HEADER_OUTPUT); } /** @@ -2036,7 +2089,33 @@ public boolean process(Set annotations, RoundEnvironment List constants = new ArrayList<>(); List fields = new ArrayList<>(); List structs = new ArrayList<>(); + List javaConstants = new ArrayList<>(); List externalFunctionSignatures = new ArrayList<>(); + for (var el : re.getElementsAnnotatedWith(CApiConstant.class)) { + if (el.getKind() == ElementKind.FIELD && el.asType().getKind().isPrimitive() && + el.getModifiers().contains(Modifier.STATIC) && el.getModifiers().contains(Modifier.FINAL)) { + VariableElement constant = (VariableElement) el; + Object value = constant.getConstantValue(); + String cValue = value == null ? null : formatCConstantValue(constant, value); + if (cValue != null) { + javaConstants.add(new CApiConstantDesc(constant, constant.getSimpleName().toString(), cValue)); + } else { + processingEnv.getMessager().printError(CApiConstant.class.getSimpleName() + + " is only applicable for Java primitive compile-time constants with C-representable values.", el); + } + } else { + processingEnv.getMessager().printError(CApiConstant.class.getSimpleName() + + " is only applicable for static final Java primitive fields.", el); + } + } + javaConstants.sort((a, b) -> a.name.compareTo(b.name)); + for (int i = 1; i < javaConstants.size(); i++) { + if (javaConstants.get(i - 1).name.equals(javaConstants.get(i).name)) { + processingEnv.getMessager().printError("Duplicate " + CApiConstant.class.getSimpleName() + + " name: " + javaConstants.get(i).name, + javaConstants.get(i).origin); + } + } for (var el : re.getElementsAnnotatedWith(CApiConstants.class)) { if (el.getKind() == ElementKind.ENUM) { for (var enumBit : el.getEnclosedElements()) { @@ -2102,7 +2181,7 @@ public boolean process(Set annotations, RoundEnvironment if (trees != null) { // needs jdk.compiler generateCApiSource(allBuiltins, constants, fields, structs); - generateCApiHeader(javaBuiltins); + generateCApiHeader(javaBuiltins, javaConstants); generateExternalFunctionInvoker(new ArrayList<>(sigs.values())); generateExternalFunctionHelperNodes(externalFunctionDescs); generateExternalFunctionRootNodes(externalFunctionDescs, cApiExternalFunctionWrapperDescs, sigs); From 192781635557e849f8f403d1f0b05bbfd2e5fb11 Mon Sep 17 00:00:00 2001 From: Florian Angerer Date: Thu, 7 May 2026 16:02:36 +0200 Subject: [PATCH 4/8] Mirror GRAALPY_UNICODE_* constants to C --- graalpython/com.oracle.graal.python.cext/src/capi.h | 9 --------- .../objects/cext/capi/transitions/CApiTransitions.java | 8 ++++++++ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/graalpython/com.oracle.graal.python.cext/src/capi.h b/graalpython/com.oracle.graal.python.cext/src/capi.h index 7840f540ad..d3cec68c49 100644 --- a/graalpython/com.oracle.graal.python.cext/src/capi.h +++ b/graalpython/com.oracle.graal.python.cext/src/capi.h @@ -157,15 +157,6 @@ typedef struct { double ob_fval; } GraalPyFloatObject; -#define GRAALPY_UNICODE_INTERN_STATE_UNDETERMINED 0 -#define GRAALPY_UNICODE_INTERN_STATE_INTERNED 1 -#define GRAALPY_UNICODE_INTERN_STATE_NOT_INTERNED 2 - -#define GRAALPY_UNICODE_KIND_MASK 0x7 -#define GRAALPY_UNICODE_IS_ASCII_FLAG (1ULL << 3) -#define GRAALPY_UNICODE_INTERN_STATE_SHIFT 4 -#define GRAALPY_UNICODE_INTERN_STATE_MASK (0x3ULL << GRAALPY_UNICODE_INTERN_STATE_SHIFT) - typedef struct { GraalPyObject ob_base; Py_ssize_t length; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/CApiTransitions.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/CApiTransitions.java index 83312f1027..1fa60774cc 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/CApiTransitions.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/CApiTransitions.java @@ -74,6 +74,7 @@ import java.util.logging.Level; import com.oracle.graal.python.PythonLanguage; +import com.oracle.graal.python.annotations.CApiConstant; import com.oracle.graal.python.builtins.PythonBuiltinClassType; import com.oracle.graal.python.builtins.objects.PNone; import com.oracle.graal.python.builtins.objects.PythonAbstractObject; @@ -174,12 +175,19 @@ public abstract class CApiTransitions { private static final TruffleLogger LOGGER = CApiContext.getLogger(CApiTransitions.class); + @CApiConstant // public static final int GRAALPY_UNICODE_INTERN_STATE_UNDETERMINED = 0; + @CApiConstant // public static final int GRAALPY_UNICODE_INTERN_STATE_INTERNED = 1; + @CApiConstant // public static final int GRAALPY_UNICODE_INTERN_STATE_NOT_INTERNED = 2; + @CApiConstant // private static final int GRAALPY_UNICODE_KIND_MASK = 0x7; + @CApiConstant // private static final long GRAALPY_UNICODE_IS_ASCII_FLAG = 1L << 3; + @CApiConstant // private static final int GRAALPY_UNICODE_INTERN_STATE_SHIFT = 4; + @CApiConstant // private static final long GRAALPY_UNICODE_INTERN_STATE_MASK = 0x3L << GRAALPY_UNICODE_INTERN_STATE_SHIFT; enum PollingState { From 5fd8c476f8e59128c741956148b6debfecc27e9f Mon Sep 17 00:00:00 2001 From: Florian Angerer Date: Thu, 7 May 2026 17:21:02 +0200 Subject: [PATCH 5/8] Use @CApiConstant on logging constants --- .../com.oracle.graal.python.cext/src/capi.c | 16 ++--- .../com.oracle.graal.python.cext/src/capi.h | 32 +++------- .../src/gcmodule.c | 6 +- .../com.oracle.graal.python.cext/src/object.c | 2 +- .../src/obmalloc.c | 14 ++--- .../modules/cext/PythonCextBuiltins.java | 61 +++++++++++-------- 6 files changed, 65 insertions(+), 66 deletions(-) diff --git a/graalpython/com.oracle.graal.python.cext/src/capi.c b/graalpython/com.oracle.graal.python.cext/src/capi.c index 00f0cf3458..783084af9a 100644 --- a/graalpython/com.oracle.graal.python.cext/src/capi.c +++ b/graalpython/com.oracle.graal.python.cext/src/capi.c @@ -254,7 +254,7 @@ uint32_t Py_Truffle_Options; #undef bool static void initialize_builtin_types_and_structs() { clock_t t = clock(); - GraalPyPrivate_Log(PY_TRUFFLE_LOG_FINE, "initialize_builtin_types_and_structs..."); + GraalPyPrivate_Log(GRAALPY_LOG_FINE, "initialize_builtin_types_and_structs..."); static int64_t builtin_types[] = { #define PY_TRUFFLE_TYPE_GENERIC(GLOBAL_NAME, __TYPE_NAME__, a, b, c, d, e, f, g) &GLOBAL_NAME, __TYPE_NAME__, #define PY_TRUFFLE_TYPE_EXTERN(GLOBAL_NAME, __TYPE_NAME__) &GLOBAL_NAME, __TYPE_NAME__, @@ -271,7 +271,7 @@ static void initialize_builtin_types_and_structs() { // fix up for circular dependency: PyType_Type.tp_base = &PyBaseObject_Type; - GraalPyPrivate_Log(PY_TRUFFLE_LOG_FINE, "initialize_builtin_types_and_structs: %fs", ((double) (clock() - t)) / CLOCKS_PER_SEC); + GraalPyPrivate_Log(GRAALPY_LOG_FINE, "initialize_builtin_types_and_structs: %fs", ((double) (clock() - t)) / CLOCKS_PER_SEC); } int mmap_getbuffer(PyObject *self, Py_buffer *view, int flags) { @@ -284,7 +284,7 @@ int mmap_getbuffer(PyObject *self, Py_buffer *view, int flags) { } PyAPI_FUNC(void) GraalPyPrivate_MMap_InitBufferProtocol(PyObject* mmap_type) { - GraalPyPrivate_Log(PY_TRUFFLE_LOG_FINE, "GraalPyPrivate_MMap_InitBufferProtocol"); + GraalPyPrivate_Log(GRAALPY_LOG_FINE, "GraalPyPrivate_MMap_InitBufferProtocol"); assert(PyType_Check(mmap_type)); static PyBufferProcs mmap_as_buffer = { @@ -433,7 +433,7 @@ GraalPyPrivate_SUBREF(intptr_t ptr, Py_ssize_t value) Py_ssize_t new_value = ((obj->ob_refcnt) -= value); if (new_value == 0) { - GraalPyPrivate_Log(PY_TRUFFLE_LOG_FINER, "%s: _Py_Dealloc(0x%zx)", + GraalPyPrivate_Log(GRAALPY_LOG_FINER, "%s: _Py_Dealloc(0x%zx)", __func__, obj); _Py_Dealloc(obj); } @@ -451,7 +451,7 @@ GraalPyPrivate_BulkDealloc(intptr_t ptrArray[], int64_t len) { for (int i = 0; i < len; i++) { PyObject *obj = (PyObject *)ptrArray[i]; - GraalPyPrivate_Log(PY_TRUFFLE_LOG_FINER, + GraalPyPrivate_Log(GRAALPY_LOG_FINER, "%s: _Py_Dealloc(a %s at 0x%zx)", __func__, Py_TYPE(obj)->tp_name, obj); _Py_Dealloc(obj); @@ -476,7 +476,7 @@ GraalPyPrivate_BulkDeallocOnShutdown(intptr_t ptrArray[], int64_t len) /* we don't need to care about objects with default deallocation process */ obj->ob_refcnt = 0; - GraalPyPrivate_Log(PY_TRUFFLE_LOG_FINER, "%s: _Py_Dealloc(0x%zx)", + GraalPyPrivate_Log(GRAALPY_LOG_FINER, "%s: _Py_Dealloc(0x%zx)", __func__, obj); _Py_Dealloc(obj); } @@ -623,7 +623,7 @@ PyAPI_FUNC(PyThreadState **) initialize_graal_capi(void **builtin_closures, GCSt * context exits and its table is the "latest", we delay freeing it. */ initialize_builtins(builtin_closures); - GraalPyPrivate_Log(PY_TRUFFLE_LOG_FINE, "initialize_builtins: %fs", ((double) (clock() - t)) / CLOCKS_PER_SEC); + GraalPyPrivate_Log(GRAALPY_LOG_FINE, "initialize_builtins: %fs", ((double) (clock() - t)) / CLOCKS_PER_SEC); Py_Truffle_Options = GraalPyPrivate_Native_Options(); initialize_builtin_types_and_structs(); @@ -638,7 +638,7 @@ PyAPI_FUNC(PyThreadState **) initialize_graal_capi(void **builtin_closures, GCSt // TODO: initialize during cext initialization doesn't work at the moment Py_FileSystemDefaultEncoding = "utf-8"; // strdup(PyUnicode_AsUTF8(GraalPyPrivate_FileSystemDefaultEncoding())); - GraalPyPrivate_Log(PY_TRUFFLE_LOG_FINE, "initialize_graal_capi: %fs", ((double) (clock() - t)) / CLOCKS_PER_SEC); + GraalPyPrivate_Log(GRAALPY_LOG_FINE, "initialize_graal_capi: %fs", ((double) (clock() - t)) / CLOCKS_PER_SEC); return &tstate_current; } diff --git a/graalpython/com.oracle.graal.python.cext/src/capi.h b/graalpython/com.oracle.graal.python.cext/src/capi.h index d3cec68c49..e0165ba9f2 100644 --- a/graalpython/com.oracle.graal.python.cext/src/capi.h +++ b/graalpython/com.oracle.graal.python.cext/src/capi.h @@ -97,18 +97,6 @@ extern graalpy_detach_native_thread_func graalpy_detach_native_thread; #define SRC_CS "utf-8" -/* Flags definitions representing global (debug) options. */ -#define PY_TRUFFLE_TRACE_MEM 0x1 -#define PY_TRUFFLE_LOG_INFO 0x2 -#define PY_TRUFFLE_LOG_CONFIG 0x4 -#define PY_TRUFFLE_LOG_FINE 0x8 -#define PY_TRUFFLE_LOG_FINER 0x10 -#define PY_TRUFFLE_LOG_FINEST 0x20 -#define PY_TRUFFLE_DEBUG_CAPI 0x40 -#define PY_TRUFFLE_PYTHON_GC 0x80 -#define PY_TRUFFLE_POISON_NATIVE_MEMORY_ON_FREE 0x100 -#define PY_TRUFFLE_SAMPLE_NATIVE_MEMORY_ALLOC_SITES 0x200 - typedef struct mmap_object mmap_object; typedef struct _gc_runtime_state GCState; // originally in 'gcmodule.c' @@ -230,35 +218,35 @@ static void print_c_stacktrace() { /* Flags definitions representing global (debug) options. */ static MUST_INLINE int GraalPyPrivate_Trace_Memory() { - return Py_Truffle_Options & PY_TRUFFLE_TRACE_MEM; + return Py_Truffle_Options & GRAALPY_TRACE_MEM; } static MUST_INLINE int GraalPyPrivate_Log_Info() { - return Py_Truffle_Options & PY_TRUFFLE_LOG_INFO; + return Py_Truffle_Options & GRAALPY_LOG_INFO; } static MUST_INLINE int GraalPyPrivate_Log_Config() { - return Py_Truffle_Options & PY_TRUFFLE_LOG_CONFIG; + return Py_Truffle_Options & GRAALPY_LOG_CONFIG; } static MUST_INLINE int GraalPyPrivate_Log_Fine() { - return Py_Truffle_Options & PY_TRUFFLE_LOG_FINE; + return Py_Truffle_Options & GRAALPY_LOG_FINE; } static MUST_INLINE int GraalPyPrivate_Log_Finer() { - return Py_Truffle_Options & PY_TRUFFLE_LOG_FINER; + return Py_Truffle_Options & GRAALPY_LOG_FINER; } static MUST_INLINE int GraalPyPrivate_Log_Finest() { - return Py_Truffle_Options & PY_TRUFFLE_LOG_FINEST; + return Py_Truffle_Options & GRAALPY_LOG_FINEST; } static MUST_INLINE int GraalPyPrivate_Debug_CAPI() { - return Py_Truffle_Options & PY_TRUFFLE_DEBUG_CAPI; + return Py_Truffle_Options & GRAALPY_DEBUG_CAPI; } static MUST_INLINE int GraalPyPrivate_PythonGC() { - return Py_Truffle_Options & PY_TRUFFLE_PYTHON_GC; + return Py_Truffle_Options & GRAALPY_PYTHON_GC; } static MUST_INLINE int GraalPyPrivate_PoisonNativeMemoryOnFree() { - return Py_Truffle_Options & PY_TRUFFLE_POISON_NATIVE_MEMORY_ON_FREE; + return Py_Truffle_Options & GRAALPY_POISON_NATIVE_MEMORY_ON_FREE; } static MUST_INLINE int GraalPyPrivate_SampleNativeMemoryAllocSites() { - return Py_Truffle_Options & PY_TRUFFLE_SAMPLE_NATIVE_MEMORY_ALLOC_SITES; + return Py_Truffle_Options & GRAALPY_SAMPLE_NATIVE_MEMORY_ALLOC_SITES; } static void diff --git a/graalpython/com.oracle.graal.python.cext/src/gcmodule.c b/graalpython/com.oracle.graal.python.cext/src/gcmodule.c index f354d504ed..94ef36e376 100644 --- a/graalpython/com.oracle.graal.python.cext/src/gcmodule.c +++ b/graalpython/com.oracle.graal.python.cext/src/gcmodule.c @@ -49,19 +49,19 @@ call_traverse(traverseproc traverse, PyObject *op, visitproc visit, void *arg) return 0; } if (!traverse) { - GraalPyPrivate_Log(PY_TRUFFLE_LOG_FINE, + GraalPyPrivate_Log(GRAALPY_LOG_FINE, "type '%.100s' is a GC type but tp_traverse is NULL", Py_TYPE((op))->tp_name); return 0; } else { if (_PyObject_IsFreed(op)) { - GraalPyPrivate_Log(PY_TRUFFLE_LOG_FINE, + GraalPyPrivate_Log(GRAALPY_LOG_FINE, "we tried to call tp_traverse on a freed object at %p (ctx %p)!", op, arg); return 0; } if (_PyObject_IsFreed((PyObject *)Py_TYPE(op))) { - GraalPyPrivate_Log(PY_TRUFFLE_LOG_FINE, + GraalPyPrivate_Log(GRAALPY_LOG_FINE, "we tried to call tp_traverse on an object at %p with a freed type at %p (ctx %p)!", op, Py_TYPE(op), arg); return 0; diff --git a/graalpython/com.oracle.graal.python.cext/src/object.c b/graalpython/com.oracle.graal.python.cext/src/object.c index 449cd46818..19b0f97422 100644 --- a/graalpython/com.oracle.graal.python.cext/src/object.c +++ b/graalpython/com.oracle.graal.python.cext/src/object.c @@ -2966,7 +2966,7 @@ void GraalPy_SET_TYPE(PyObject *a, PyTypeObject *b) { if (points_to_py_handle_space(a)) { - GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, + GraalPyPrivate_Log(GRAALPY_LOG_INFO, "changing the type of an object is not supported\n"); } else { a->ob_type = b; diff --git a/graalpython/com.oracle.graal.python.cext/src/obmalloc.c b/graalpython/com.oracle.graal.python.cext/src/obmalloc.c index a05020235a..6683b778df 100644 --- a/graalpython/com.oracle.graal.python.cext/src/obmalloc.c +++ b/graalpython/com.oracle.graal.python.cext/src/obmalloc.c @@ -144,11 +144,11 @@ _GraalPyMem_LogRecentSamples(const char *func, void *ptr) const GraalPyMemSample_t *sample = &_GraalPyMem_Samples[index]; if (sample->ptr == ptr && sample->serial != 0) { char prefix[128]; - GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, + GraalPyPrivate_Log(GRAALPY_LOG_INFO, "%s: recent raw memory sample #%llu op=%c ptr=%p size=%lu depth=%lu\n", func, sample->serial, sample->operation, sample->ptr, (unsigned long) sample->size, (unsigned long) sample->depth); snprintf(prefix, sizeof(prefix), "%s: sample #%llu ", func, sample->serial); - GraalPyPrivate_LogCapturedStacktrace(PY_TRUFFLE_LOG_INFO, prefix, sample->stack, sample->depth); + GraalPyPrivate_LogCapturedStacktrace(GRAALPY_LOG_INFO, prefix, sample->stack, sample->depth); printed++; } } @@ -179,7 +179,7 @@ _GraalPyMem_FatalInvalidHeader(const char *func, void *ptr, const mem_head_t *pt const char *reason = (ptr_with_head->size == GRAALPY_MEM_HEAD_POISON && ptr_with_head->dummy == GRAALPY_MEM_HEAD_POISON) ? "poisoned raw allocation header" : "invalid raw allocation header"; - GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, + GraalPyPrivate_Log(GRAALPY_LOG_INFO, "%s: %s for ptr=%p head=%p size=%lu dummy=0x%lx\n", func, reason, ptr, ptr_with_head, (unsigned long) ptr_with_head->size, (unsigned long) ptr_with_head->dummy); _GraalPyMem_LogRecentSamples(func, ptr); @@ -512,7 +512,7 @@ _GraalPyMem_PrepareAlloc(GraalPyMem_t *state, size_t size) state->native_memory_gc_barrier = GraalPyPrivate_GetInitialNativeMemory(); continue; } - GraalPyPrivate_Log(PY_TRUFFLE_LOG_CONFIG, + GraalPyPrivate_Log(GRAALPY_LOG_CONFIG, "%s: exceeding native_memory_gc_barrier (%lu) with allocation of size %lu, current allocated_memory: %lu\n", __func__, state->native_memory_gc_barrier, size, state->allocated_memory); @@ -532,12 +532,12 @@ _GraalPyMem_PrepareAlloc(GraalPyMem_t *state, size_t size) if (state->native_memory_gc_barrier > state->max_native_memory) { state->native_memory_gc_barrier = state->max_native_memory; } - GraalPyPrivate_Log(PY_TRUFFLE_LOG_CONFIG, + GraalPyPrivate_Log(GRAALPY_LOG_CONFIG, "%s: enlarging native_memory_gc_barrier to %lu\n", __func__, state->native_memory_gc_barrier); } else { - GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, + GraalPyPrivate_Log(GRAALPY_LOG_INFO, "%s: native memory exhausted while allocating %lu bytes\n", __func__, size); return 1; @@ -658,7 +658,7 @@ _GraalPyMem_RawFree(void *ctx, void *ptr) mem_head_t *ptr_with_head = _GraalPyMem_GetValidatedHead(__func__, ptr); const size_t size = ptr_with_head->size; if (state->allocated_memory < size) { - GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, + GraalPyPrivate_Log(GRAALPY_LOG_INFO, "%s: freed memory size (%lu) is larger than allocated memory size (%lu)\n", __func__, size, state->allocated_memory); state->allocated_memory = size; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java index 23cc849dab..b86d1bd05f 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java @@ -112,6 +112,7 @@ import java.util.logging.Level; import com.oracle.graal.python.PythonLanguage; +import com.oracle.graal.python.annotations.CApiConstant; import com.oracle.graal.python.builtins.Python3Core; import com.oracle.graal.python.builtins.PythonBuiltinClassType; import com.oracle.graal.python.builtins.modules.GraalPythonModuleBuiltins.DebugNode; @@ -1439,16 +1440,26 @@ static int GraalPyPrivate_DisableReferenceQueuePolling() { return PInt.intValue(CApiTransitions.disableReferenceQueuePolling(handleContext)); } - private static final int TRACE_MEM = 0x1; - private static final int LOG_INFO = 0x2; - private static final int LOG_CONFIG = 0x4; - private static final int LOG_FINE = 0x8; - private static final int LOG_FINER = 0x10; - private static final int LOG_FINEST = 0x20; - private static final int DEBUG_CAPI = 0x40; - private static final int PYTHON_GC = 0x80; - private static final int POISON_NATIVE_MEMORY_ON_FREE = 0x100; - private static final int SAMPLE_NATIVE_MEMORY_ALLOC_SITES = 0x200; + @CApiConstant // + private static final int GRAALPY_TRACE_MEM = 0x1; + @CApiConstant // + private static final int GRAALPY_LOG_INFO = 0x2; + @CApiConstant // + private static final int GRAALPY_LOG_CONFIG = 0x4; + @CApiConstant // + private static final int GRAALPY_LOG_FINE = 0x8; + @CApiConstant // + private static final int GRAALPY_LOG_FINER = 0x10; + @CApiConstant // + private static final int GRAALPY_LOG_FINEST = 0x20; + @CApiConstant // + private static final int GRAALPY_DEBUG_CAPI = 0x40; + @CApiConstant // + private static final int GRAALPY_PYTHON_GC = 0x80; + @CApiConstant // + private static final int GRAALPY_POISON_NATIVE_MEMORY_ON_FREE = 0x100; + @CApiConstant // + private static final int GRAALPY_SAMPLE_NATIVE_MEMORY_ALLOC_SITES = 0x200; /* * These should be kept so they can be shared across multiple contexts in the same engine, if @@ -1462,34 +1473,34 @@ static int GraalPyPrivate_Native_Options() { int options = 0; PythonLanguage language = PythonLanguage.get(null); if (language.getEngineOption(PythonOptions.TraceNativeMemory)) { - options |= TRACE_MEM; + options |= GRAALPY_TRACE_MEM; } if (LOGGER.isLoggable(Level.INFO)) { - options |= LOG_INFO; + options |= GRAALPY_LOG_INFO; } if (LOGGER.isLoggable(Level.CONFIG)) { - options |= LOG_CONFIG; + options |= GRAALPY_LOG_CONFIG; } if (LOGGER.isLoggable(Level.FINE)) { - options |= LOG_FINE; + options |= GRAALPY_LOG_FINE; } if (LOGGER.isLoggable(Level.FINER)) { - options |= LOG_FINER; + options |= GRAALPY_LOG_FINER; } if (LOGGER.isLoggable(Level.FINEST)) { - options |= LOG_FINEST; + options |= GRAALPY_LOG_FINEST; } if (PythonContext.DEBUG_CAPI) { - options |= DEBUG_CAPI; + options |= GRAALPY_DEBUG_CAPI; } if (language.getEngineOption(PythonOptions.PythonGC)) { - options |= PYTHON_GC; + options |= GRAALPY_PYTHON_GC; } if (language.getEngineOption(PythonOptions.PoisonNativeMemoryOnFree)) { - options |= POISON_NATIVE_MEMORY_ON_FREE; + options |= GRAALPY_POISON_NATIVE_MEMORY_ON_FREE; } if (language.getEngineOption(PythonOptions.SampleNativeMemoryAllocSites)) { - options |= SAMPLE_NATIVE_MEMORY_ALLOC_SITES; + options |= GRAALPY_SAMPLE_NATIVE_MEMORY_ALLOC_SITES; } return options; } @@ -1500,19 +1511,19 @@ static void GraalPyPrivate_LogString(int level, long messagePtr) { TruffleString message = (TruffleString) CharPtrToPythonNode.getUncached().execute(messagePtr); String msg = message.toJavaStringUncached(); switch (level) { - case LOG_INFO: + case GRAALPY_LOG_INFO: LOGGER.info(msg); break; - case LOG_CONFIG: + case GRAALPY_LOG_CONFIG: LOGGER.config(msg); break; - case LOG_FINE: + case GRAALPY_LOG_FINE: LOGGER.fine(msg); break; - case LOG_FINER: + case GRAALPY_LOG_FINER: LOGGER.finer(msg); break; - case LOG_FINEST: + case GRAALPY_LOG_FINEST: LOGGER.finest(msg); break; default: From 44ce613d38bc95979f497dcbdc99b41607416170 Mon Sep 17 00:00:00 2001 From: Florian Angerer Date: Thu, 7 May 2026 21:06:06 +0200 Subject: [PATCH 6/8] Cleanup NativeStringData --- .../objects/str/NativeStringData.java | 93 --------------- .../python/builtins/objects/str/PString.java | 14 +-- .../builtins/objects/str/StringNodes.java | 111 +++++++----------- .../oracle/graal/python/nodes/HiddenAttr.java | 1 - 4 files changed, 45 insertions(+), 174 deletions(-) delete mode 100644 graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/NativeStringData.java diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/NativeStringData.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/NativeStringData.java deleted file mode 100644 index 8954fcec46..0000000000 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/NativeStringData.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2025, 2026, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * The Universal Permissive License (UPL), Version 1.0 - * - * Subject to the condition set forth below, permission is hereby granted to any - * person obtaining a copy of this software, associated documentation and/or - * data (collectively the "Software"), free of charge and under any and all - * copyright rights in the Software, and any and all patent rights owned or - * freely licensable by each licensor hereunder covering either (i) the - * unmodified Software as contributed to or provided by such licensor, or (ii) - * the Larger Works (as defined below), to deal in both - * - * (a) the Software, and - * - * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if - * one is included with the Software each a "Larger Work" to which the Software - * is contributed by such licensors), - * - * without restriction, including without limitation the rights to copy, create - * derivative works of, display, perform, and distribute the Software and make, - * use, sell, offer for sale, import, export, have made, and have sold the - * Software and the Larger Work(s), and to sublicense the foregoing rights on - * either these or other terms. - * - * This license is subject to the following condition: - * - * The above copyright notice and either this complete permission notice or at a - * minimum a reference to the UPL must be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -package com.oracle.graal.python.builtins.objects.str; - -import com.oracle.graal.python.runtime.sequence.storage.NativeByteSequenceStorage; -import com.oracle.truffle.api.CompilerDirectives; -import com.oracle.truffle.api.strings.TruffleString; - -public final class NativeStringData { - private static final byte KIND_ASCII = 0; - private static final byte KIND_1BYTE = 1; - private static final byte KIND_2BYTE = 2; - private static final byte KIND_4BYTE = 4; - private final byte kind; - // We need the storage object for memory management, don't inline its fields here - private final NativeByteSequenceStorage storage; - - private NativeStringData(int charSize, boolean isAscii, NativeByteSequenceStorage storage) { - assert charSize == 1 || charSize == 2 || charSize == 4; - assert !isAscii || charSize == 1; - this.kind = isAscii ? KIND_ASCII : (byte) charSize; - this.storage = storage; - } - - public static NativeStringData create(int charSize, boolean isAscii, long ptr, int length) { - return new NativeStringData(charSize, isAscii, NativeByteSequenceStorage.create(ptr, length, length, true)); - } - - public boolean isAscii() { - return kind == KIND_ASCII; - } - - public int getCharSize() { - return kind != 0 ? kind : KIND_1BYTE; - } - - public long getPtr() { - return storage.getPtr(); - } - - public int length() { - return storage.length(); - } - - public TruffleString toTruffleString(TruffleString.FromNativePointerWithCompactionUTF32Node fromNativePointerNode) { - TruffleString.CompactionLevel compactionLevel = switch (kind) { - case KIND_ASCII, KIND_1BYTE -> TruffleString.CompactionLevel.S1; - case KIND_2BYTE -> TruffleString.CompactionLevel.S2; - case KIND_4BYTE -> TruffleString.CompactionLevel.S4; - default -> throw CompilerDirectives.shouldNotReachHere(); - }; - // NativeByteSequenceStorage implements asPointer - return fromNativePointerNode.execute(storage, 0, storage.length(), compactionLevel, false); - } -} diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/PString.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/PString.java index ba1c90c833..88bccb681c 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/PString.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/PString.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2025, Oracle and/or its affiliates. + * Copyright (c) 2017, 2026, Oracle and/or its affiliates. * Copyright (c) 2013, Regents of the University of California * * All rights reserved. @@ -66,14 +66,6 @@ public TruffleString getValueUncached() { return isMaterialized() ? getMaterialized() : StringMaterializeNode.executeUncached(this); } - public NativeStringData getNativeStringData(Node inliningTarget, HiddenAttr.ReadNode readNode) { - return (NativeStringData) readNode.execute(inliningTarget, this, HiddenAttr.PSTRING_NATIVE_DATA, null); - } - - public void setNativeStringData(Node inliningTarget, HiddenAttr.WriteNode writeNode, NativeStringData value) { - writeNode.execute(inliningTarget, this, HiddenAttr.PSTRING_NATIVE_DATA, value); - } - public PBytes getUtf8Bytes(Node inliningTarget, HiddenAttr.ReadNode readNode) { return (PBytes) readNode.execute(inliningTarget, this, HiddenAttr.PSTRING_UTF8, null); } @@ -107,9 +99,7 @@ public void setMaterialized(TruffleString materialized) { @TruffleBoundary public void intern() { assert isBuiltinPString(this); - TruffleString ts = getValueUncached(); - TruffleString interned = PythonUtils.internString(ts); - materializedValue = interned; + materializedValue = PythonUtils.internString(getValueUncached()); } @Override diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringNodes.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringNodes.java index 8d1743991f..a47b088d51 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringNodes.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringNodes.java @@ -60,6 +60,7 @@ import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTiming; import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions; import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.HandlePointerConverter; +import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.PythonToNativeInternalNode; import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.PythonToNativeNode; import com.oracle.graal.python.builtins.objects.cext.structs.CFields; import com.oracle.graal.python.builtins.objects.cext.structs.CStructAccess; @@ -73,7 +74,6 @@ import com.oracle.graal.python.lib.PyIterNextNode; import com.oracle.graal.python.lib.PyObjectGetIter; import com.oracle.graal.python.nodes.ErrorMessages; -import com.oracle.graal.python.nodes.HiddenAttr; import com.oracle.graal.python.nodes.PGuards; import com.oracle.graal.python.nodes.PNodeWithContext; import com.oracle.graal.python.nodes.PRaiseNode; @@ -91,8 +91,8 @@ import com.oracle.graal.python.runtime.sequence.storage.SequenceStorage; import com.oracle.graal.python.util.OverflowException; import com.oracle.graal.python.util.PythonUtils; +import com.oracle.truffle.api.CompilerAsserts; import com.oracle.truffle.api.CompilerDirectives; -import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.HostCompilerDirectives.InliningCutoff; import com.oracle.truffle.api.dsl.Bind; import com.oracle.truffle.api.dsl.Cached; @@ -117,15 +117,11 @@ public abstract class StringNodes { @GenerateUncached @GenerateInline @GenerateCached(false) - @ImportStatic(StringNodes.class) public abstract static class StringMaterializeNode extends Node { public static TruffleString executeUncached(PString s) { - return executeUncached(s, false); - } - - public static TruffleString executeUncached(PString s, boolean copyNativeData) { - return StringMaterializeNodeGen.getUncached().execute(null, s, copyNativeData); + CompilerAsserts.neverPartOfCompilation(); + return StringMaterializeNodeGen.getUncached().execute(null, s, false); } public final TruffleString execute(Node inliningTarget, PString materialize) { @@ -139,35 +135,29 @@ static TruffleString doMaterialized(PString x, @SuppressWarnings("unused") boole return x.getMaterialized(); } - @Fallback + @Specialization(guards = "!x.isMaterialized()") @InliningCutoff - static TruffleString doNative(Node inliningTarget, PString x, boolean copyNativeData, - @Cached HiddenAttr.ReadNode readAttrNode, + static TruffleString doNative(PString x, boolean copyNativeData, @Cached TruffleString.FromNativePointerWithCompactionUTF32Node fromNativePointerNode) { - if (x.isNative()) { - long ptr = HandlePointerConverter.pointerToStub(x.getNativePointer()); - long data = CStructAccess.readPtrField(ptr, CFields.GraalPyUnicodeObject__data); - if (data != 0) { - int byteLength; - try { - byteLength = PInt.intValueExact(CStructAccess.readLongField(ptr, CFields.GraalPyUnicodeObject__byte_length)); - } catch (OverflowException e) { - throw CompilerDirectives.shouldNotReachHere(e); - } - int kind = CApiTransitions.getGraalPyUnicodeObjectKind(ptr); - TruffleString.CompactionLevel compactionLevel = switch (kind) { - case 1 -> TruffleString.CompactionLevel.S1; - case 2 -> TruffleString.CompactionLevel.S2; - case 4 -> TruffleString.CompactionLevel.S4; - default -> throw CompilerDirectives.shouldNotReachHere(); - }; - TruffleString materialized = fromNativePointerNode.execute(data, 0, byteLength, compactionLevel, copyNativeData); - x.setMaterialized(materialized); - return materialized; - } + assert x.isNative(); + assert PythonToNativeInternalNode.executeUncached(x, false) == x.getNativePointer(); + long ptr = HandlePointerConverter.pointerToStub(x.getNativePointer()); + long data = CStructAccess.readPtrField(ptr, CFields.GraalPyUnicodeObject__data); + assert data != 0; + int byteLength; + try { + byteLength = PInt.intValueExact(CStructAccess.readLongField(ptr, CFields.GraalPyUnicodeObject__byte_length)); + } catch (OverflowException e) { + throw CompilerDirectives.shouldNotReachHere(e); } - NativeStringData nativeData = x.getNativeStringData(inliningTarget, readAttrNode); - TruffleString materialized = nativeData.toTruffleString(fromNativePointerNode); + int kind = CApiTransitions.getGraalPyUnicodeObjectKind(ptr); + TruffleString.CompactionLevel compactionLevel = switch (kind) { + case 1 -> TruffleString.CompactionLevel.S1; + case 2 -> TruffleString.CompactionLevel.S2; + case 4 -> TruffleString.CompactionLevel.S4; + default -> throw CompilerDirectives.shouldNotReachHere(); + }; + TruffleString materialized = fromNativePointerNode.execute(data, 0, byteLength, compactionLevel, copyNativeData); x.setMaterialized(materialized); return materialized; } @@ -187,36 +177,21 @@ static int doString(TruffleString str, return codePointLengthNode.execute(str, TS_ENCODING); } - @Specialization(guards = "x.isMaterialized()") - static int doMaterialized(PString x, - @Shared @Cached TruffleString.CodePointLengthNode codePointLengthNode) { - return doString(x.getMaterialized(), codePointLengthNode); - } - - @Specialization(guards = "!x.isMaterialized()") - static int doNative(PString x, + @Specialization + static int doPString(PString x, @Bind Node inliningTarget, - @Cached HiddenAttr.ReadNode readAttrNode, - @Cached InlinedConditionProfile oneByteProfile, - @Cached StringMaterializeNode materializeNode, + @Cached InlinedConditionProfile isMaterializedProfile, @Shared @Cached TruffleString.CodePointLengthNode codePointLengthNode) { - NativeStringData nativeData = x.getNativeStringData(inliningTarget, readAttrNode); - if (nativeData == null && x.isNative()) { - long ptr = HandlePointerConverter.pointerToStub(x.getNativePointer()); - try { - return PInt.intValueExact(CStructAccess.readLongField(ptr, CFields.GraalPyUnicodeObject__length)); - } catch (OverflowException e) { - throw CompilerDirectives.shouldNotReachHere(e); - } + if (isMaterializedProfile.profile(inliningTarget, x.isMaterialized())) { + return doString(x.getMaterialized(), codePointLengthNode); + } + assert x.isNative(); + long ptr = HandlePointerConverter.pointerToStub(x.getNativePointer()); + try { + return PInt.intValueExact(CStructAccess.readLongField(ptr, CFields.GraalPyUnicodeObject__length)); + } catch (OverflowException e) { + throw CompilerDirectives.shouldNotReachHere(e); } - int charSize = nativeData.getCharSize(); - assert charSize == 1 || charSize == 2 || charSize == 4; - /* - * Avoid materialization of strings backed by native memory. It is correct to use the - * 'length / charSize' because native data always contains characters with fixed byte - * size (i.e. Py_UCS1, Py_UCS2, or Py_UCS4). - */ - return nativeData.length() / nativeData.getCharSize(); } @Specialization @@ -232,10 +207,15 @@ static int doNativeObject(PythonNativeObject x, assert EnsurePythonObjectNode.doesNotNeedPromotion(x); PythonContext context = PythonContext.get(inliningTarget); var callable = CApiContext.getNativeSymbol(inliningTarget, NativeCAPISymbol.FUN_PY_UNICODE_GET_LENGTH); - return intValue(ExternalFunctionInvoker.invokePY_UNICODE_GET_LENGTH(null, C_API_TIMING, context.ensureNativeContext(), + long lresult = ExternalFunctionInvoker.invokePY_UNICODE_GET_LENGTH(null, C_API_TIMING, context.ensureNativeContext(), BoundaryCallData.getUncached(), context.getThreadState(context.getLanguage(inliningTarget)), callable, - toNativeNode.executeLong(x))); + toNativeNode.executeLong(x)); + try { + return PInt.intValueExact(lresult); + } catch (OverflowException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } } // the object's type is not a subclass of 'str' throw raiseNode.raise(inliningTarget, PythonBuiltinClassType.TypeError, ErrorMessages.BAD_ARG_TYPE_FOR_BUILTIN_OP); @@ -250,11 +230,6 @@ static int other(Object x, TruffleString tstring = cast.cast(inliningTarget, x, ErrorMessages.DESCRIPTOR_REQUIRES_S_OBJ_RECEIVED_P, "str", x); return doString(tstring, codePointLengthNode); } - - @TruffleBoundary - private static int intValue(Number result) { - return result.intValue(); - } } @GenerateUncached diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/HiddenAttr.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/HiddenAttr.java index a5978ccf94..5a9a9f2c84 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/HiddenAttr.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/HiddenAttr.java @@ -108,7 +108,6 @@ public final class HiddenAttr { public static final HiddenAttr NATIVE_SLOTS = new HiddenAttr("__native_slots__"); public static final HiddenAttr INSTANCESHAPE = new HiddenAttr("instanceshape"); public static final HiddenAttr STRUCTSEQ_FIELD_NAMES = new HiddenAttr("struct_seq_field_names"); - public static final HiddenAttr PSTRING_NATIVE_DATA = new HiddenAttr("native_string_data"); public static final HiddenAttr PSTRING_UTF8 = new HiddenAttr("utf8"); public static final HiddenAttr PSTRING_WCHAR = new HiddenAttr("wchar"); From bc6838b0bd9a28446c34b076f7b43e55c9459328 Mon Sep 17 00:00:00 2001 From: Florian Angerer Date: Fri, 8 May 2026 15:05:41 +0200 Subject: [PATCH 7/8] Test invalid parameters to PyUnicode_New --- .../src/tests/cpyext/test_unicode.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/graalpython/com.oracle.graal.python.test/src/tests/cpyext/test_unicode.py b/graalpython/com.oracle.graal.python.test/src/tests/cpyext/test_unicode.py index a990dff177..ed0cb4722d 100644 --- a/graalpython/com.oracle.graal.python.test/src/tests/cpyext/test_unicode.py +++ b/graalpython/com.oracle.graal.python.test/src/tests/cpyext/test_unicode.py @@ -79,6 +79,13 @@ def _reference_unicode_escape(args): return _codecs.unicode_escape_encode(args[0])[0] +def _reference_unicode_new(args): + expected = args[3] + if isinstance(expected, type) and issubclass(expected, BaseException): + raise expected() + return expected + + def _reference_fromformat(args): fmt = args[0] fmt_args = args[1:] @@ -897,7 +904,7 @@ class TestPyUnicode(CPyExtTestCase): ) test_PyUnicode_New = CPyExtFunction( - lambda args: args[3], + _reference_unicode_new, lambda: ( (134818, bytearray([0xA2, 0x0E, 0x02, 0x00]), 1, "𠺢"), (134988, bytearray([0xA2, 0x0E, 0x02, 0x00, 0x4C, 0x0F, 0x02, 0x00]), 2, "𠺢𠽌"), @@ -905,9 +912,15 @@ class TestPyUnicode(CPyExtTestCase): (8252, bytearray([0x30, 0x20, 0x3C, 0x20]), 2, "‰‼"), (127, bytearray([0x61, 0x62, 0x63, 0x64]), 4, "abcd"), (127, bytearray([0x61, 0x62, 0x63, 0x64]), 2, "ab"), + (127, bytearray(), -1, SystemError), + (0x10ffff, bytearray(), sys.maxsize, MemoryError), + (0x110000, bytearray(), 1, SystemError), ), code='''PyObject* wrap_PyUnicode_New(Py_ssize_t maxchar, Py_buffer buffer, Py_ssize_t nchars, PyObject* dummy) { PyObject* obj = PyUnicode_New(nchars, (Py_UCS4) maxchar); + if (obj == NULL) { + return NULL; + } void* data = PyUnicode_DATA(obj); size_t char_size; if (maxchar < 256) { From 0d4e617da3e8c0f9ff9a01c04479afed6f53881c Mon Sep 17 00:00:00 2001 From: Florian Angerer Date: Mon, 11 May 2026 10:52:24 +0200 Subject: [PATCH 8/8] Use @CApiConstant for GRAALPY_ATTACH_NATIVE_* --- .../com.oracle.graal.python.cext/src/capi.h | 4 --- .../objects/cext/capi/CApiContext.java | 34 +++++++------------ .../objects/cext/structs/CConstants.java | 3 -- 3 files changed, 13 insertions(+), 28 deletions(-) diff --git a/graalpython/com.oracle.graal.python.cext/src/capi.h b/graalpython/com.oracle.graal.python.cext/src/capi.h index e0165ba9f2..6a24f858ef 100644 --- a/graalpython/com.oracle.graal.python.cext/src/capi.h +++ b/graalpython/com.oracle.graal.python.cext/src/capi.h @@ -68,10 +68,6 @@ typedef int (*graalpy_attach_native_thread_func)(void); typedef void (*graalpy_detach_native_thread_func)(void); -#define GRAALPY_ATTACH_NATIVE_FAILED (-1) -#define GRAALPY_ATTACH_NATIVE_OWNED 1 -#define GRAALPY_ATTACH_NATIVE_FOREIGN 2 - extern graalpy_attach_native_thread_func graalpy_attach_native_thread; extern graalpy_detach_native_thread_func graalpy_detach_native_thread; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CApiContext.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CApiContext.java index 922bfd1d65..37b986e2ab 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CApiContext.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CApiContext.java @@ -78,6 +78,7 @@ import org.graalvm.shadowed.com.ibm.icu.text.StringPrepParseException; import com.oracle.graal.python.PythonLanguage; +import com.oracle.graal.python.annotations.CApiConstant; import com.oracle.graal.python.builtins.PythonBuiltinClassType; import com.oracle.graal.python.builtins.modules.cext.PythonCApiAssertions; import com.oracle.graal.python.builtins.modules.cext.PythonCextBuiltinRegistry; @@ -98,7 +99,6 @@ import com.oracle.graal.python.builtins.objects.cext.common.LoadCExtException.ImportException; import com.oracle.graal.python.builtins.objects.cext.common.NativePointer; import com.oracle.graal.python.builtins.objects.cext.copying.NativeLibraryLocator; -import com.oracle.graal.python.builtins.objects.cext.structs.CConstants; import com.oracle.graal.python.builtins.objects.cext.structs.CFields; import com.oracle.graal.python.builtins.objects.cext.structs.CStructAccess; import com.oracle.graal.python.builtins.objects.cext.structs.CStructs; @@ -787,12 +787,12 @@ void runBackgroundGCTask(PythonContext context) { private static final NativeSignature ATTACH_NATIVE_THREAD_SIGNATURE = NativeSignature.create(NativeSimpleType.SINT32); private static final NativeSignature DETACH_NATIVE_THREAD_SIGNATURE = NativeSignature.create(NativeSimpleType.VOID); - /** Must stay in sync with {@code GRAALPY_ATTACH_NATIVE_FAILED} in {@code capi.h}. */ - private static final int ATTACH_NATIVE_THREAD_FAILED = -1; - /** Must stay in sync with {@code GRAALPY_ATTACH_NATIVE_OWNED} in {@code capi.h}. */ - private static final int ATTACH_NATIVE_THREAD_OWNED = 1; - /** Must stay in sync with {@code GRAALPY_ATTACH_NATIVE_FOREIGN} in {@code capi.h}. */ - private static final int ATTACH_NATIVE_THREAD_FOREIGN = 2; + @CApiConstant // + private static final int GRAALPY_ATTACH_NATIVE_FAILED = -1; + @CApiConstant // + private static final int GRAALPY_ATTACH_NATIVE_OWNED = 1; + @CApiConstant // + private static final int GRAALPY_ATTACH_NATIVE_FOREIGN = 2; private static final MethodHandle HANDLE_ATTACH_NATIVE_THREAD; private static final MethodHandle HANDLE_DETACH_NATIVE_THREAD; @@ -809,34 +809,27 @@ void runBackgroundGCTask(PythonContext context) { /** * Called from native threads before using the C API. Returns - * {@link #ATTACH_NATIVE_THREAD_OWNED} if this method entered the context and a matching - * {@link #detachNativeThread()} is required, {@link #ATTACH_NATIVE_THREAD_FOREIGN} if the context + * {@link #GRAALPY_ATTACH_NATIVE_OWNED} if this method entered the context and a matching + * {@link #detachNativeThread()} is required, {@link #GRAALPY_ATTACH_NATIVE_FOREIGN} if the context * was already active on this thread from Truffle/Java/Python, and - * {@link #ATTACH_NATIVE_THREAD_FAILED} if entering failed. + * {@link #GRAALPY_ATTACH_NATIVE_FAILED} if entering failed. */ private int attachNativeThread() { CompilerAsserts.neverPartOfCompilation(); TruffleContext truffleContext = getContext().getEnv().getContext(); if (truffleContext.isActive()) { - return ATTACH_NATIVE_THREAD_FOREIGN; + return GRAALPY_ATTACH_NATIVE_FOREIGN; } try { Object previousContext = truffleContext.enter(null); attachedThreadPreviousContext.set(previousContext); - return ATTACH_NATIVE_THREAD_OWNED; + return GRAALPY_ATTACH_NATIVE_OWNED; } catch (Throwable t) { LOGGER.severe("could not attach native thread to polyglot context: " + t.getMessage()); - return ATTACH_NATIVE_THREAD_FAILED; + return GRAALPY_ATTACH_NATIVE_FAILED; } } - private static boolean assertAttachNativeThreadConstants() { - assert ATTACH_NATIVE_THREAD_FAILED == CConstants.GRAALPY_ATTACH_NATIVE_FAILED.intValue(); - assert ATTACH_NATIVE_THREAD_OWNED == CConstants.GRAALPY_ATTACH_NATIVE_OWNED.intValue(); - assert ATTACH_NATIVE_THREAD_FOREIGN == CConstants.GRAALPY_ATTACH_NATIVE_FOREIGN.intValue(); - return true; - } - /** * Called from native immediately before detaching a thread that was previously attached by * {@link #attachNativeThread()}. @@ -911,7 +904,6 @@ public static CApiContext ensureCapiWasLoaded(Node node, PythonContext context, // This can happen when other languages restrict multithreading LOGGER.warning(() -> "didn't start the background GC task due to: " + e.getMessage()); } - assert assertAttachNativeThreadConstants(); } catch (ImportException e) { context.setCApiState(PythonContext.CApiState.CANNOT_IMPORT); throw e; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/structs/CConstants.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/structs/CConstants.java index 1f8eab5cc4..93f0eca793 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/structs/CConstants.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/structs/CConstants.java @@ -65,9 +65,6 @@ public enum CConstants { PYLONG_BITS_IN_DIGIT, READONLY, CHAR_MIN, - GRAALPY_ATTACH_NATIVE_FAILED, - GRAALPY_ATTACH_NATIVE_OWNED, - GRAALPY_ATTACH_NATIVE_FOREIGN, _PY_NSMALLNEGINTS, _PY_NSMALLPOSINTS;