diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index ca4a7c216eda53..359d95e68f57aa 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -212,6 +212,32 @@ _PyStackRef_FromPyObjectBorrow(PyObject *obj, const char *filename, int linenumb } #define PyStackRef_FromPyObjectBorrow(obj) _PyStackRef_FromPyObjectBorrow(_PyObject_CAST(obj), __FILE__, __LINE__) +/* Tag a PyObject pointer as a borrowed operand for BORROW variants. */ +static inline uintptr_t +PyStackRef_TagBorrow(PyObject *obj) +{ + return (uintptr_t)obj | Py_TAG_REFCNT; +} + +/* Strip tag bits from a pre-tagged operand to recover the PyObject pointer. */ +static inline PyObject * +PyStackRef_UntagBorrow(PyObject *tagged) +{ + return (PyObject *)((uintptr_t)tagged & ~Py_TAG_BITS); +} + +/* Create a stackref from a pre-tagged operand (tag bits already set). + Used by _LOAD_CONST_INLINE_BORROW variants where the operand is + tagged at trace creation time to avoid tagging on every execution. */ +static inline _PyStackRef +_PyStackRef_FromPreTagged(PyObject *tagged, const char *filename, int linenumber) +{ + assert((uintptr_t)tagged & Py_TAG_REFCNT); + PyObject *obj = (PyObject *)((uintptr_t)tagged & ~Py_TAG_BITS); + return _Py_stackref_create(obj, Py_TAG_REFCNT, filename, linenumber); +} +#define PyStackRef_FromPreTagged(tagged) _PyStackRef_FromPreTagged(_PyObject_CAST(tagged), __FILE__, __LINE__) + static inline void _PyStackRef_CLOSE(_PyStackRef ref, const char *filename, int linenumber) { @@ -617,6 +643,30 @@ PyStackRef_FromPyObjectBorrow(PyObject *obj) return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_REFCNT}; } +/* Tag a PyObject pointer as a borrowed operand for BORROW variants. */ +static inline uintptr_t +PyStackRef_TagBorrow(PyObject *obj) +{ + return (uintptr_t)obj | Py_TAG_REFCNT; +} + +/* Strip tag bits from a pre-tagged operand to recover the PyObject pointer. 
*/ +static inline PyObject * +PyStackRef_UntagBorrow(PyObject *tagged) +{ + return (PyObject *)((uintptr_t)tagged & ~Py_TAG_BITS); +} + +/* Create a stackref from a pre-tagged operand (tag bits already set). + Used by _LOAD_CONST_INLINE_BORROW variants where the operand is + tagged at trace creation time to avoid tagging on every execution. */ +static inline _PyStackRef +PyStackRef_FromPreTagged(PyObject *tagged) +{ + assert((uintptr_t)tagged & Py_TAG_REFCNT); + return (_PyStackRef){ .bits = (uintptr_t)tagged }; +} + /* WARNING: This macro evaluates its argument more than once */ #ifdef _WIN32 #define PyStackRef_DUP(REF) \ diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-04-19-35-32.gh-issue-145742.SU9RYL.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-04-19-35-32.gh-issue-145742.SU9RYL.rst new file mode 100644 index 00000000000000..0909ffe44bd331 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-04-19-35-32.gh-issue-145742.SU9RYL.rst @@ -0,0 +1,2 @@ +Optimize ``_LOAD_CONST_INLINE_BORROW`` and its variants by pre-tagging their +operands at trace creation instead of on every execution. Patch by Donghee Na. 
diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7de889b93b71a7..acec4b6303893e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -6104,11 +6104,11 @@ dummy_func( } tier2 pure op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { - value = PyStackRef_FromPyObjectBorrow(ptr); + value = PyStackRef_FromPreTagged(ptr); } tier2 op(_SHUFFLE_3_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null, arg -- res, a, c)) { - res = PyStackRef_FromPyObjectBorrow(ptr); + res = PyStackRef_FromPreTagged(ptr); a = arg; c = callable; INPUTS_DEAD(); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 93d39bee1b9ff6..5323901e74e559 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -22300,7 +22300,7 @@ assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); _PyStackRef value; PyObject *ptr = (PyObject *)CURRENT_OPERAND0_64(); - value = PyStackRef_FromPyObjectBorrow(ptr); + value = PyStackRef_FromPreTagged(ptr); _tos_cache0 = value; SET_CURRENT_CACHED_VALUES(1); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); @@ -22313,7 +22313,7 @@ _PyStackRef value; _PyStackRef _stack_item_0 = _tos_cache0; PyObject *ptr = (PyObject *)CURRENT_OPERAND0_64(); - value = PyStackRef_FromPyObjectBorrow(ptr); + value = PyStackRef_FromPreTagged(ptr); _tos_cache1 = value; _tos_cache0 = _stack_item_0; SET_CURRENT_CACHED_VALUES(2); @@ -22328,7 +22328,7 @@ _PyStackRef _stack_item_0 = _tos_cache0; _PyStackRef _stack_item_1 = _tos_cache1; PyObject *ptr = (PyObject *)CURRENT_OPERAND0_64(); - value = PyStackRef_FromPyObjectBorrow(ptr); + value = PyStackRef_FromPreTagged(ptr); _tos_cache2 = value; _tos_cache1 = _stack_item_1; _tos_cache0 = _stack_item_0; @@ -22348,7 +22348,7 @@ arg = stack_pointer[-1]; callable = stack_pointer[-3]; PyObject *ptr = (PyObject *)CURRENT_OPERAND0_64(); - res = PyStackRef_FromPyObjectBorrow(ptr); + res = PyStackRef_FromPreTagged(ptr); a = arg; c = callable; _tos_cache2 = c; @@ -22373,7 +22373,7 @@ arg = _stack_item_0; callable = stack_pointer[-2]; 
PyObject *ptr = (PyObject *)CURRENT_OPERAND0_64(); - res = PyStackRef_FromPyObjectBorrow(ptr); + res = PyStackRef_FromPreTagged(ptr); a = arg; c = callable; _tos_cache2 = c; @@ -22399,7 +22399,7 @@ arg = _stack_item_1; callable = stack_pointer[-1]; PyObject *ptr = (PyObject *)CURRENT_OPERAND0_64(); - res = PyStackRef_FromPyObjectBorrow(ptr); + res = PyStackRef_FromPreTagged(ptr); a = arg; c = callable; _tos_cache2 = c; @@ -22426,7 +22426,7 @@ arg = _stack_item_2; callable = _stack_item_0; PyObject *ptr = (PyObject *)CURRENT_OPERAND0_64(); - res = PyStackRef_FromPyObjectBorrow(ptr); + res = PyStackRef_FromPreTagged(ptr); a = arg; c = callable; _tos_cache2 = c; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 095bcfc639bc65..60496247216893 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -177,12 +177,13 @@ convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj) if (res == NULL) { return NULL; } - if (_Py_IsImmortal(res)) { + bool borrow = _Py_IsImmortal(res); + if (borrow) { inst->opcode = _LOAD_CONST_INLINE_BORROW; } else { inst->opcode = _LOAD_CONST_INLINE; } - inst->operand0 = (uint64_t)res; + inst->operand0 = borrow ? 
PyStackRef_TagBorrow(res) : (uint64_t)res; return res; } @@ -233,7 +234,13 @@ add_op(JitOptContext *ctx, _PyUOpInstruction *this_instr, out->format = this_instr->format; out->oparg = (oparg); out->target = this_instr->target; - out->operand0 = (operand0); + if (opcode == _LOAD_CONST_INLINE_BORROW || + opcode == _SHUFFLE_3_LOAD_CONST_INLINE_BORROW) { + out->operand0 = PyStackRef_TagBorrow((PyObject *)operand0); + } + else { + out->operand0 = (operand0); + } out->operand1 = this_instr->operand1; ctx->out_buffer.next++; } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index daebef4a04320b..cda62c6902ff16 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -890,7 +890,7 @@ dummy_func(void) { } op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = PyJitRef_Borrow(sym_new_const(ctx, PyStackRef_UntagBorrow(ptr))); } op(_POP_TOP_OPARG, (args[oparg] --)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index c3c889e9de9a7e..fc72d66f8aa6af 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -5489,7 +5489,7 @@ case _LOAD_CONST_INLINE_BORROW: { JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = PyJitRef_Borrow(sym_new_const(ctx, PyStackRef_UntagBorrow(ptr))); CHECK_STACK_BOUNDS(1); stack_pointer[0] = value; stack_pointer += 1; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 414ca18be4654c..93999c9afc05ab 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -600,6 +600,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_CLOSE_SPECIALIZED", "PyStackRef_DUP", "PyStackRef_False", + "PyStackRef_FromPreTagged", "PyStackRef_FromPyObjectBorrow", "PyStackRef_FromPyObjectNew", "PyStackRef_FromPyObjectSteal", @@ -766,7 +767,7 @@ def 
escaping_call_in_simple_stmt(stmt: SimpleStmt, result: dict[SimpleStmt, Esca continue #if not tkn.text.startswith(("Py", "_Py", "monitor")): # continue - if tkn.text.startswith(("sym_", "optimize_", "PyJitRef")): + if tkn.text.startswith(("sym_", "optimize_", "PyJitRef", "PyStackRef_Tag", "PyStackRef_Untag")): # Optimize functions continue if tkn.text.endswith("Check"):