Skip to content

Commit 33093df

Browse files
committed
add record slots map
1 parent 1004eff commit 33093df

File tree

7 files changed

+201
-110
lines changed

7 files changed

+201
-110
lines changed

Include/internal/pycore_opcode_metadata.h

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_optimizer.h

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -489,8 +489,15 @@ typedef struct {
489489
uint8_t count;
490490
uint8_t indices[MAX_RECORDED_VALUES];
491491
} _PyOpcodeRecordEntry;
492+
493+
typedef struct {
494+
uint8_t count;
495+
uint8_t needs_family_transform;
496+
uint8_t slots[MAX_RECORDED_VALUES];
497+
} _PyOpcodeRecordSlotMap;
498+
492499
PyAPI_DATA(const _PyOpcodeRecordEntry) _PyOpcode_RecordEntries[256];
493-
PyAPI_DATA(const uint8_t) _PyOpcode_RecordIsFamilyOverride[256];
500+
PyAPI_DATA(const _PyOpcodeRecordSlotMap) _PyOpcode_RecordSlotMaps[256];
494501
#endif
495502

496503
#ifdef __cplusplus

Lib/test/test_generated_cases.py

Lines changed: 61 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2087,6 +2087,17 @@ def generate_tables(self, input: str) -> str:
20872087
record_function_generator.generate_recorder_tables(analysis, out)
20882088
return buf.getvalue()
20892089

2090+
def get_slot_map_section(self, output: str) -> str:
2091+
return output.split(
2092+
"const _PyOpcodeRecordSlotMap _PyOpcode_RecordSlotMaps[256] = {\n",
2093+
1,
2094+
)[1].split("};\n\n", 1)[0]
2095+
2096+
def assert_slot_map_lines(self, output: str, *lines: str) -> None:
2097+
slot_map_section = self.get_slot_map_section(output)
2098+
for line in lines:
2099+
self.assertIn(line, slot_map_section)
2100+
20902101
def test_single_recording_uop_generates_count(self):
20912102
input = """
20922103
tier2 op(_RECORD_TOS, (value -- value)) {
@@ -2145,25 +2156,67 @@ def test_four_recording_uops_rejected(self):
21452156
with self.assertRaisesRegex(ValueError, "exceeds MAX_RECORDED_VALUES"):
21462157
self.generate_tables(input)
21472158

2148-
def test_family_member_inherits_recorder_shape(self):
2159+
def test_family_member_needs_transform_only_when_shape_changes(self):
21492160
input = """
21502161
tier2 op(_RECORD_TOS, (value -- value)) {
21512162
RECORD_VALUE(value);
21522163
}
2164+
tier2 op(_RECORD_TOS_TYPE, (value -- value)) {
2165+
RECORD_VALUE(Py_TYPE(value));
2166+
}
21532167
op(_DO_STUFF, (value -- res)) {
21542168
res = value;
21552169
}
2156-
macro(OP) = _RECORD_TOS + _DO_STUFF;
2157-
inst(OP_SPECIALIZED, (value -- res)) {
2158-
res = value;
2170+
macro(OP_RAW) = _RECORD_TOS + _DO_STUFF;
2171+
macro(OP_RAW_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF;
2172+
family(OP_RAW, INLINE_CACHE_ENTRIES_OP_RAW) = { OP_RAW_SPECIALIZED };
2173+
2174+
macro(OP_TYPED) = _RECORD_TOS_TYPE + _DO_STUFF;
2175+
macro(OP_TYPED_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF;
2176+
family(OP_TYPED, INLINE_CACHE_ENTRIES_OP_TYPED) = { OP_TYPED_SPECIALIZED };
2177+
"""
2178+
output = self.generate_tables(input)
2179+
self.assert_slot_map_lines(
2180+
output,
2181+
"[OP_RAW_SPECIALIZED] = {1, 1, {0}}",
2182+
"[OP_TYPED_SPECIALIZED] = {1, 0, {0}}",
2183+
)
2184+
2185+
def test_family_member_maps_positional_recorders_to_family_slots(self):
2186+
input = """
2187+
tier2 op(_RECORD_TOS, (sub -- sub)) {
2188+
RECORD_VALUE(sub);
21592189
}
2190+
tier2 op(_RECORD_NOS, (container, sub -- container, sub)) {
2191+
RECORD_VALUE(container);
2192+
}
2193+
op(_DO_STUFF, (container, sub -- res)) {
2194+
res = container;
2195+
}
2196+
macro(OP) = _RECORD_TOS + _RECORD_NOS + _DO_STUFF;
2197+
macro(OP_SPECIALIZED) = _RECORD_NOS + _DO_STUFF;
21602198
family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
21612199
"""
21622200
output = self.generate_tables(input)
2163-
self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output)
2164-
self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_INDEX}}", output)
2165-
self.assertIn("const uint8_t _PyOpcode_RecordIsFamilyOverride[256] = {", output)
2166-
self.assertIn("[OP_SPECIALIZED] = 1", output)
2201+
self.assert_slot_map_lines(output, "[OP_SPECIALIZED] = {1, 0, {1}}")
2202+
2203+
def test_family_member_maps_non_positional_recorders_by_stack_shape(self):
2204+
input = """
2205+
tier2 op(_RECORD_CALLABLE, (callable, self, args[oparg] -- callable, self, args[oparg])) {
2206+
RECORD_VALUE(callable);
2207+
}
2208+
tier2 op(_RECORD_BOUND_METHOD, (callable, self, args[oparg] -- callable, self, args[oparg])) {
2209+
RECORD_VALUE(callable);
2210+
}
2211+
op(_DO_STUFF, (callable, self, args[oparg] -- res)) {
2212+
res = callable;
2213+
}
2214+
macro(OP) = _RECORD_CALLABLE + _DO_STUFF;
2215+
macro(OP_SPECIALIZED) = _RECORD_BOUND_METHOD + _DO_STUFF;
2216+
family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
2217+
"""
2218+
output = self.generate_tables(input)
2219+
self.assert_slot_map_lines(output, "[OP_SPECIALIZED] = {1, 1, {0}}")
21672220

21682221
class TestGeneratedAbstractCases(unittest.TestCase):
21692222
def setUp(self) -> None:

Python/bytecodes.c

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1371,7 +1371,6 @@ dummy_func(
13711371
}
13721372

13731373
macro(BINARY_OP_SUBSCR_GETITEM) =
1374-
_RECORD_TOS +
13751374
_RECORD_NOS +
13761375
unused/5 + // Skip over the counter and cache
13771376
_CHECK_PEP_523 +
@@ -3688,7 +3687,7 @@ dummy_func(
36883687
}
36893688

36903689
macro(GET_ITER) =
3691-
_RECORD_TOS +
3690+
_RECORD_TOS_TYPE +
36923691
_SPECIALIZE_GET_ITER +
36933692
_GET_ITER;
36943693

Python/optimizer.c

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -828,6 +828,7 @@ _PyJit_translate_single_bytecode_to_trace(
828828
trace->end -= 2;
829829

830830
const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
831+
const _PyOpcodeRecordSlotMap *record_slot_map = &_PyOpcode_RecordSlotMaps[opcode];
831832

832833
assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
833834
assert(!_PyErr_Occurred(tstate));
@@ -1008,15 +1009,17 @@ _PyJit_translate_single_bytecode_to_trace(
10081009
operand = next->op.arg;
10091010
}
10101011
else if (_PyUop_Flags[uop] & HAS_RECORDS_VALUE_FLAG) {
1011-
PyObject *recorded_value = tracer->prev_state.recorded_values[record_idx];
1012-
tracer->prev_state.recorded_values[record_idx] = NULL;
1012+
assert(record_idx < record_slot_map->count);
1013+
uint8_t record_slot = record_slot_map->slots[record_idx];
10131014
record_idx++;
1014-
if (_PyOpcode_RecordIsFamilyOverride[opcode] &&
1015-
recorded_value != NULL) {
1016-
_Py_RecordTraceTransformFn transform = record_trace_transforms[uop];
1017-
if (transform != NULL) {
1018-
recorded_value = transform(recorded_value);
1019-
}
1015+
assert(record_slot < tracer->prev_state.recorded_count);
1016+
1017+
PyObject *recorded_value = tracer->prev_state.recorded_values[record_slot];
1018+
tracer->prev_state.recorded_values[record_slot] = NULL;
1019+
_Py_RecordTraceTransformFn transform =
1020+
record_slot_map->needs_family_transform ? record_trace_transforms[uop] : NULL;
1021+
if (transform != NULL && recorded_value != NULL) {
1022+
recorded_value = transform(recorded_value);
10201023
}
10211024
operand = (uintptr_t)recorded_value;
10221025
}

Python/record_functions.c.h

Lines changed: 53 additions & 77 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)