diff --git a/mypyc/irbuild/ll_builder.py b/mypyc/irbuild/ll_builder.py index dcb352679cf8..8c6b673e3273 100644 --- a/mypyc/irbuild/ll_builder.py +++ b/mypyc/irbuild/ll_builder.py @@ -459,8 +459,6 @@ def coerce_int_to_fixed_width(self, src: Value, target_type: RType, line: int) - assert is_fixed_width_rtype(target_type), target_type assert isinstance(target_type, RPrimitive), target_type - res = Register(target_type) - fast, slow, end = BasicBlock(), BasicBlock(), BasicBlock() check = self.check_tagged_short_int(src, line) @@ -471,37 +469,20 @@ def coerce_int_to_fixed_width(self, src: Value, target_type: RType, line: int) - size = target_type.size if size < int_rprimitive.size: # Add a range check when the target type is smaller than the source type - fast2, fast3 = BasicBlock(), BasicBlock() - upper_bound = 1 << (size * 8 - 1) - if not target_type.is_signed: - upper_bound *= 2 - check2 = self.add(ComparisonOp(src, Integer(upper_bound, src.type), ComparisonOp.SLT)) - self.add(Branch(check2, fast2, slow, Branch.BOOL)) - self.activate_block(fast2) - if target_type.is_signed: - lower_bound = -upper_bound - else: - lower_bound = 0 - check3 = self.add(ComparisonOp(src, Integer(lower_bound, src.type), ComparisonOp.SGE)) - self.add(Branch(check3, fast3, slow, Branch.BOOL)) - self.activate_block(fast3) - tmp = self.int_op( - c_pyssize_t_rprimitive, - src, - Integer(1, c_pyssize_t_rprimitive), - IntOp.RIGHT_SHIFT, - line, + # Use helper method to generate range checking and conversion + res = self.coerce_tagged_to_fixed_width_with_range_check( + src, target_type, int_rprimitive.size, slow, end, line ) - tmp = self.add(Truncate(tmp, target_type)) else: + # No range check needed when target is same size or larger if size > int_rprimitive.size: tmp = self.add(Extend(src, target_type, signed=True)) else: tmp = src tmp = self.int_op(target_type, tmp, Integer(1, target_type), IntOp.RIGHT_SHIFT, line) - - self.add(Assign(res, tmp)) - self.goto(end) + res = Register(target_type) + self.add(Assign(res, tmp)) + self.goto(end) self.activate_block(slow) if is_int64_rprimitive(target_type) or ( @@ -521,31 +502,122 @@ def coerce_int_to_fixed_width(self, src: Value, target_type: RType, line: int) - self.add(Assign(res, tmp)) self.add(KeepAlive([src])) self.goto(end) - elif is_int32_rprimitive(target_type): + else: # Slow path just always generates an OverflowError + self.emit_fixed_width_overflow_error(target_type, line) + + self.activate_block(end) + return res + + def coerce_tagged_to_fixed_width_with_range_check( + self, + src: Value, + target_type: RType, + source_size: int, + overflow_block: BasicBlock, + success_block: BasicBlock, + line: int, + ) -> Register: + """Helper to convert a tagged value to a smaller fixed-width type with range checking. + + This method generates IR for converting a tagged integer (like short_int or the fast + path of int) to a smaller fixed-width type (i32, i16, uint8) with overflow detection. + + The method performs range checks and branches to overflow_block on failure, or + success_block on success (after assigning the result to a register). + + Args: + src: Tagged source value (with tag bit set) + target_type: Target fixed-width type (must be smaller than source_size) + source_size: Size in bytes of the source type + overflow_block: Block to branch to on overflow + success_block: Block to goto after successful conversion + line: Line number + + Returns: + Result register containing the converted value (valid in success_block) + """ + assert is_fixed_width_rtype(target_type), target_type + assert isinstance(target_type, RPrimitive), target_type + size = target_type.size + assert size < source_size, (target_type, size, source_size) + + res = Register(target_type) + in_range, in_range2 = BasicBlock(), BasicBlock() + + # Calculate bounds for the target type (in tagged representation) + upper_bound = 1 << (size * 8 - 1) + if not target_type.is_signed: + upper_bound *= 2 + + # Check if value < upper_bound + check_upper = self.add(ComparisonOp(src, Integer(upper_bound, src.type), ComparisonOp.SLT)) + self.add(Branch(check_upper, in_range, overflow_block, Branch.BOOL)) + + self.activate_block(in_range) + + # Check if value >= lower_bound + if target_type.is_signed: + lower_bound = -upper_bound + else: + lower_bound = 0 + check_lower = self.add(ComparisonOp(src, Integer(lower_bound, src.type), ComparisonOp.SGE)) + self.add(Branch(check_lower, in_range2, overflow_block, Branch.BOOL)) + + self.activate_block(in_range2) + + # Value is in range - shift right to remove tag, then truncate + shifted = self.int_op( + c_pyssize_t_rprimitive, + src, + Integer(1, c_pyssize_t_rprimitive), + IntOp.RIGHT_SHIFT, + line, + ) + tmp = self.add(Truncate(shifted, target_type)) + self.add(Assign(res, tmp)) + self.goto(success_block) + + return res + + def emit_fixed_width_overflow_error(self, target_type: RType, line: int) -> None: + """Emit overflow error for fixed-width type conversion.""" + if is_int32_rprimitive(target_type): self.call_c(int32_overflow, [], line) - self.add(Unreachable()) elif is_int16_rprimitive(target_type): - # Slow path just always generates an OverflowError self.call_c(int16_overflow, [], line) - self.add(Unreachable()) elif is_uint8_rprimitive(target_type): - # Slow path just always generates an OverflowError self.call_c(uint8_overflow, [], line) - self.add(Unreachable()) else: assert False, target_type - - self.activate_block(end) - return res + self.add(Unreachable()) def coerce_short_int_to_fixed_width(self, src: Value, target_type: RType, line: int) -> Value: + # short_int (CPyTagged) is guaranteed to be a tagged value, never a pointer, + # so we don't need the fast/slow path split like coerce_int_to_fixed_width. + # However, we still need range checking when target type is smaller than source. + assert is_fixed_width_rtype(target_type), target_type + assert isinstance(target_type, RPrimitive), target_type + if is_int64_rprimitive(target_type) or ( PLATFORM_SIZE == 4 and is_int32_rprimitive(target_type) ): + # No range check needed - target is same size or larger than source return self.int_op(target_type, src, Integer(1, target_type), IntOp.RIGHT_SHIFT, line) - # TODO: i32 on 64-bit platform - assert False, (src.type, target_type, PLATFORM_SIZE) + + # Target is smaller than source - need range checking + # Use helper method to generate range checking and conversion + overflow, end = BasicBlock(), BasicBlock() + res = self.coerce_tagged_to_fixed_width_with_range_check( + src, target_type, short_int_rprimitive.size, overflow, end, line + ) + + # Handle overflow case + self.activate_block(overflow) + self.emit_fixed_width_overflow_error(target_type, line) + + self.activate_block(end) + return res def coerce_fixed_width_to_int(self, src: Value, line: int) -> Value: if ( diff --git a/mypyc/test-data/irbuild-str.test b/mypyc/test-data/irbuild-str.test index b199f0706ec6..ee618bb34f65 100644 --- a/mypyc/test-data/irbuild-str.test +++ b/mypyc/test-data/irbuild-str.test @@ -508,11 +508,19 @@ L0: return r6 [case testOrdOfStrIndex_64bit] -from mypy_extensions import i64 +from mypy_extensions import i64, i32, i16, u8 def ord_str_index(s: str, i: int) -> int: return ord(s[i]) def ord_str_index_i64(s: str, i: i64) -> int: return ord(s[i]) +def ord_str_index_to_i32(s: str, i: i64) -> i32: + return ord(s[i]) +def ord_str_index_to_i16(s: str, i: i64) -> i16: + return ord(s[i]) +def ord_str_index_to_u8(s: str, i: i64) -> u8: + return ord(s[i]) +def ord_str_index_to_i64(s: str, i: i64) -> i64: + return ord(s[i]) [typing fixtures/typing-full.pyi] [out] def ord_str_index(s, i): @@ -565,6 +573,119 @@ L1: L2: r3 = CPyStr_GetItemUnsafeAsInt(s, r0) return r3 +def ord_str_index_to_i32(s, i): + s :: str + i, r0 :: i64 + r1, r2 :: bool + r3 :: short_int + r4, r5 :: bit + r6 :: native_int + r7, r8 :: i32 +L0: + r0 = CPyStr_AdjustIndex(s, i) + r1 = CPyStr_RangeCheck(s, r0) + if r1 goto L2 else goto L1 :: bool +L1: + r2 = raise IndexError('index out of range') + unreachable +L2: + r3 = CPyStr_GetItemUnsafeAsInt(s, r0) + r4 = r3 < 4294967296 :: signed + if r4 goto L3 else goto L5 :: bool +L3: + r5 = r3 >= -4294967296 :: signed + if r5 goto L4 else goto L5 :: bool +L4: + r6 = r3 >> 1 + r7 = truncate r6: native_int to i32 + r8 = r7 + goto L6 +L5: + CPyInt32_Overflow() + unreachable +L6: + return r8 +def ord_str_index_to_i16(s, i): + s :: str + i, r0 :: i64 + r1, r2 :: bool + r3 :: short_int + r4, r5 :: bit + r6 :: native_int + r7, r8 :: i16 +L0: + r0 = CPyStr_AdjustIndex(s, i) + r1 = CPyStr_RangeCheck(s, r0) + if r1 goto L2 else goto L1 :: bool +L1: + r2 = raise IndexError('index out of range') + unreachable +L2: + r3 = CPyStr_GetItemUnsafeAsInt(s, r0) + r4 = r3 < 65536 :: signed + if r4 goto L3 else goto L5 :: bool +L3: + r5 = r3 >= -65536 :: signed + if r5 goto L4 else goto L5 :: bool +L4: + r6 = r3 >> 1 + r7 = truncate r6: native_int to i16 + r8 = r7 + goto L6 +L5: + CPyInt16_Overflow() + unreachable +L6: + return r8 +def ord_str_index_to_u8(s, i): + s :: str + i, r0 :: i64 + r1, r2 :: bool + r3 :: short_int + r4, r5 :: bit + r6 :: native_int + r7, r8 :: u8 +L0: + r0 = CPyStr_AdjustIndex(s, i) + r1 = CPyStr_RangeCheck(s, r0) + if r1 goto L2 else goto L1 :: bool +L1: + r2 = raise IndexError('index out of range') + unreachable +L2: + r3 = CPyStr_GetItemUnsafeAsInt(s, r0) + r4 = r3 < 512 :: signed + if r4 goto L3 else goto L5 :: bool +L3: + r5 = r3 >= 0 :: signed + if r5 goto L4 else goto L5 :: bool +L4: + r6 = r3 >> 1 + r7 = truncate r6: native_int to u8 + r8 = r7 + goto L6 +L5: + CPyUInt8_Overflow() + unreachable +L6: + return r8 +def ord_str_index_to_i64(s, i): + s :: str + i, r0 :: i64 + r1, r2 :: bool + r3 :: short_int + r4 :: i64 +L0: + r0 = CPyStr_AdjustIndex(s, i) + r1 = CPyStr_RangeCheck(s, r0) + if r1 goto L2 else goto L1 :: bool +L1: + r2 = raise IndexError('index out of range') + unreachable +L2: + r3 = CPyStr_GetItemUnsafeAsInt(s, r0) + r4 = r3 >> 1 + return r4 [case testStrip] from typing import NewType, Union diff --git a/mypyc/test-data/run-strings.test b/mypyc/test-data/run-strings.test index de976bbab78a..b13fc8dfe83f 100644 --- a/mypyc/test-data/run-strings.test +++ b/mypyc/test-data/run-strings.test @@ -808,7 +808,7 @@ def test_chr() -> None: [case testOrd] from testutil import assertRaises -from mypy_extensions import i64, i32, i16 +from mypy_extensions import i64, i32, i16, u8 def test_ord() -> None: assert ord(' ') == 32 @@ -906,6 +906,46 @@ def test_ord_str_index_unicode_mix() -> None: assert ord(s[2 + int()]) == 20320 # 3-byte assert ord(s[3 + int()]) == 128512 # 4-byte +def test_ord_str_index_to_fixed_width() -> None: + # Test i32 coercion (signed 32-bit: -2^31 to 2^31-1) + # All valid Unicode code points fit in i32, so test min/max boundaries + s_i32_min = chr(0) + assert i32(ord(s_i32_min[0 + int()])) == 0 + + s_i32_max = chr(0x10FFFF) # Max Unicode code point + assert i32(ord(s_i32_max[0 + int()])) == 0x10FFFF + + # Test i16 coercion (signed 16-bit: -2^15 to 2^15-1, i.e., -32768 to 32767) + # ord() returns non-negative, so test 0 to 32767 + s_i16_min = chr(0) + assert i16(ord(s_i16_min[0 + int()])) == 0 + + s_i16_max = chr(32767) # 2^15 - 1 + assert i16(ord(s_i16_max[0 + int()])) == 32767 + + s_i16_overflow = chr(32768) # 2^15 + with assertRaises(ValueError, "int too large to convert to i16"): + i16(ord(s_i16_overflow[0 + int()])) + + s_i16_overflow2 = chr(32769) # 2^15 + 1 + with assertRaises(ValueError, "int too large to convert to i16"): + i16(ord(s_i16_overflow2[0 + int()])) + + # Test u8 coercion (unsigned 8-bit: 0 to 2^8-1, i.e., 0 to 255) + s_u8_min = chr(0) + assert u8(ord(s_u8_min[0 + int()])) == 0 + + s_u8_max = chr(255) + assert u8(ord(s_u8_max[0 + int()])) == 255 + + s_u8_overflow = chr(256) + with assertRaises(ValueError, "int too large or small to convert to u8"): + u8(ord(s_u8_overflow[0 + int()])) + + s_u8_overflow2 = chr(257) + with assertRaises(ValueError, "int too large or small to convert to u8"): + u8(ord(s_u8_overflow2[0 + int()])) + [case testDecode] from testutil import assertRaises