Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 109 additions & 37 deletions mypyc/irbuild/ll_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,8 +459,6 @@ def coerce_int_to_fixed_width(self, src: Value, target_type: RType, line: int) -
assert is_fixed_width_rtype(target_type), target_type
assert isinstance(target_type, RPrimitive), target_type

res = Register(target_type)

fast, slow, end = BasicBlock(), BasicBlock(), BasicBlock()

check = self.check_tagged_short_int(src, line)
Expand All @@ -471,37 +469,20 @@ def coerce_int_to_fixed_width(self, src: Value, target_type: RType, line: int) -
size = target_type.size
if size < int_rprimitive.size:
# Add a range check when the target type is smaller than the source type
fast2, fast3 = BasicBlock(), BasicBlock()
upper_bound = 1 << (size * 8 - 1)
if not target_type.is_signed:
upper_bound *= 2
check2 = self.add(ComparisonOp(src, Integer(upper_bound, src.type), ComparisonOp.SLT))
self.add(Branch(check2, fast2, slow, Branch.BOOL))
self.activate_block(fast2)
if target_type.is_signed:
lower_bound = -upper_bound
else:
lower_bound = 0
check3 = self.add(ComparisonOp(src, Integer(lower_bound, src.type), ComparisonOp.SGE))
self.add(Branch(check3, fast3, slow, Branch.BOOL))
self.activate_block(fast3)
tmp = self.int_op(
c_pyssize_t_rprimitive,
src,
Integer(1, c_pyssize_t_rprimitive),
IntOp.RIGHT_SHIFT,
line,
# Use helper method to generate range checking and conversion
res = self.coerce_tagged_to_fixed_width_with_range_check(
src, target_type, int_rprimitive.size, slow, end, line
)
tmp = self.add(Truncate(tmp, target_type))
else:
# No range check needed when target is same size or larger
if size > int_rprimitive.size:
tmp = self.add(Extend(src, target_type, signed=True))
else:
tmp = src
tmp = self.int_op(target_type, tmp, Integer(1, target_type), IntOp.RIGHT_SHIFT, line)

self.add(Assign(res, tmp))
self.goto(end)
res = Register(target_type)
self.add(Assign(res, tmp))
self.goto(end)

self.activate_block(slow)
if is_int64_rprimitive(target_type) or (
Expand All @@ -521,31 +502,122 @@ def coerce_int_to_fixed_width(self, src: Value, target_type: RType, line: int) -
self.add(Assign(res, tmp))
self.add(KeepAlive([src]))
self.goto(end)
elif is_int32_rprimitive(target_type):
else:
# Slow path just always generates an OverflowError
self.emit_fixed_width_overflow_error(target_type, line)

self.activate_block(end)
return res

def coerce_tagged_to_fixed_width_with_range_check(
    self,
    src: Value,
    target_type: RType,
    source_size: int,
    overflow_block: BasicBlock,
    success_block: BasicBlock,
    line: int,
) -> Register:
    """Range-check a tagged integer and narrow it to a smaller fixed-width type.

    Two signed comparisons of src against the bounds of target_type are
    emitted. If either fails, control branches to overflow_block. Otherwise
    src is shifted right by one bit, truncated to target_type, assigned to a
    new register, and control jumps to success_block.

    Neither overflow_block nor success_block is activated here; the caller
    is expected to activate them and fill in the overflow handling.

    Args:
        src: Source value to convert. Presumably a short (non-pointer) tagged
            integer at this point -- confirm at each call site.
        target_type: Fixed-width primitive type to convert to; its size must
            be strictly smaller than source_size.
        source_size: Size of the source type, in the same units as
            target_type.size. Only used for the sanity assertion.
        overflow_block: Block branched to when src is out of range.
        success_block: Block jumped to once the result has been assigned.
        line: Line number passed on to the shift operation.

    Returns:
        The register that receives the converted value on the success path.
    """
    assert is_fixed_width_rtype(target_type), target_type
    assert isinstance(target_type, RPrimitive), target_type
    target_size = target_type.size
    assert target_size < source_size, (target_type, target_size, source_size)

    # Exclusive upper bound and inclusive lower bound of the target type.
    # NOTE(review): the constants below are built with src.type, which
    # presumably puts them in the same encoding as src -- confirm.
    signed = target_type.is_signed
    bit_width = target_size * 8
    upper_bound = 1 << (bit_width - 1) if signed else 1 << bit_width
    lower_bound = -upper_bound if signed else 0

    result = Register(target_type)
    below_upper, within_bounds = BasicBlock(), BasicBlock()

    # First test: src < upper_bound, otherwise overflow.
    upper_ok = self.add(ComparisonOp(src, Integer(upper_bound, src.type), ComparisonOp.SLT))
    self.add(Branch(upper_ok, below_upper, overflow_block, Branch.BOOL))

    # Second test: src >= lower_bound, otherwise overflow.
    self.activate_block(below_upper)
    lower_ok = self.add(ComparisonOp(src, Integer(lower_bound, src.type), ComparisonOp.SGE))
    self.add(Branch(lower_ok, within_bounds, overflow_block, Branch.BOOL))

    # Both tests passed: shift right by one bit, then narrow to the target.
    self.activate_block(within_bounds)
    one = Integer(1, c_pyssize_t_rprimitive)
    untagged = self.int_op(c_pyssize_t_rprimitive, src, one, IntOp.RIGHT_SHIFT, line)
    narrowed = self.add(Truncate(untagged, target_type))
    self.add(Assign(result, narrowed))
    self.goto(success_block)

    return result

def emit_fixed_width_overflow_error(self, target_type: RType, line: int) -> None:
    """Emit the overflow-error call for a fixed-width conversion and end the block.

    The helper matching target_type (int32_overflow, int16_overflow or
    uint8_overflow) is called with no arguments, and the current block is
    then terminated with Unreachable. The helpers presumably always raise,
    so nothing may follow the call -- confirm against their definitions.

    Args:
        target_type: Fixed-width type whose conversion overflowed. Only
            i32, i16 and u8 are handled; anything else trips the assertion.
        line: Line number attached to the emitted call.
    """
    if is_int32_rprimitive(target_type):
        self.call_c(int32_overflow, [], line)
    elif is_int16_rprimitive(target_type):
        self.call_c(int16_overflow, [], line)
    elif is_uint8_rprimitive(target_type):
        self.call_c(uint8_overflow, [], line)
    else:
        assert False, target_type
    # One shared terminator for every handled type. This function owns no
    # `end` block or result register, so it neither activates a block nor
    # returns a value; the caller resumes emitting in its own blocks.
    self.add(Unreachable())

def coerce_short_int_to_fixed_width(self, src: Value, target_type: RType, line: int) -> Value:
    """Convert a short_int value to a fixed-width integer type.

    short_int (CPyTagged) is guaranteed to be a tagged value, never a
    pointer, so no fast/slow path split is needed here (unlike
    coerce_int_to_fixed_width). A range check is still required whenever
    the target type is smaller than the source.

    Args:
        src: The short_int value to convert.
        target_type: Fixed-width primitive type to convert to.
        line: Line number attached to the emitted ops.

    Returns:
        The converted value: the result of the shift when no range check is
        needed, otherwise the register assigned on the in-range path.
    """
    assert is_fixed_width_rtype(target_type), target_type
    assert isinstance(target_type, RPrimitive), target_type

    if is_int64_rprimitive(target_type) or (
        PLATFORM_SIZE == 4 and is_int32_rprimitive(target_type)
    ):
        # No range check needed - target is same size or larger than source
        return self.int_op(target_type, src, Integer(1, target_type), IntOp.RIGHT_SHIFT, line)

    # Target is smaller than source: emit the range checks and conversion
    # through the shared helper. (An unconditional `assert False` used to sit
    # here and made this whole path unreachable.)
    overflow, end = BasicBlock(), BasicBlock()
    res = self.coerce_tagged_to_fixed_width_with_range_check(
        src, target_type, short_int_rprimitive.size, overflow, end, line
    )

    # Out-of-range values land here and report the overflow.
    self.activate_block(overflow)
    self.emit_fixed_width_overflow_error(target_type, line)

    # In-range values continue here with the result already in res.
    self.activate_block(end)
    return res

def coerce_fixed_width_to_int(self, src: Value, line: int) -> Value:
if (
Expand Down
123 changes: 122 additions & 1 deletion mypyc/test-data/irbuild-str.test
Original file line number Diff line number Diff line change
Expand Up @@ -508,11 +508,19 @@ L0:
return r6

[case testOrdOfStrIndex_64bit]
from mypy_extensions import i64
from mypy_extensions import i64, i32, i16, u8
def ord_str_index(s: str, i: int) -> int:
return ord(s[i])
def ord_str_index_i64(s: str, i: i64) -> int:
return ord(s[i])
def ord_str_index_to_i32(s: str, i: i64) -> i32:
return ord(s[i])
def ord_str_index_to_i16(s: str, i: i64) -> i16:
return ord(s[i])
def ord_str_index_to_u8(s: str, i: i64) -> u8:
return ord(s[i])
def ord_str_index_to_i64(s: str, i: i64) -> i64:
return ord(s[i])
[typing fixtures/typing-full.pyi]
[out]
def ord_str_index(s, i):
Expand Down Expand Up @@ -565,6 +573,119 @@ L1:
L2:
r3 = CPyStr_GetItemUnsafeAsInt(s, r0)
return r3
def ord_str_index_to_i32(s, i):
s :: str
i, r0 :: i64
r1, r2 :: bool
r3 :: short_int
r4, r5 :: bit
r6 :: native_int
r7, r8 :: i32
L0:
r0 = CPyStr_AdjustIndex(s, i)
r1 = CPyStr_RangeCheck(s, r0)
if r1 goto L2 else goto L1 :: bool
L1:
r2 = raise IndexError('index out of range')
unreachable
L2:
r3 = CPyStr_GetItemUnsafeAsInt(s, r0)
r4 = r3 < 4294967296 :: signed
if r4 goto L3 else goto L5 :: bool
L3:
r5 = r3 >= -4294967296 :: signed
if r5 goto L4 else goto L5 :: bool
L4:
r6 = r3 >> 1
r7 = truncate r6: native_int to i32
r8 = r7
goto L6
L5:
CPyInt32_Overflow()
unreachable
L6:
return r8
def ord_str_index_to_i16(s, i):
s :: str
i, r0 :: i64
r1, r2 :: bool
r3 :: short_int
r4, r5 :: bit
r6 :: native_int
r7, r8 :: i16
L0:
r0 = CPyStr_AdjustIndex(s, i)
r1 = CPyStr_RangeCheck(s, r0)
if r1 goto L2 else goto L1 :: bool
L1:
r2 = raise IndexError('index out of range')
unreachable
L2:
r3 = CPyStr_GetItemUnsafeAsInt(s, r0)
r4 = r3 < 65536 :: signed
if r4 goto L3 else goto L5 :: bool
L3:
r5 = r3 >= -65536 :: signed
if r5 goto L4 else goto L5 :: bool
L4:
r6 = r3 >> 1
r7 = truncate r6: native_int to i16
r8 = r7
goto L6
L5:
CPyInt16_Overflow()
unreachable
L6:
return r8
def ord_str_index_to_u8(s, i):
s :: str
i, r0 :: i64
r1, r2 :: bool
r3 :: short_int
r4, r5 :: bit
r6 :: native_int
r7, r8 :: u8
L0:
r0 = CPyStr_AdjustIndex(s, i)
r1 = CPyStr_RangeCheck(s, r0)
if r1 goto L2 else goto L1 :: bool
L1:
r2 = raise IndexError('index out of range')
unreachable
L2:
r3 = CPyStr_GetItemUnsafeAsInt(s, r0)
r4 = r3 < 512 :: signed
if r4 goto L3 else goto L5 :: bool
L3:
r5 = r3 >= 0 :: signed
if r5 goto L4 else goto L5 :: bool
L4:
r6 = r3 >> 1
r7 = truncate r6: native_int to u8
r8 = r7
goto L6
L5:
CPyUInt8_Overflow()
unreachable
L6:
return r8
def ord_str_index_to_i64(s, i):
s :: str
i, r0 :: i64
r1, r2 :: bool
r3 :: short_int
r4 :: i64
L0:
r0 = CPyStr_AdjustIndex(s, i)
r1 = CPyStr_RangeCheck(s, r0)
if r1 goto L2 else goto L1 :: bool
L1:
r2 = raise IndexError('index out of range')
unreachable
L2:
r3 = CPyStr_GetItemUnsafeAsInt(s, r0)
r4 = r3 >> 1
return r4

[case testStrip]
from typing import NewType, Union
Expand Down
42 changes: 41 additions & 1 deletion mypyc/test-data/run-strings.test
Original file line number Diff line number Diff line change
Expand Up @@ -808,7 +808,7 @@ def test_chr() -> None:

[case testOrd]
from testutil import assertRaises
from mypy_extensions import i64, i32, i16
from mypy_extensions import i64, i32, i16, u8

def test_ord() -> None:
assert ord(' ') == 32
Expand Down Expand Up @@ -906,6 +906,46 @@ def test_ord_str_index_unicode_mix() -> None:
assert ord(s[2 + int()]) == 20320 # 3-byte
assert ord(s[3 + int()]) == 128512 # 4-byte

def test_ord_str_index_to_fixed_width() -> None:
    # Each case indexes a one-character string with `0 + int()` and converts
    # the ord() result to a fixed-width type.
    # NOTE(review): `0 + int()` presumably keeps the index from being a
    # compile-time constant -- confirm.

    # --- i32 (signed 32-bit) ---
    # Every valid code point fits, so only the lowest and highest are checked.
    lowest_for_i32 = chr(0)
    assert i32(ord(lowest_for_i32[0 + int()])) == 0

    highest_for_i32 = chr(0x10FFFF)  # largest Unicode code point
    assert i32(ord(highest_for_i32[0 + int()])) == 0x10FFFF

    # --- i16 (signed 16-bit, -32768..32767) ---
    # ord() is never negative, so the usable range here is 0..32767.
    lowest_for_i16 = chr(0)
    assert i16(ord(lowest_for_i16[0 + int()])) == 0

    highest_for_i16 = chr(32767)  # 2**15 - 1, last value that fits
    assert i16(ord(highest_for_i16[0 + int()])) == 32767

    just_over_i16 = chr(32768)  # 2**15, first value that does not fit
    with assertRaises(ValueError, "int too large to convert to i16"):
        i16(ord(just_over_i16[0 + int()]))

    further_over_i16 = chr(32769)  # 2**15 + 1
    with assertRaises(ValueError, "int too large to convert to i16"):
        i16(ord(further_over_i16[0 + int()]))

    # --- u8 (unsigned 8-bit, 0..255) ---
    lowest_for_u8 = chr(0)
    assert u8(ord(lowest_for_u8[0 + int()])) == 0

    highest_for_u8 = chr(255)  # last value that fits
    assert u8(ord(highest_for_u8[0 + int()])) == 255

    just_over_u8 = chr(256)  # first value that does not fit
    with assertRaises(ValueError, "int too large or small to convert to u8"):
        u8(ord(just_over_u8[0 + int()]))

    further_over_u8 = chr(257)
    with assertRaises(ValueError, "int too large or small to convert to u8"):
        u8(ord(further_over_u8[0 + int()]))

[case testDecode]
from testutil import assertRaises

Expand Down