diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 5b36485692..aa9596f38a 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -3855,7 +3855,13 @@ SwitchLookupTable::SwitchLookupTable( // If the type is integer and the table fits in a register, build a bitmap. if (WouldFitInRegister(DL, TableSize, ValueType)) { IntegerType *IT = cast(ValueType); - APInt TableInt(TableSize * IT->getBitWidth(), 0); + // HLSL Change Begin: Round bitmap width up to size supported by DXIL (i16 + // or i32) + uint64_t RawBitMapWidth = TableSize * IT->getBitWidth(); + uint64_t BitMapWidth = + NextPowerOf2(std::max(UINT64_C(15), RawBitMapWidth - 1)); + APInt TableInt(BitMapWidth, 0); + // HLSL Change End for (uint64_t I = TableSize; I > 0; --I) { TableInt <<= IT->getBitWidth(); // Insert values into the bitmap. Undef values are set to zero. @@ -3950,6 +3956,11 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL, // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. if (TableSize >= UINT_MAX/IT->getBitWidth()) return false; + // HLSL Change Begin: Cap at 32 bits so the bitmap stays in a DXIL-supported + // integer width and never promotes to i64 (which would set Int64Ops). + if (TableSize * IT->getBitWidth() > 32) + return false; + // HLSL Change End return DL.fitsInLegalInteger(TableSize * IT->getBitWidth()); } diff --git a/tools/clang/test/HLSLFileCheck/passes/llvm/simplifycfg/dxil_switch_bitmap_avoids_int64.ll b/tools/clang/test/HLSLFileCheck/passes/llvm/simplifycfg/dxil_switch_bitmap_avoids_int64.ll new file mode 100644 index 0000000000..4b94f9f530 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/passes/llvm/simplifycfg/dxil_switch_bitmap_avoids_int64.ll @@ -0,0 +1,69 @@ +; RUN: %opt %s -simplifycfg -S | FileCheck %s +; +; Companion test for https://github.com/microsoft/DirectXShaderCompiler/issues/8421 +; +; A switch with 33 boolean cases would naively build an i33 bitmap. Rounding up +; to i64 would emit i64 ops and silently set the Int64Ops shader flag, so the +; bitmap optimization is capped at 32 bits and the switch is preserved instead. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +; CHECK-LABEL: @switch_bool_33_cases +; CHECK-NOT: i33 +; CHECK-NOT: lshr i64 +; CHECK-NOT: trunc i64 +; CHECK: switch i32 +; CHECK: ret i1 + +define i1 @switch_bool_33_cases(i32 %x) { +entry: + switch i32 %x, label %default [ + i32 1, label %case_true + i32 2, label %case_true + i32 3, label %case_false + i32 4, label %case_true + i32 5, label %case_false + i32 6, label %case_true + i32 7, label %case_true + i32 8, label %case_false + i32 9, label %case_false + i32 10, label %case_true + i32 11, label %case_true + i32 12, label %case_false + i32 13, label %case_true + i32 14, label %case_false + i32 15, label %case_true + i32 16, label %case_true + i32 17, label %case_false + i32 18, label %case_true + i32 19, label %case_false + i32 20, label %case_false + i32 21, label %case_true + i32 22, label %case_true + i32 23, label %case_false + i32 24, label %case_true + i32 25, label %case_false + i32 26, label %case_true + i32 27, label %case_false + i32 28, label %case_true + i32 29, label %case_true + i32 30, label %case_false + i32 31, label %case_true + i32 32, label %case_false + i32 33, label %case_true + ] + +case_true: + br label %end + +case_false: + br label %end + +default: + br label %end + +end: + %result = phi i1 [ true, %case_true ], [ false, %case_false ], [ false, %default ] + ret i1 %result +} diff --git a/tools/clang/test/HLSLFileCheck/passes/llvm/simplifycfg/dxil_switch_bitmap_legal_types.ll b/tools/clang/test/HLSLFileCheck/passes/llvm/simplifycfg/dxil_switch_bitmap_legal_types.ll new file mode 100644 index 0000000000..96d7f26b63 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/passes/llvm/simplifycfg/dxil_switch_bitmap_legal_types.ll @@ -0,0 +1,176 @@ +; RUN: %opt %s -simplifycfg -S | FileCheck %s +; +; Regression test for https://github.com/microsoft/DirectXShaderCompiler/issues/8421 +; +; SimplifyCFG's switch-to-lookup-table built bitmaps of width +; "TableSize * ValueBitWidth", producing illegal widths like i9, i17, i26 or +; i48. The fix rounds the bitmap up to i16 or i32, and skips the lookup table +; entirely if the bitmap would exceed 32 bits. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +; 5 boolean cases with mixed values would naively build an i5 bitmap; rounded +; to i16 by the fix. +; +; CHECK-LABEL: @switch_bool_5_cases +; CHECK: switch.lookup: +; CHECK: lshr i16 +; CHECK-NOT: i5 +; CHECK: ret i1 + +define i1 @switch_bool_5_cases(i32 %x) { +entry: + switch i32 %x, label %default [ + i32 0, label %case_t + i32 1, label %case_t + i32 2, label %case_f + i32 3, label %case_t + i32 4, label %case_f + ] + +case_t: + br label %end + +case_f: + br label %end + +default: + br label %end + +end: + %result = phi i1 [ true, %case_t ], [ false, %case_f ], [ false, %default ] + ret i1 %result +} + +; 9 boolean cases would naively build an i9 bitmap; rounded to i16 by the fix. +; +; CHECK-LABEL: @switch_bool_9_cases +; CHECK: switch.lookup: +; CHECK: lshr i16 +; CHECK-NOT: i9 +; CHECK: ret i1 + +define i1 @switch_bool_9_cases(i32 %x) { +entry: + switch i32 %x, label %default [ + i32 0, label %case_true + i32 1, label %case_true + i32 3, label %case_true + i32 4, label %case_true + i32 5, label %case_true + i32 6, label %case_true + i32 7, label %case_true + i32 8, label %case_true + ] + +case_true: + br label %end + +default: + br label %end + +end: + %result = phi i1 [ true, %case_true ], [ false, %default ] + ret i1 %result +} + +; 17 boolean cases would naively build an i17 bitmap; rounded to i32 by the +; fix. +; +; CHECK-LABEL: @switch_bool_17_cases +; CHECK: switch.lookup: +; CHECK: lshr i32 +; CHECK-NOT: i17 +; CHECK: ret i1 + +define i1 @switch_bool_17_cases(i32 %x) { +entry: + switch i32 %x, label %default [ + i32 0, label %case_true + i32 1, label %case_true + i32 3, label %case_true + i32 4, label %case_true + i32 5, label %case_true + i32 7, label %case_true + i32 8, label %case_true + i32 10, label %case_true + i32 11, label %case_true + i32 12, label %case_true + i32 14, label %case_true + i32 15, label %case_true + i32 16, label %case_true + ] + +case_true: + br label %end + +default: + br label %end + +end: + %result = phi i1 [ true, %case_true ], [ false, %default ] + ret i1 %result +} + +; 26 boolean cases (the original #8421 repro) would naively build an i26 +; bitmap; rounded to i32 by the fix. +; +; CHECK-LABEL: @switch_bool_26_cases +; CHECK: switch.lookup: +; CHECK: lshr i32 +; CHECK-NOT: i26 +; CHECK: ret i1 + +define i1 @switch_bool_26_cases(i32 %x) { +entry: + switch i32 %x, label %default [ + i32 1, label %case_true + i32 6, label %case_true + i32 11, label %case_true + i32 16, label %case_true + i32 21, label %case_true + i32 26, label %case_true + ] + +case_true: + br label %end + +default: + br label %end + +end: + %result = phi i1 [ true, %case_true ], [ false, %default ] + ret i1 %result +} + +; Non-i1 result: 3 i16 cases would naively build an i48 bitmap. The fix makes +; bitmaps wider than 32 bits skip the lookup table; the original switch is +; preserved. +; +; CHECK-LABEL: @switch_i16_3_cases +; CHECK: switch i32 +; CHECK-NOT: switch.lookup +; CHECK-NOT: i48 +; CHECK: ret i16 + +define i16 @switch_i16_3_cases(i32 %x) { +entry: + switch i32 %x, label %default [ + i32 0, label %c0 + i32 1, label %c1 + i32 2, label %c2 + ] + +c0: br label %end +c1: br label %end +c2: br label %end +default: br label %end + +end: + ; Non-linear values prevent the LinearMap fast path so the bitmap path is + ; the one that would have been chosen, but since this would want to use an i48 + ; (which isn't valid in DXIL) the switch is preserved. + %result = phi i16 [ 73, %c0 ], [ 42, %c1 ], [ 88, %c2 ], [ 0, %default ] + ret i16 %result +}