[llvm] 540f68c - [SimplifyCFG] Don't use a mask for lookup tables generated from switches with an unreachable default case (#94468)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 8 06:32:37 PDT 2024
Author: DaPorkchop_
Date: 2024-06-08T21:32:34+08:00
New Revision: 540f68c44f4813c2c16f92ccbba5a15e8ff87c85
URL: https://github.com/llvm/llvm-project/commit/540f68c44f4813c2c16f92ccbba5a15e8ff87c85
DIFF: https://github.com/llvm/llvm-project/commit/540f68c44f4813c2c16f92ccbba5a15e8ff87c85.diff
LOG: [SimplifyCFG] Don't use a mask for lookup tables generated from switches with an unreachable default case (#94468)
When transforming a switch with holes into a lookup table, we currently
use a mask to check if the current index is handled by the switch or if
it is a hole. If it is a hole, we skip loading from the lookup table.
Normally, if the switch's default case is unreachable this has no
impact, as the mask test gets optimized away by subsequent passes.
However, if the switch is large enough that the number of lookup table
entries exceeds the target's register width, we won't be able to fit all
the cases into a mask and the switch won't get transformed into a lookup
table. If we know that the switch's default case is unreachable, we know
that the mask is unnecessary and can skip constructing it entirely,
which allows us to transform the switch into a lookup table.
[Example](https://godbolt.org/z/7x7qfx8M1)
In the future, it might be interesting to consider allowing lookup table
masks to be more than one register large (e.g. using a constant array of
bit flags, similar to `std::bitset`).
Added:
llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll
Modified:
llvm/lib/Transforms/Utils/SimplifyCFG.cpp
llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index fe6ec8819ff99..292739b6c5fda 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6743,8 +6743,25 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
TableSize =
(MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
+ // If the default destination is unreachable, or if the lookup table covers
+ // all values of the conditional variable, branch directly to the lookup table
+ // BB. Otherwise, check that the condition is within the case range.
+ bool DefaultIsReachable = !SI->defaultDestUndefined();
+
bool TableHasHoles = (NumResults < TableSize);
- bool NeedMask = (TableHasHoles && !HasDefaultResults);
+
+ // If the table has holes but the default destination doesn't produce any
+ // constant results, the lookup table entries corresponding to the holes will
+ // contain undefined values.
+ bool AllHolesAreUndefined = TableHasHoles && !HasDefaultResults;
+
+ // If the default destination doesn't produce a constant result but is still
+ // reachable, and the lookup table has holes, we need to use a mask to
+ // determine if the current index should load from the lookup table or jump
+ // to the default case.
+ // The mask is unnecessary if the table has holes but the default destination
+ // is unreachable, as in that case the holes must also be unreachable.
+ bool NeedMask = AllHolesAreUndefined && DefaultIsReachable;
if (NeedMask) {
// As an extra penalty for the validity test we require more cases.
if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
@@ -6766,12 +6783,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
"It is impossible for a switch to have more entries than the max "
"representable value of its input integer type's size.");
- // If the default destination is unreachable, or if the lookup table covers
- // all values of the conditional variable, branch directly to the lookup table
- // BB. Otherwise, check that the condition is within the case range.
- bool DefaultIsReachable =
- !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
-
// Create the BB that does the lookups.
Module &Mod = *CommonDest->getParent()->getParent();
BasicBlock *LookupBB = BasicBlock::Create(
@@ -6895,8 +6906,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
for (PHINode *PHI : PHIs) {
const ResultListTy &ResultList = ResultLists[PHI];
- // If using a bitmask, use any value to fill the lookup table holes.
- Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
+ // Use any value to fill the lookup table holes.
+ Constant *DV =
+ AllHolesAreUndefined ? ResultLists[PHI][0].second : DefaultResults[PHI];
StringRef FuncName = Fn->getName();
SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
DL, FuncName);
diff --git a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
index 3ded78d85a610..2ac94afd95910 100644
--- a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
+++ b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
@@ -34,10 +34,6 @@ define i32 @switch_of_powers(i32 %x) {
; RV64ZBB-LABEL: @switch_of_powers(
; RV64ZBB-NEXT: entry:
; RV64ZBB-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
-; RV64ZBB-NEXT: [[SWITCH_MASKINDEX:%.*]] = trunc i32 [[TMP0]] to i8
-; RV64ZBB-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i8 121, [[SWITCH_MASKINDEX]]
-; RV64ZBB-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1
-; RV64ZBB-NEXT: call void @llvm.assume(i1 [[SWITCH_LOBIT]])
; RV64ZBB-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers, i32 0, i32 [[TMP0]]
; RV64ZBB-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
; RV64ZBB-NEXT: ret i32 [[SWITCH_LOAD]]
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index d6450a2b4a348..845c5008e3837 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -38,6 +38,7 @@ target triple = "x86_64-unknown-linux-gnu"
; CHECK: @switch.table.threecases = private unnamed_addr constant [3 x i32] [i32 10, i32 7, i32 5], align 4
; CHECK: @switch.table.covered_switch_with_bit_tests = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 1, i32 1], align 4
; CHECK: @switch.table.signed_overflow1 = private unnamed_addr constant [4 x i32] [i32 3333, i32 4444, i32 1111, i32 2222], align 4
+; CHECK: @switch.table.signed_overflow2 = private unnamed_addr constant [4 x i32] [i32 3333, i32 4444, i32 2222, i32 2222], align 4
;.
define i32 @f(i32 %c) {
; CHECK-LABEL: @f(
@@ -1738,12 +1739,53 @@ define i32 @signed_overflow2(i8 %n) {
; CHECK-LABEL: @signed_overflow2(
; CHECK-NEXT: start:
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[N:%.*]] to i2
-; CHECK-NEXT: switch i2 [[TRUNC]], label [[BB1:%.*]] [
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i2 [[TRUNC]], -2
+; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i2 [[SWITCH_TABLEIDX]] to i3
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.signed_overflow2, i32 0, i3 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
+;
+start:
+ %trunc = trunc i8 %n to i2
+ switch i2 %trunc, label %bb1 [
+ i2 1, label %bb3
+ i2 -2, label %bb4
+ i2 -1, label %bb5
+ ]
+
+bb1: ; preds = %start
+ unreachable
+
+bb3: ; preds = %start
+ br label %bb6
+
+bb4: ; preds = %start
+ br label %bb6
+
+bb5: ; preds = %start
+ br label %bb6
+
+bb6: ; preds = %start, %bb3, %bb4, %bb5
+ %.sroa.0.0 = phi i32 [ 4444, %bb5 ], [ 3333, %bb4 ], [ 2222, %bb3 ]
+ ret i32 %.sroa.0.0
+}
+
+; This is the same as @signed_overflow2 except that the default case calls @exit(), so it
+; isn't treated as unreachable
+define i32 @signed_overflow3(i8 %n) {
+; CHECK-LABEL: @signed_overflow3(
+; CHECK-NEXT: start:
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[N:%.*]] to i2
+; CHECK-NEXT: switch i2 [[TRUNC]], label [[START_UNREACHABLEDEFAULT:%.*]] [
; CHECK-NEXT: i2 1, label [[BB6:%.*]]
; CHECK-NEXT: i2 -2, label [[BB4:%.*]]
; CHECK-NEXT: i2 -1, label [[BB5:%.*]]
+; CHECK-NEXT: i2 0, label [[BB1:%.*]]
; CHECK-NEXT: ]
+; CHECK: start.unreachabledefault:
+; CHECK-NEXT: unreachable
; CHECK: bb1:
+; CHECK-NEXT: call void @exit(i32 1)
; CHECK-NEXT: unreachable
; CHECK: bb4:
; CHECK-NEXT: br label [[BB6]]
@@ -1762,6 +1804,7 @@ start:
]
bb1: ; preds = %start
+ call void @exit(i32 1)
unreachable
bb3: ; preds = %start
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll
new file mode 100644
index 0000000000000..7988e3057a2c2
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll
@@ -0,0 +1,542 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; RUN: opt < %s -passes=simplifycfg -switch-to-lookup=true -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+; A dense switch with a reachable default case should be optimized into a lookup table with a bounds check
+;.
+; CHECK: @switch.table.reachable_default_dense_0to31 = private unnamed_addr constant [32 x i32] [i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1], align 4
+; CHECK: @switch.table.unreachable_default_dense_0to31 = private unnamed_addr constant [32 x i32] [i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1], align 4
+; CHECK: @switch.table.reachable_default_holes_0to31 = private unnamed_addr constant [32 x i32] [i32 0, i32 7, i32 6, i32 0, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 0, i32 2, i32 1, i32 0, i32 7, i32 0, i32 5, i32 4, i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 5, i32 0, i32 3, i32 2, i32 1], align 4
+; CHECK: @switch.table.unreachable_default_holes_0to31 = private unnamed_addr constant [32 x i32] [i32 0, i32 7, i32 6, i32 0, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 0, i32 2, i32 1, i32 0, i32 7, i32 0, i32 5, i32 4, i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 5, i32 0, i32 3, i32 2, i32 1], align 4
+; CHECK: @switch.table.reachable_default_dense_0to32 = private unnamed_addr constant [33 x i32] [i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0], align 4
+; CHECK: @switch.table.unreachable_default_dense_0to32 = private unnamed_addr constant [33 x i32] [i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0], align 4
+; CHECK: @switch.table.unreachable_default_holes_0to32 = private unnamed_addr constant [33 x i32] [i32 0, i32 7, i32 6, i32 0, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 0, i32 2, i32 1, i32 0, i32 7, i32 0, i32 5, i32 4, i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 5, i32 0, i32 3, i32 2, i32 1, i32 0], align 4
+;.
+define i32 @reachable_default_dense_0to31(i32 %x, i32 %y) {
+; CHECK-LABEL: @reachable_default_dense_0to31(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 32
+; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK: switch.lookup:
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [32 x i32], ptr @switch.table.reachable_default_dense_0to31, i32 0, i32 [[X]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ [[Y:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ switch i32 %x, label %sw.default [
+ i32 0, label %bb0
+ i32 1, label %bb7
+ i32 2, label %bb6
+ i32 3, label %bb5
+ i32 4, label %bb4
+ i32 5, label %bb3
+ i32 6, label %bb2
+ i32 7, label %bb1
+ i32 8, label %bb0
+ i32 9, label %bb7
+ i32 10, label %bb6
+ i32 11, label %bb5
+ i32 12, label %bb4
+ i32 13, label %bb3
+ i32 14, label %bb2
+ i32 15, label %bb1
+ i32 16, label %bb0
+ i32 17, label %bb7
+ i32 18, label %bb6
+ i32 19, label %bb5
+ i32 20, label %bb4
+ i32 21, label %bb3
+ i32 22, label %bb2
+ i32 23, label %bb1
+ i32 24, label %bb0
+ i32 25, label %bb7
+ i32 26, label %bb6
+ i32 27, label %bb5
+ i32 28, label %bb4
+ i32 29, label %bb3
+ i32 30, label %bb2
+ i32 31, label %bb1
+ ]
+
+sw.default: br label %return
+bb0: br label %return
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+bb6: br label %return
+bb7: br label %return
+
+return:
+ %res = phi i32 [ %y, %sw.default ], [ 0, %bb0 ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 5, %bb5 ], [ 6, %bb6 ], [ 7, %bb7 ]
+ ret i32 %res
+
+}
+
+; A dense switch with an unreachable default case should be optimized into a lookup table without bounds checks
+define i32 @unreachable_default_dense_0to31(i32 %x, i32 %y) {
+; CHECK-LABEL: @unreachable_default_dense_0to31(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [32 x i32], ptr @switch.table.unreachable_default_dense_0to31, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
+;
+entry:
+ switch i32 %x, label %sw.default [
+ i32 0, label %bb0
+ i32 1, label %bb7
+ i32 2, label %bb6
+ i32 3, label %bb5
+ i32 4, label %bb4
+ i32 5, label %bb3
+ i32 6, label %bb2
+ i32 7, label %bb1
+ i32 8, label %bb0
+ i32 9, label %bb7
+ i32 10, label %bb6
+ i32 11, label %bb5
+ i32 12, label %bb4
+ i32 13, label %bb3
+ i32 14, label %bb2
+ i32 15, label %bb1
+ i32 16, label %bb0
+ i32 17, label %bb7
+ i32 18, label %bb6
+ i32 19, label %bb5
+ i32 20, label %bb4
+ i32 21, label %bb3
+ i32 22, label %bb2
+ i32 23, label %bb1
+ i32 24, label %bb0
+ i32 25, label %bb7
+ i32 26, label %bb6
+ i32 27, label %bb5
+ i32 28, label %bb4
+ i32 29, label %bb3
+ i32 30, label %bb2
+ i32 31, label %bb1
+ ]
+
+sw.default: unreachable
+bb0: br label %return
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+bb6: br label %return
+bb7: br label %return
+
+return:
+ %res = phi i32 [ 0, %bb0 ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 5, %bb5 ], [ 6, %bb6 ], [ 7, %bb7 ]
+ ret i32 %res
+
+}
+
+; A sparse switch with a reachable default case should be optimized into a lookup table with a bounds check and a mask
+define i32 @reachable_default_holes_0to31(i32 %x, i32 %y) {
+; CHECK-LABEL: @reachable_default_holes_0to31(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 32
+; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_HOLE_CHECK:%.*]], label [[RETURN:%.*]]
+; CHECK: switch.hole_check:
+; CHECK-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i32 -277094665, [[X]]
+; CHECK-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i32 [[SWITCH_SHIFTED]] to i1
+; CHECK-NEXT: br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN]]
+; CHECK: switch.lookup:
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [32 x i32], ptr @switch.table.reachable_default_holes_0to31, i32 0, i32 [[X]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ [[Y:%.*]], [[SWITCH_HOLE_CHECK]] ], [ [[Y]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ switch i32 %x, label %sw.default [
+ i32 0, label %bb0
+ i32 1, label %bb7
+ i32 2, label %bb6
+ i32 4, label %bb4
+ i32 5, label %bb3
+ i32 6, label %bb2
+ i32 7, label %bb1
+ i32 9, label %bb7
+ i32 10, label %bb6
+ i32 11, label %bb5
+ i32 12, label %bb4
+ i32 14, label %bb2
+ i32 15, label %bb1
+ i32 16, label %bb0
+ i32 17, label %bb7
+ i32 19, label %bb5
+ i32 20, label %bb4
+ i32 21, label %bb3
+ i32 22, label %bb2
+ i32 24, label %bb0
+ i32 25, label %bb7
+ i32 26, label %bb6
+ i32 27, label %bb5
+ i32 29, label %bb3
+ i32 30, label %bb2
+ i32 31, label %bb1
+ ]
+
+sw.default: br label %return
+bb0: br label %return
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+bb6: br label %return
+bb7: br label %return
+
+return:
+ %res = phi i32 [ %y, %sw.default ], [ 0, %bb0 ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 5, %bb5 ], [ 6, %bb6 ], [ 7, %bb7 ]
+ ret i32 %res
+
+}
+
+; A sparse switch with an unreachable default case should be optimized into a lookup table without bounds checks
+define i32 @unreachable_default_holes_0to31(i32 %x, i32 %y) {
+; CHECK-LABEL: @unreachable_default_holes_0to31(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [32 x i32], ptr @switch.table.unreachable_default_holes_0to31, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
+;
+entry:
+ switch i32 %x, label %sw.default [
+ i32 0, label %bb0
+ i32 1, label %bb7
+ i32 2, label %bb6
+ i32 4, label %bb4
+ i32 5, label %bb3
+ i32 6, label %bb2
+ i32 7, label %bb1
+ i32 9, label %bb7
+ i32 10, label %bb6
+ i32 11, label %bb5
+ i32 12, label %bb4
+ i32 14, label %bb2
+ i32 15, label %bb1
+ i32 16, label %bb0
+ i32 17, label %bb7
+ i32 19, label %bb5
+ i32 20, label %bb4
+ i32 21, label %bb3
+ i32 22, label %bb2
+ i32 24, label %bb0
+ i32 25, label %bb7
+ i32 26, label %bb6
+ i32 27, label %bb5
+ i32 29, label %bb3
+ i32 30, label %bb2
+ i32 31, label %bb1
+ ]
+
+sw.default: unreachable
+bb0: br label %return
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+bb6: br label %return
+bb7: br label %return
+
+return:
+ %res = phi i32 [ 0, %bb0 ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 5, %bb5 ], [ 6, %bb6 ], [ 7, %bb7 ]
+ ret i32 %res
+
+}
+
+; A dense switch with a reachable default case should be optimized into a lookup table with a bounds check
+define i32 @reachable_default_dense_0to32(i32 %x, i32 %y) {
+; CHECK-LABEL: @reachable_default_dense_0to32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 33
+; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK: switch.lookup:
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [33 x i32], ptr @switch.table.reachable_default_dense_0to32, i32 0, i32 [[X]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ [[Y:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ switch i32 %x, label %sw.default [
+ i32 0, label %bb0
+ i32 1, label %bb7
+ i32 2, label %bb6
+ i32 3, label %bb5
+ i32 4, label %bb4
+ i32 5, label %bb3
+ i32 6, label %bb2
+ i32 7, label %bb1
+ i32 8, label %bb0
+ i32 9, label %bb7
+ i32 10, label %bb6
+ i32 11, label %bb5
+ i32 12, label %bb4
+ i32 13, label %bb3
+ i32 14, label %bb2
+ i32 15, label %bb1
+ i32 16, label %bb0
+ i32 17, label %bb7
+ i32 18, label %bb6
+ i32 19, label %bb5
+ i32 20, label %bb4
+ i32 21, label %bb3
+ i32 22, label %bb2
+ i32 23, label %bb1
+ i32 24, label %bb0
+ i32 25, label %bb7
+ i32 26, label %bb6
+ i32 27, label %bb5
+ i32 28, label %bb4
+ i32 29, label %bb3
+ i32 30, label %bb2
+ i32 31, label %bb1
+ i32 32, label %bb0
+ ]
+
+sw.default: br label %return
+bb0: br label %return
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+bb6: br label %return
+bb7: br label %return
+
+return:
+ %res = phi i32 [ %y, %sw.default ], [ 0, %bb0 ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 5, %bb5 ], [ 6, %bb6 ], [ 7, %bb7 ]
+ ret i32 %res
+
+}
+
+; A dense switch with an unreachable default case should be optimized into a lookup table without bounds checks
+define i32 @unreachable_default_dense_0to32(i32 %x, i32 %y) {
+; CHECK-LABEL: @unreachable_default_dense_0to32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [33 x i32], ptr @switch.table.unreachable_default_dense_0to32, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
+;
+entry:
+ switch i32 %x, label %sw.default [
+ i32 0, label %bb0
+ i32 1, label %bb7
+ i32 2, label %bb6
+ i32 3, label %bb5
+ i32 4, label %bb4
+ i32 5, label %bb3
+ i32 6, label %bb2
+ i32 7, label %bb1
+ i32 8, label %bb0
+ i32 9, label %bb7
+ i32 10, label %bb6
+ i32 11, label %bb5
+ i32 12, label %bb4
+ i32 13, label %bb3
+ i32 14, label %bb2
+ i32 15, label %bb1
+ i32 16, label %bb0
+ i32 17, label %bb7
+ i32 18, label %bb6
+ i32 19, label %bb5
+ i32 20, label %bb4
+ i32 21, label %bb3
+ i32 22, label %bb2
+ i32 23, label %bb1
+ i32 24, label %bb0
+ i32 25, label %bb7
+ i32 26, label %bb6
+ i32 27, label %bb5
+ i32 28, label %bb4
+ i32 29, label %bb3
+ i32 30, label %bb2
+ i32 31, label %bb1
+ i32 32, label %bb0
+ ]
+
+sw.default: unreachable
+bb0: br label %return
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+bb6: br label %return
+bb7: br label %return
+
+return:
+ %res = phi i32 [ 0, %bb0 ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 5, %bb5 ], [ 6, %bb6 ], [ 7, %bb7 ]
+ ret i32 %res
+
+}
+
+; A sparse switch with a reachable default case which would be optimized into a lookup table with a bounds check and a mask, but doesn't because
+; it would require a 33-bit mask
+define i32 @reachable_default_holes_0to32(i32 %x, i32 %y) {
+; CHECK-LABEL: @reachable_default_holes_0to32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i32 [[X:%.*]], label [[RETURN:%.*]] [
+; CHECK-NEXT: i32 0, label [[BB0:%.*]]
+; CHECK-NEXT: i32 1, label [[BB7:%.*]]
+; CHECK-NEXT: i32 2, label [[BB6:%.*]]
+; CHECK-NEXT: i32 4, label [[BB4:%.*]]
+; CHECK-NEXT: i32 5, label [[BB3:%.*]]
+; CHECK-NEXT: i32 6, label [[BB2:%.*]]
+; CHECK-NEXT: i32 7, label [[BB1:%.*]]
+; CHECK-NEXT: i32 9, label [[BB7]]
+; CHECK-NEXT: i32 10, label [[BB6]]
+; CHECK-NEXT: i32 11, label [[BB5:%.*]]
+; CHECK-NEXT: i32 12, label [[BB4]]
+; CHECK-NEXT: i32 14, label [[BB2]]
+; CHECK-NEXT: i32 15, label [[BB1]]
+; CHECK-NEXT: i32 16, label [[BB0]]
+; CHECK-NEXT: i32 17, label [[BB7]]
+; CHECK-NEXT: i32 19, label [[BB5]]
+; CHECK-NEXT: i32 20, label [[BB4]]
+; CHECK-NEXT: i32 21, label [[BB3]]
+; CHECK-NEXT: i32 22, label [[BB2]]
+; CHECK-NEXT: i32 24, label [[BB0]]
+; CHECK-NEXT: i32 25, label [[BB7]]
+; CHECK-NEXT: i32 26, label [[BB6]]
+; CHECK-NEXT: i32 27, label [[BB5]]
+; CHECK-NEXT: i32 29, label [[BB3]]
+; CHECK-NEXT: i32 30, label [[BB2]]
+; CHECK-NEXT: i32 31, label [[BB1]]
+; CHECK-NEXT: i32 32, label [[BB0]]
+; CHECK-NEXT: ]
+; CHECK: bb0:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: bb3:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: bb4:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: bb5:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: bb6:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: bb7:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[BB0]] ], [ 1, [[BB1]] ], [ 2, [[BB2]] ], [ 3, [[BB3]] ], [ 4, [[BB4]] ], [ 5, [[BB5]] ], [ 6, [[BB6]] ], [ 7, [[BB7]] ], [ [[Y:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ switch i32 %x, label %sw.default [
+ i32 0, label %bb0
+ i32 1, label %bb7
+ i32 2, label %bb6
+ i32 4, label %bb4
+ i32 5, label %bb3
+ i32 6, label %bb2
+ i32 7, label %bb1
+ i32 9, label %bb7
+ i32 10, label %bb6
+ i32 11, label %bb5
+ i32 12, label %bb4
+ i32 14, label %bb2
+ i32 15, label %bb1
+ i32 16, label %bb0
+ i32 17, label %bb7
+ i32 19, label %bb5
+ i32 20, label %bb4
+ i32 21, label %bb3
+ i32 22, label %bb2
+ i32 24, label %bb0
+ i32 25, label %bb7
+ i32 26, label %bb6
+ i32 27, label %bb5
+ i32 29, label %bb3
+ i32 30, label %bb2
+ i32 31, label %bb1
+ i32 32, label %bb0
+ ]
+
+sw.default: br label %return
+bb0: br label %return
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+bb6: br label %return
+bb7: br label %return
+
+return:
+ %res = phi i32 [ %y, %sw.default ], [ 0, %bb0 ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 5, %bb5 ], [ 6, %bb6 ], [ 7, %bb7 ]
+ ret i32 %res
+
+}
+
+; A sparse switch with an unreachable default case which can be optimized into a lookup table without bounds checks. Because the default case is
+; unreachable, the fact that a 33-bit mask would be required doesn't prevent lookup table optimization.
+define i32 @unreachable_default_holes_0to32(i32 %x, i32 %y) {
+; CHECK-LABEL: @unreachable_default_holes_0to32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [33 x i32], ptr @switch.table.unreachable_default_holes_0to32, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
+;
+entry:
+ switch i32 %x, label %sw.default [
+ i32 0, label %bb0
+ i32 1, label %bb7
+ i32 2, label %bb6
+ i32 4, label %bb4
+ i32 5, label %bb3
+ i32 6, label %bb2
+ i32 7, label %bb1
+ i32 9, label %bb7
+ i32 10, label %bb6
+ i32 11, label %bb5
+ i32 12, label %bb4
+ i32 14, label %bb2
+ i32 15, label %bb1
+ i32 16, label %bb0
+ i32 17, label %bb7
+ i32 19, label %bb5
+ i32 20, label %bb4
+ i32 21, label %bb3
+ i32 22, label %bb2
+ i32 24, label %bb0
+ i32 25, label %bb7
+ i32 26, label %bb6
+ i32 27, label %bb5
+ i32 29, label %bb3
+ i32 30, label %bb2
+ i32 31, label %bb1
+ i32 32, label %bb0
+ ]
+
+sw.default: unreachable
+bb0: br label %return
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+bb6: br label %return
+bb7: br label %return
+
+return:
+ %res = phi i32 [ 0, %bb0 ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 5, %bb5 ], [ 6, %bb6 ], [ 7, %bb7 ]
+ ret i32 %res
+
+}
More information about the llvm-commits
mailing list