[clang-tools-extra] [llvm] [SimplifyCFG] Find the minimal table considering overflow in `switchToLookupTable` (PR #67885)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Nov 22 06:40:21 PST 2023
https://github.com/DianQK updated https://github.com/llvm/llvm-project/pull/67885
>From 9c2ce941f6eebbc84d5ed0860d5b10f70dfecb85 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Thu, 28 Sep 2023 18:06:52 -0600
Subject: [PATCH 1/6] [SimplifyCFG] Pre-commit test for `switchToLookupTable`
---
.../SimplifyCFG/X86/switch_to_lookup_table.ll | 119 ++++++++++++++++++
1 file changed, 119 insertions(+)
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 3873f0c0ae0bbd5..8a6b3165ce03f5d 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -115,6 +115,125 @@ return:
}
+; The minimal table range is [122, -128]([122, 128]).
+
+define i32 @f_i8_128(i8 %c) {
+; CHECK-LABEL: @f_i8_128(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i8 [[C:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT: i8 122, label [[RETURN:%.*]]
+; CHECK-NEXT: i8 123, label [[SW_BB1:%.*]]
+; CHECK-NEXT: i8 124, label [[SW_BB2:%.*]]
+; CHECK-NEXT: i8 125, label [[SW_BB3:%.*]]
+; CHECK-NEXT: i8 126, label [[SW_BB4:%.*]]
+; CHECK-NEXT: i8 127, label [[SW_BB5:%.*]]
+; CHECK-NEXT: i8 -128, label [[SW_BB6:%.*]]
+; CHECK-NEXT: ]
+; CHECK: sw.bb1:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: sw.bb2:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: sw.bb3:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: sw.bb4:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: sw.bb5:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: sw.bb6:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: sw.default:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 15, [[SW_DEFAULT]] ], [ 1, [[SW_BB6]] ], [ 62, [[SW_BB5]] ], [ 27, [[SW_BB4]] ], [ -1, [[SW_BB3]] ], [ 0, [[SW_BB2]] ], [ 123, [[SW_BB1]] ], [ 55, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+entry:
+ switch i8 %c, label %sw.default [
+ i8 122, label %return
+ i8 123, label %sw.bb1
+ i8 124, label %sw.bb2
+ i8 125, label %sw.bb3
+ i8 126, label %sw.bb4
+ i8 127, label %sw.bb5
+ i8 -128, label %sw.bb6
+ ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.bb5: br label %return
+sw.bb6: br label %return
+sw.default: br label %return
+return:
+ %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
+ ret i32 %retval.0
+}
+
+; The minimal table range is [3, 0].
+
+define i32 @f_min_max(i3 %c) {
+; CHECK-LABEL: @f_min_max(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[C:%.*]], -4
+; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.f_min_max, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
+;
+entry:
+ switch i3 %c, label %sw.default [
+ i3 -4, label %return
+ i3 -3, label %sw.bb1
+ i3 -2, label %sw.bb2
+ i3 -1, label %sw.bb3
+ i3 0, label %sw.bb4
+ i3 3, label %sw.bb6
+ ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.bb6: br label %return
+sw.default: br label %return
+return:
+ %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
+ ret i32 %retval.0
+}
+
+; The minimal table range is [-1, -4].
+
+define i32 @f_min_max_2(i3 %c) {
+; CHECK-LABEL: @f_min_max_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[C:%.*]], -4
+; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.f_min_max_2, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
+;
+entry:
+ switch i3 %c, label %sw.default [
+ i3 -1, label %return
+ i3 0, label %sw.bb1
+ i3 1, label %sw.bb2
+ i3 2, label %sw.bb3
+ i3 3, label %sw.bb4
+ i3 -4, label %sw.bb6
+ ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.bb6: br label %return
+sw.default: br label %return
+return:
+ %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
+ ret i32 %retval.0
+}
+
; A switch used to initialize two variables, an i8 and a float.
declare void @dummy(i8 signext, float)
>From 60822766937bee41d146c577f82f0fa971c59859 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Thu, 28 Sep 2023 18:07:06 -0600
Subject: [PATCH 2/6] [SimplifyCFG] Find the smallest table considering
overflow in `switchToLookupTable`
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 94 +++++++++++-----
.../SimplifyCFG/X86/CoveredLookupTable.ll | 13 +--
.../SimplifyCFG/X86/switch-covered-bug.ll | 12 ++-
.../SimplifyCFG/X86/switch-table-bug.ll | 7 +-
.../SimplifyCFG/X86/switch_to_lookup_table.ll | 102 ++++++++----------
5 files changed, 127 insertions(+), 101 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 35fead111aa9666..2a824c284dc3593 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6378,18 +6378,20 @@ ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
}
static bool ShouldUseSwitchConditionAsTableIndex(
- ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
+ ConstantInt &BeginCaseVal, const ConstantInt &EndCaseVal,
bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
const DataLayout &DL, const TargetTransformInfo &TTI) {
- if (MinCaseVal.isNullValue())
+ if (BeginCaseVal.isNullValue())
return true;
- if (MinCaseVal.isNegative() ||
- MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
+ if (BeginCaseVal.getValue().sge(EndCaseVal.getValue()))
+ return false;
+ if (BeginCaseVal.isNegative() ||
+ EndCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
!HasDefaultResults)
return false;
return all_of(ResultTypes, [&](const auto &KV) {
return SwitchLookupTable::WouldFitInRegister(
- DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
+ DL, EndCaseVal.getLimitedValue() + 1 /* TableSize */,
KV.second /* ResultType */);
});
}
@@ -6478,7 +6480,7 @@ static void reuseTableCompare(
/// If the switch is only used to initialize one or more phi nodes in a common
/// successor block with different constant values, replace the switch with
/// lookup tables.
-static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
+static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
DomTreeUpdater *DTU, const DataLayout &DL,
const TargetTransformInfo &TTI) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
@@ -6506,9 +6508,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Figure out the corresponding result for each case value and phi node in the
// common destination, as well as the min and max case values.
assert(!SI->cases().empty());
- SwitchInst::CaseIt CI = SI->case_begin();
- ConstantInt *MinCaseVal = CI->getCaseValue();
- ConstantInt *MaxCaseVal = CI->getCaseValue();
BasicBlock *CommonDest = nullptr;
@@ -6519,17 +6518,57 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
SmallDenseMap<PHINode *, Type *> ResultTypes;
SmallVector<PHINode *, 4> PHIs;
- for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
- ConstantInt *CaseVal = CI->getCaseValue();
- if (CaseVal->getValue().slt(MinCaseVal->getValue()))
- MinCaseVal = CaseVal;
- if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
- MaxCaseVal = CaseVal;
+ SmallVector<ConstantInt *, 8> CaseVals;
+ for (auto CI : SI->cases())
+ CaseVals.push_back(CI.getCaseValue());
+
+ // We want to find a range of indexes that will create the minimal table.
+ // We can treat all possible index values as a circle. For example, the i8 is
+ // [-128, -1] and [0, 127]. After that find the minimal range from this circle
+ // that can cover all exist values. First, create an incrementing sequence.
+ llvm::sort(CaseVals, [](const ConstantInt *A, const ConstantInt *B) {
+ return A->getValue().slt(B->getValue());
+ });
+ auto *CaseValIter = CaseVals.begin();
+ // We start by using the begin and end as the minimal table.
+ ConstantInt *BeginCaseVal = *CaseValIter;
+ ConstantInt *EndCaseVal = *CaseVals.rbegin();
+ bool RangeOverflow = false;
+ uint64_t MinTableSize = EndCaseVal->getValue()
+ .ssub_ov(BeginCaseVal->getValue(), RangeOverflow)
+ .getLimitedValue() +
+ 1;
+ // If there is no overflow, then this must be the minimal table.
+ if (RangeOverflow) {
+ auto MaxValue = APInt::getMaxValue(BeginCaseVal->getBitWidth());
+ while (CaseValIter != CaseVals.end()) {
+ auto *CurrentCaseVal = *CaseValIter++;
+ if (CaseValIter == CaseVals.end())
+ break;
+ ConstantInt *NextCaseVal = *CaseValIter;
+ const APInt &NextVal = NextCaseVal->getValue();
+ auto CurVal = CurrentCaseVal->getValue();
+ uint64_t RequireTableSize =
+ (MaxValue - (NextVal - CurVal) + 1).getLimitedValue() + 1;
+ // FIXME: When there is more than one minimal table, we can choose the
+ // best one. The current simple strategy may not be the best.
+ if (((RequireTableSize < MinTableSize) ||
+ (RequireTableSize == MinTableSize &&
+ NextCaseVal->getValue().isZero()))) {
+ BeginCaseVal = NextCaseVal;
+ EndCaseVal = CurrentCaseVal;
+ MinTableSize = RequireTableSize;
+ }
+ }
+ }
+
+ for (const auto &CI : SI->cases()) {
+ ConstantInt *CaseVal = CI.getCaseValue();
// Resulting value at phi nodes for this case value.
using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
ResultsTy Results;
- if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
+ if (!getCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
Results, DL, TTI))
return false;
@@ -6564,13 +6603,12 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
}
bool UseSwitchConditionAsTableIndex = ShouldUseSwitchConditionAsTableIndex(
- *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
+ *BeginCaseVal, *EndCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
uint64_t TableSize;
if (UseSwitchConditionAsTableIndex)
- TableSize = MaxCaseVal->getLimitedValue() + 1;
+ TableSize = EndCaseVal->getLimitedValue() + 1;
else
- TableSize =
- (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
+ TableSize = MinTableSize;
bool TableHasHoles = (NumResults < TableSize);
bool NeedMask = (TableHasHoles && !HasDefaultResults);
@@ -6589,7 +6627,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Compute the maximum table size representable by the integer type we are
// switching upon.
- unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
+ unsigned CaseSize = BeginCaseVal->getType()->getPrimitiveSizeInBits();
uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
assert(MaxTableSize >= TableSize &&
"It is impossible for a switch to have more entries than the max "
@@ -6612,15 +6650,17 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
Value *TableIndex;
ConstantInt *TableIndexOffset;
if (UseSwitchConditionAsTableIndex) {
- TableIndexOffset = ConstantInt::get(MaxCaseVal->getType(), 0);
+ TableIndexOffset = ConstantInt::get(EndCaseVal->getType(), 0);
TableIndex = SI->getCondition();
} else {
- TableIndexOffset = MinCaseVal;
+ TableIndexOffset = BeginCaseVal;
// If the default is unreachable, all case values are s>= MinCaseVal. Then
// we can try to attach nsw.
bool MayWrap = true;
- if (!DefaultIsReachable) {
- APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
+ if (!DefaultIsReachable &&
+ EndCaseVal->getValue().sge(BeginCaseVal->getValue())) {
+ APInt Res =
+ EndCaseVal->getValue().ssub_ov(BeginCaseVal->getValue(), MayWrap);
(void)Res;
}
@@ -6639,7 +6679,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// PHI value for the default case in case we're using a bit mask.
} else {
Value *Cmp = Builder.CreateICmpULT(
- TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
+ TableIndex, ConstantInt::get(BeginCaseVal->getType(), TableSize));
RangeCheckBranch =
Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
if (DTU)
@@ -6883,7 +6923,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// CVP. Therefore, only apply this transformation during late stages of the
// optimisation pipeline.
if (Options.ConvertSwitchToLookupTable &&
- SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
+ switchToLookupTable(SI, Builder, DTU, DL, TTI))
return requestResimplify();
if (ReduceSwitchRange(SI, Builder, DL, TTI))
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/CoveredLookupTable.ll b/llvm/test/Transforms/SimplifyCFG/X86/CoveredLookupTable.ll
index 56fa249d23bba2d..6d72fdb7085159c 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/CoveredLookupTable.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/CoveredLookupTable.ll
@@ -9,12 +9,13 @@ target triple = "x86_64-apple-darwin12.0.0"
define i3 @coveredswitch_test(i3 %input) {
; CHECK-LABEL: @coveredswitch_test(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[INPUT:%.*]], -4
-; CHECK-NEXT: [[SWITCH_CAST:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i24
-; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i24 [[SWITCH_CAST]], 3
-; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i24 7507338, [[SWITCH_SHIFTAMT]]
-; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i24 [[SWITCH_DOWNSHIFT]] to i3
-; CHECK-NEXT: ret i3 [[SWITCH_MASKED]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i3 [[INPUT:%.*]], -2
+; CHECK-NEXT: [[SWITCH_CAST:%.*]] = zext i3 [[INPUT]] to i18
+; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i18 [[SWITCH_CAST]], 3
+; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i18 42792, [[SWITCH_SHIFTAMT]]
+; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i18 [[SWITCH_DOWNSHIFT]] to i3
+; CHECK-NEXT: [[RESULT:%.*]] = select i1 [[TMP0]], i3 [[SWITCH_MASKED]], i3 -2
+; CHECK-NEXT: ret i3 [[RESULT]]
;
entry:
switch i3 %input, label %bb8 [
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
index cf244e6b63457c3..8e291b025113d85 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
@@ -9,11 +9,17 @@ target triple = "x86_64-apple-darwin12.0.0"
define i64 @test(i3 %arg) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[ARG:%.*]], -4
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[ARG:%.*]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i3 [[SWITCH_TABLEIDX]], -2
+; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[DEFAULT:%.*]]
+; CHECK: switch.lookup:
; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i64], ptr @switch.table.test, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i64], ptr @switch.table.test, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i64, ptr [[SWITCH_GEP]], align 8
-; CHECK-NEXT: [[V3:%.*]] = add i64 [[SWITCH_LOAD]], 0
+; CHECK-NEXT: br label [[DEFAULT]]
+; CHECK: Default:
+; CHECK-NEXT: [[V1:%.*]] = phi i64 [ 8, [[ENTRY:%.*]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ]
+; CHECK-NEXT: [[V3:%.*]] = add i64 [[V1]], 0
; CHECK-NEXT: ret i64 [[V3]]
;
entry:
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
index 37001f4fba2aa84..75cee53d230374d 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
@@ -9,11 +9,8 @@ target triple = "x86_64-apple-darwin12.0.0"
define i64 @_TFO6reduce1E5toRawfS0_FT_Si(i2) {
; CHECK-LABEL: @_TFO6reduce1E5toRawfS0_FT_Si(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i2 [[TMP0:%.*]], -2
-; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i2 [[SWITCH_TABLEIDX]] to i3
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i64], ptr @switch.table._TFO6reduce1E5toRawfS0_FT_Si, i32 0, i3 [[SWITCH_TABLEIDX_ZEXT]]
-; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i64, ptr [[SWITCH_GEP]], align 8
-; CHECK-NEXT: ret i64 [[SWITCH_LOAD]]
+; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = zext i2 [[TMP0:%.*]] to i64
+; CHECK-NEXT: ret i64 [[SWITCH_IDX_CAST]]
;
entry:
switch i2 %0, label %1 [
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 8a6b3165ce03f5d..b05b013a96f8454 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -120,31 +120,15 @@ return:
define i32 @f_i8_128(i8 %c) {
; CHECK-LABEL: @f_i8_128(
; CHECK-NEXT: entry:
-; CHECK-NEXT: switch i8 [[C:%.*]], label [[SW_DEFAULT:%.*]] [
-; CHECK-NEXT: i8 122, label [[RETURN:%.*]]
-; CHECK-NEXT: i8 123, label [[SW_BB1:%.*]]
-; CHECK-NEXT: i8 124, label [[SW_BB2:%.*]]
-; CHECK-NEXT: i8 125, label [[SW_BB3:%.*]]
-; CHECK-NEXT: i8 126, label [[SW_BB4:%.*]]
-; CHECK-NEXT: i8 127, label [[SW_BB5:%.*]]
-; CHECK-NEXT: i8 -128, label [[SW_BB6:%.*]]
-; CHECK-NEXT: ]
-; CHECK: sw.bb1:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: sw.bb2:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: sw.bb3:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: sw.bb4:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: sw.bb5:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: sw.bb6:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: sw.default:
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i8 [[C:%.*]], 122
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i8 [[SWITCH_TABLEIDX]], 7
+; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK: switch.lookup:
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.f_i8_128, i32 0, i8 [[SWITCH_TABLEIDX]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
-; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 15, [[SW_DEFAULT]] ], [ 1, [[SW_BB6]] ], [ 62, [[SW_BB5]] ], [ 27, [[SW_BB4]] ], [ -1, [[SW_BB3]] ], [ 0, [[SW_BB2]] ], [ 123, [[SW_BB1]] ], [ 55, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ 15, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i32 [[RETVAL_0]]
;
entry:
@@ -175,11 +159,17 @@ return:
define i32 @f_min_max(i3 %c) {
; CHECK-LABEL: @f_min_max(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[C:%.*]], -4
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[C:%.*]], 3
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i3 [[SWITCH_TABLEIDX]], -2
+; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK: switch.lookup:
; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.f_min_max, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i32], ptr @switch.table.f_min_max, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
-; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ 15, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
;
entry:
switch i3 %c, label %sw.default [
@@ -207,11 +197,17 @@ return:
define i32 @f_min_max_2(i3 %c) {
; CHECK-LABEL: @f_min_max_2(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[C:%.*]], -4
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[C:%.*]], -1
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i3 [[SWITCH_TABLEIDX]], -2
+; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK: switch.lookup:
; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.f_min_max_2, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i32], ptr @switch.table.f_min_max_2, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
-; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ 15, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
;
entry:
switch i3 %c, label %sw.default [
@@ -1657,14 +1653,11 @@ end:
define i32 @covered_switch_with_bit_tests(i3) {
; CHECK-LABEL: @covered_switch_with_bit_tests(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[TMP0:%.*]], -4
-; CHECK-NEXT: [[SWITCH_MASKINDEX:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i8
-; CHECK-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i8 -61, [[SWITCH_MASKINDEX]]
-; CHECK-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1
-; CHECK-NEXT: br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[L6:%.*]]
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[TMP0:%.*]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i3 [[SWITCH_TABLEIDX]], -4
+; CHECK-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[L6:%.*]]
; CHECK: switch.lookup:
-; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.covered_switch_with_bit_tests, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.covered_switch_with_bit_tests, i32 0, i3 [[SWITCH_TABLEIDX]]
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
; CHECK-NEXT: br label [[L6]]
; CHECK: l6:
@@ -1815,11 +1808,10 @@ define i32 @signed_overflow1(i8 %n) {
; CHECK-LABEL: @signed_overflow1(
; CHECK-NEXT: start:
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[N:%.*]] to i2
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i2 [[TRUNC]], -2
-; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i2 [[SWITCH_TABLEIDX]] to i3
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.signed_overflow1, i32 0, i3 [[SWITCH_TABLEIDX_ZEXT]]
-; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
-; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
+; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = zext i2 [[TRUNC]] to i32
+; CHECK-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[SWITCH_IDX_CAST]], 1111
+; CHECK-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i32 [[SWITCH_IDX_MULT]], 1111
+; CHECK-NEXT: ret i32 [[SWITCH_OFFSET]]
;
start:
%trunc = trunc i8 %n to i2
@@ -1851,20 +1843,11 @@ define i32 @signed_overflow2(i8 %n) {
; CHECK-LABEL: @signed_overflow2(
; CHECK-NEXT: start:
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[N:%.*]] to i2
-; CHECK-NEXT: switch i2 [[TRUNC]], label [[BB1:%.*]] [
-; CHECK-NEXT: i2 1, label [[BB6:%.*]]
-; CHECK-NEXT: i2 -2, label [[BB4:%.*]]
-; CHECK-NEXT: i2 -1, label [[BB5:%.*]]
-; CHECK-NEXT: ]
-; CHECK: bb1:
-; CHECK-NEXT: unreachable
-; CHECK: bb4:
-; CHECK-NEXT: br label [[BB6]]
-; CHECK: bb5:
-; CHECK-NEXT: br label [[BB6]]
-; CHECK: bb6:
-; CHECK-NEXT: [[DOTSROA_0_0:%.*]] = phi i32 [ 4444, [[BB5]] ], [ 3333, [[BB4]] ], [ 2222, [[START:%.*]] ]
-; CHECK-NEXT: ret i32 [[DOTSROA_0_0]]
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i2 [[TRUNC]], 1
+; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = zext i2 [[SWITCH_TABLEIDX]] to i32
+; CHECK-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[SWITCH_IDX_CAST]], 1111
+; CHECK-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i32 [[SWITCH_IDX_MULT]], 2222
+; CHECK-NEXT: ret i32 [[SWITCH_OFFSET]]
;
start:
%trunc = trunc i8 %n to i2
@@ -1895,11 +1878,10 @@ define i32 @signed_overflow_negative(i8 %n) {
; CHECK-LABEL: @signed_overflow_negative(
; CHECK-NEXT: start:
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[N:%.*]] to i2
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i2 [[TRUNC]], -2
-; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = zext i2 [[SWITCH_TABLEIDX]] to i32
-; CHECK-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[SWITCH_IDX_CAST]], 1111
-; CHECK-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i32 [[SWITCH_IDX_MULT]], 1111
-; CHECK-NEXT: ret i32 [[SWITCH_OFFSET]]
+; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i2 [[TRUNC]] to i3
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.signed_overflow_negative, i32 0, i3 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
;
start:
%trunc = trunc i8 %n to i2
>From 5085d362e72e9dee3b25de225414e6b1c9baab4a Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sun, 1 Oct 2023 13:38:39 +0800
Subject: [PATCH 3/6] fixup! [SimplifyCFG] Find the smallest table considering
overflow in `switchToLookupTable`
Apply feedback
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 20 +++++++++-----------
1 file changed, 9 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 2a824c284dc3593..1fd745e82f4ec3e 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6519,20 +6519,20 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
SmallVector<PHINode *, 4> PHIs;
SmallVector<ConstantInt *, 8> CaseVals;
- for (auto CI : SI->cases())
+ for (const auto &CI : SI->cases())
CaseVals.push_back(CI.getCaseValue());
// We want to find a range of indexes that will create the minimal table.
- // We can treat all possible index values as a circle. For example, the i8 is
- // [-128, -1] and [0, 127]. After that find the minimal range from this circle
- // that can cover all exist values. First, create an incrementing sequence.
- llvm::sort(CaseVals, [](const ConstantInt *A, const ConstantInt *B) {
- return A->getValue().slt(B->getValue());
+ // When considering overflow, the next indexed value of the maximum signed
+ // integer is the minimum signed integer.
+ llvm::sort(CaseVals, [](const auto *L, const auto *R) {
+ return L->getValue().slt(R->getValue());
});
auto *CaseValIter = CaseVals.begin();
- // We start by using the begin and end as the minimal table.
+ // We start by using the minimum signed integer and maximum signed as the
+ // minimal table. It's the most common case.
ConstantInt *BeginCaseVal = *CaseValIter;
- ConstantInt *EndCaseVal = *CaseVals.rbegin();
+ ConstantInt *EndCaseVal = CaseVals.back();
bool RangeOverflow = false;
uint64_t MinTableSize = EndCaseVal->getValue()
.ssub_ov(BeginCaseVal->getValue(), RangeOverflow)
@@ -6540,7 +6540,6 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
1;
// If there is no overflow, then this must be the minimal table.
if (RangeOverflow) {
- auto MaxValue = APInt::getMaxValue(BeginCaseVal->getBitWidth());
while (CaseValIter != CaseVals.end()) {
auto *CurrentCaseVal = *CaseValIter++;
if (CaseValIter == CaseVals.end())
@@ -6548,8 +6547,7 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
ConstantInt *NextCaseVal = *CaseValIter;
const APInt &NextVal = NextCaseVal->getValue();
auto CurVal = CurrentCaseVal->getValue();
- uint64_t RequireTableSize =
- (MaxValue - (NextVal - CurVal) + 1).getLimitedValue() + 1;
+ uint64_t RequireTableSize = (CurVal - NextVal).getLimitedValue() + 1;
// FIXME: When there is more than one minimal table, we can choose the
// best one. The current simple strategy may not be the best.
if (((RequireTableSize < MinTableSize) ||
>From 04d4fc57b1993ef6f8c8613ce0dd4ccd66916511 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Thu, 5 Oct 2023 21:07:31 +0800
Subject: [PATCH 4/6] fixup! [SimplifyCFG] Find the smallest table considering
overflow in `switchToLookupTable`
Apply feedback
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 21 ++++++++++-----------
1 file changed, 10 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 1fd745e82f4ec3e..682b84c3be2a501 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6378,7 +6378,7 @@ ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
}
static bool ShouldUseSwitchConditionAsTableIndex(
- ConstantInt &BeginCaseVal, const ConstantInt &EndCaseVal,
+ const ConstantInt &BeginCaseVal, const ConstantInt &EndCaseVal,
bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
const DataLayout &DL, const TargetTransformInfo &TTI) {
if (BeginCaseVal.isNullValue())
@@ -6480,7 +6480,7 @@ static void reuseTableCompare(
/// If the switch is only used to initialize one or more phi nodes in a common
/// successor block with different constant values, replace the switch with
/// lookup tables.
-static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
+static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
DomTreeUpdater *DTU, const DataLayout &DL,
const TargetTransformInfo &TTI) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
@@ -6518,9 +6518,8 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
SmallDenseMap<PHINode *, Type *> ResultTypes;
SmallVector<PHINode *, 4> PHIs;
- SmallVector<ConstantInt *, 8> CaseVals;
- for (const auto &CI : SI->cases())
- CaseVals.push_back(CI.getCaseValue());
+ SmallVector<ConstantInt *, 8> CaseVals(llvm::map_range(
+ SI->cases(), [](const auto &C) { return C.getCaseValue(); }));
// We want to find a range of indexes that will create the minimal table.
// When considering overflow, the next indexed value of the maximum signed
@@ -6541,12 +6540,12 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// If there is no overflow, then this must be the minimal table.
if (RangeOverflow) {
while (CaseValIter != CaseVals.end()) {
- auto *CurrentCaseVal = *CaseValIter++;
+ auto *CurCaseVal = *CaseValIter++;
if (CaseValIter == CaseVals.end())
break;
- ConstantInt *NextCaseVal = *CaseValIter;
- const APInt &NextVal = NextCaseVal->getValue();
- auto CurVal = CurrentCaseVal->getValue();
+ auto *NextCaseVal = *CaseValIter;
+ const auto &NextVal = NextCaseVal->getValue();
+ const auto &CurVal = CurCaseVal->getValue();
uint64_t RequireTableSize = (CurVal - NextVal).getLimitedValue() + 1;
// FIXME: When there is more than one minimal table, we can choose the
// best one. The current simple strategy may not be the best.
@@ -6554,7 +6553,7 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
(RequireTableSize == MinTableSize &&
NextCaseVal->getValue().isZero()))) {
BeginCaseVal = NextCaseVal;
- EndCaseVal = CurrentCaseVal;
+ EndCaseVal = CurCaseVal;
MinTableSize = RequireTableSize;
}
}
@@ -6921,7 +6920,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// CVP. Therefore, only apply this transformation during late stages of the
// optimisation pipeline.
if (Options.ConvertSwitchToLookupTable &&
- switchToLookupTable(SI, Builder, DTU, DL, TTI))
+ SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
return requestResimplify();
if (ReduceSwitchRange(SI, Builder, DL, TTI))
>From d48beac29dabf1ead6f016552c0bd33e934c96f2 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Wed, 22 Nov 2023 22:18:00 +0800
Subject: [PATCH 5/6] fixup! [SimplifyCFG] Find the smallest table considering
overflow in `switchToLookupTable`
When a signed max-min cannot construct a lookup table, fall back to the min lookup table.
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 19 ++---
.../SimplifyCFG/X86/CoveredLookupTable.ll | 13 ++--
.../SimplifyCFG/X86/switch-covered-bug.ll | 12 +---
.../SimplifyCFG/X86/switch-table-bug.ll | 7 +-
.../SimplifyCFG/X86/switch_to_lookup_table.ll | 72 ++++++++++---------
5 files changed, 61 insertions(+), 62 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 682b84c3be2a501..74d737954eb4ee8 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6482,7 +6482,8 @@ static void reuseTableCompare(
/// lookup tables.
static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
DomTreeUpdater *DTU, const DataLayout &DL,
- const TargetTransformInfo &TTI) {
+ const TargetTransformInfo &TTI,
+ bool TryMinTableSize) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
BasicBlock *BB = SI->getParent();
@@ -6538,7 +6539,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
.getLimitedValue() +
1;
// If there is no overflow, then this must be the minimal table.
- if (RangeOverflow) {
+ // The signed max-min can no longer build a lookup table, so return.
+ if (RangeOverflow && TryMinTableSize) {
while (CaseValIter != CaseVals.end()) {
auto *CurCaseVal = *CaseValIter++;
if (CaseValIter == CaseVals.end())
@@ -6547,11 +6549,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
const auto &NextVal = NextCaseVal->getValue();
const auto &CurVal = CurCaseVal->getValue();
uint64_t RequireTableSize = (CurVal - NextVal).getLimitedValue() + 1;
- // FIXME: When there is more than one minimal table, we can choose the
- // best one. The current simple strategy may not be the best.
- if (((RequireTableSize < MinTableSize) ||
- (RequireTableSize == MinTableSize &&
- NextCaseVal->getValue().isZero()))) {
+ if (RequireTableSize < MinTableSize) {
BeginCaseVal = NextCaseVal;
EndCaseVal = CurCaseVal;
MinTableSize = RequireTableSize;
@@ -6618,7 +6616,10 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
}
if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
- return false;
+ // When a signed max-min cannot construct a lookup table, try to find a
+ // range with a minimal lookup table.
+ return !TryMinTableSize &&
+ SwitchToLookupTable(SI, Builder, DTU, DL, TTI, true);
std::vector<DominatorTree::UpdateType> Updates;
@@ -6920,7 +6921,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// CVP. Therefore, only apply this transformation during late stages of the
// optimisation pipeline.
if (Options.ConvertSwitchToLookupTable &&
- SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
+ SwitchToLookupTable(SI, Builder, DTU, DL, TTI, false))
return requestResimplify();
if (ReduceSwitchRange(SI, Builder, DL, TTI))
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/CoveredLookupTable.ll b/llvm/test/Transforms/SimplifyCFG/X86/CoveredLookupTable.ll
index 6d72fdb7085159c..56fa249d23bba2d 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/CoveredLookupTable.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/CoveredLookupTable.ll
@@ -9,13 +9,12 @@ target triple = "x86_64-apple-darwin12.0.0"
define i3 @coveredswitch_test(i3 %input) {
; CHECK-LABEL: @coveredswitch_test(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i3 [[INPUT:%.*]], -2
-; CHECK-NEXT: [[SWITCH_CAST:%.*]] = zext i3 [[INPUT]] to i18
-; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i18 [[SWITCH_CAST]], 3
-; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i18 42792, [[SWITCH_SHIFTAMT]]
-; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i18 [[SWITCH_DOWNSHIFT]] to i3
-; CHECK-NEXT: [[RESULT:%.*]] = select i1 [[TMP0]], i3 [[SWITCH_MASKED]], i3 -2
-; CHECK-NEXT: ret i3 [[RESULT]]
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[INPUT:%.*]], -4
+; CHECK-NEXT: [[SWITCH_CAST:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i24
+; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i24 [[SWITCH_CAST]], 3
+; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i24 7507338, [[SWITCH_SHIFTAMT]]
+; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i24 [[SWITCH_DOWNSHIFT]] to i3
+; CHECK-NEXT: ret i3 [[SWITCH_MASKED]]
;
entry:
switch i3 %input, label %bb8 [
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
index 8e291b025113d85..cf244e6b63457c3 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
@@ -9,17 +9,11 @@ target triple = "x86_64-apple-darwin12.0.0"
define i64 @test(i3 %arg) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[ARG:%.*]], 1
-; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i3 [[SWITCH_TABLEIDX]], -2
-; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[DEFAULT:%.*]]
-; CHECK: switch.lookup:
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[ARG:%.*]], -4
; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i64], ptr @switch.table.test, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i64], ptr @switch.table.test, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i64, ptr [[SWITCH_GEP]], align 8
-; CHECK-NEXT: br label [[DEFAULT]]
-; CHECK: Default:
-; CHECK-NEXT: [[V1:%.*]] = phi i64 [ 8, [[ENTRY:%.*]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ]
-; CHECK-NEXT: [[V3:%.*]] = add i64 [[V1]], 0
+; CHECK-NEXT: [[V3:%.*]] = add i64 [[SWITCH_LOAD]], 0
; CHECK-NEXT: ret i64 [[V3]]
;
entry:
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
index 75cee53d230374d..37001f4fba2aa84 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
@@ -9,8 +9,11 @@ target triple = "x86_64-apple-darwin12.0.0"
define i64 @_TFO6reduce1E5toRawfS0_FT_Si(i2) {
; CHECK-LABEL: @_TFO6reduce1E5toRawfS0_FT_Si(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = zext i2 [[TMP0:%.*]] to i64
-; CHECK-NEXT: ret i64 [[SWITCH_IDX_CAST]]
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i2 [[TMP0:%.*]], -2
+; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i2 [[SWITCH_TABLEIDX]] to i3
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i64], ptr @switch.table._TFO6reduce1E5toRawfS0_FT_Si, i32 0, i3 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i64, ptr [[SWITCH_GEP]], align 8
+; CHECK-NEXT: ret i64 [[SWITCH_LOAD]]
;
entry:
switch i2 %0, label %1 [
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index b05b013a96f8454..dde39aea3c1fd57 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -159,17 +159,11 @@ return:
define i32 @f_min_max(i3 %c) {
; CHECK-LABEL: @f_min_max(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[C:%.*]], 3
-; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i3 [[SWITCH_TABLEIDX]], -2
-; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
-; CHECK: switch.lookup:
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[C:%.*]], -4
; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i32], ptr @switch.table.f_min_max, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.f_min_max, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: return:
-; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ 15, [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[RETVAL_0]]
+; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
;
entry:
switch i3 %c, label %sw.default [
@@ -197,17 +191,11 @@ return:
define i32 @f_min_max_2(i3 %c) {
; CHECK-LABEL: @f_min_max_2(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[C:%.*]], -1
-; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i3 [[SWITCH_TABLEIDX]], -2
-; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
-; CHECK: switch.lookup:
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[C:%.*]], -4
; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i32], ptr @switch.table.f_min_max_2, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.f_min_max_2, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: return:
-; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ 15, [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[RETVAL_0]]
+; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
;
entry:
switch i3 %c, label %sw.default [
@@ -1653,11 +1641,14 @@ end:
define i32 @covered_switch_with_bit_tests(i3) {
; CHECK-LABEL: @covered_switch_with_bit_tests(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[TMP0:%.*]], 2
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i3 [[SWITCH_TABLEIDX]], -4
-; CHECK-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[L6:%.*]]
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[TMP0:%.*]], -4
+; CHECK-NEXT: [[SWITCH_MASKINDEX:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i8
+; CHECK-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i8 -61, [[SWITCH_MASKINDEX]]
+; CHECK-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1
+; CHECK-NEXT: br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[L6:%.*]]
; CHECK: switch.lookup:
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.covered_switch_with_bit_tests, i32 0, i3 [[SWITCH_TABLEIDX]]
+; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.covered_switch_with_bit_tests, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
; CHECK-NEXT: br label [[L6]]
; CHECK: l6:
@@ -1808,10 +1799,11 @@ define i32 @signed_overflow1(i8 %n) {
; CHECK-LABEL: @signed_overflow1(
; CHECK-NEXT: start:
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[N:%.*]] to i2
-; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = zext i2 [[TRUNC]] to i32
-; CHECK-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[SWITCH_IDX_CAST]], 1111
-; CHECK-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i32 [[SWITCH_IDX_MULT]], 1111
-; CHECK-NEXT: ret i32 [[SWITCH_OFFSET]]
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i2 [[TRUNC]], -2
+; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i2 [[SWITCH_TABLEIDX]] to i3
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.signed_overflow1, i32 0, i3 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
;
start:
%trunc = trunc i8 %n to i2
@@ -1843,11 +1835,20 @@ define i32 @signed_overflow2(i8 %n) {
; CHECK-LABEL: @signed_overflow2(
; CHECK-NEXT: start:
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[N:%.*]] to i2
-; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i2 [[TRUNC]], 1
-; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = zext i2 [[SWITCH_TABLEIDX]] to i32
-; CHECK-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[SWITCH_IDX_CAST]], 1111
-; CHECK-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i32 [[SWITCH_IDX_MULT]], 2222
-; CHECK-NEXT: ret i32 [[SWITCH_OFFSET]]
+; CHECK-NEXT: switch i2 [[TRUNC]], label [[BB1:%.*]] [
+; CHECK-NEXT: i2 1, label [[BB6:%.*]]
+; CHECK-NEXT: i2 -2, label [[BB4:%.*]]
+; CHECK-NEXT: i2 -1, label [[BB5:%.*]]
+; CHECK-NEXT: ]
+; CHECK: bb1:
+; CHECK-NEXT: unreachable
+; CHECK: bb4:
+; CHECK-NEXT: br label [[BB6]]
+; CHECK: bb5:
+; CHECK-NEXT: br label [[BB6]]
+; CHECK: bb6:
+; CHECK-NEXT: [[DOTSROA_0_0:%.*]] = phi i32 [ 4444, [[BB5]] ], [ 3333, [[BB4]] ], [ 2222, [[START:%.*]] ]
+; CHECK-NEXT: ret i32 [[DOTSROA_0_0]]
;
start:
%trunc = trunc i8 %n to i2
@@ -1878,10 +1879,11 @@ define i32 @signed_overflow_negative(i8 %n) {
; CHECK-LABEL: @signed_overflow_negative(
; CHECK-NEXT: start:
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[N:%.*]] to i2
-; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i2 [[TRUNC]] to i3
-; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.signed_overflow_negative, i32 0, i3 [[SWITCH_TABLEIDX_ZEXT]]
-; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
-; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i2 [[TRUNC]], -2
+; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = zext i2 [[SWITCH_TABLEIDX]] to i32
+; CHECK-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[SWITCH_IDX_CAST]], 1111
+; CHECK-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i32 [[SWITCH_IDX_MULT]], 1111
+; CHECK-NEXT: ret i32 [[SWITCH_OFFSET]]
;
start:
%trunc = trunc i8 %n to i2
>From e62ef9e6464329c77997ca5599cefe8ddea3545c Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Wed, 22 Nov 2023 22:32:59 +0800
Subject: [PATCH 6/6] fixup! [SimplifyCFG] Find the smallest table considering
overflow in `switchToLookupTable`
Update comments.
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 6b1db04e7f0da26..affed9c258db746 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6555,15 +6555,10 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
SmallVector<ConstantInt *, 8> CaseVals(llvm::map_range(
SI->cases(), [](const auto &C) { return C.getCaseValue(); }));
- // We want to find a range of indexes that will create the minimal table.
- // When considering overflow, the next indexed value of the maximum signed
- // integer is the minimum signed integer.
llvm::sort(CaseVals, [](const auto *L, const auto *R) {
return L->getValue().slt(R->getValue());
});
auto *CaseValIter = CaseVals.begin();
- // We start by using the minimum signed integer and maximum signed as the
- // minimal table. It's the most common case.
ConstantInt *BeginCaseVal = *CaseValIter;
ConstantInt *EndCaseVal = CaseVals.back();
bool RangeOverflow = false;
@@ -6574,6 +6569,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// If there is no overflow, then this must be the minimal table.
// The signed max-min can no longer build a lookup table, so return.
if (RangeOverflow && TryMinTableSize) {
+ // We consider cases where the starting to the endpoint will cross the
+ // signed max and min. For example, for the i8 range `[-128, -127, 126,
+ // 127]`, we choose from 126 to -127. The length of the lookup table is 4.
while (CaseValIter != CaseVals.end()) {
auto *CurCaseVal = *CaseValIter++;
if (CaseValIter == CaseVals.end())
More information about the cfe-commits
mailing list