[llvm] [SimplifyCFG] Relax `cttz` cost check in `simplifySwitchOfPowersOfTwo` (PR #145159)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 21 03:51:09 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Antonio Frighetto (antoniofrighetto)
<details>
<summary>Changes</summary>
We should allow the `simplifySwitchOfPowersOfTwo` transform to take place on recent X86 targets, where the weighted latency-size cost of the `cttz` intrinsic appears to be 2. This favours computing the trailing-zero count and indexing into a smaller value table over generating a jump table with an indirect branch; the former should be more efficient overall. Also, let the simplification proceed even if the default case may be reachable.
---
Full diff: https://github.com/llvm/llvm-project/pull/145159.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Utils/SimplifyCFG.cpp (+10-16)
- (modified) llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll (+37-23)
``````````diff
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index eb52c1b7e6fba..f4f15c8809c94 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7198,8 +7198,10 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
/// will be transformed to:
/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
///
-/// This transformation allows better lowering and could allow transforming into
-/// a lookup table.
+/// This transformation allows better lowering and may transform the switch
+/// instruction into a sequence of bit manipulation and a smaller
+/// log2(C)-indexed value table (instead of traditionally emitting a load of the
+/// address of the jump target, and indirectly jump to it).
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
const DataLayout &DL,
const TargetTransformInfo &TTI) {
@@ -7211,26 +7213,18 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
!DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
return false;
- const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
- IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
- {Condition, ConstantInt::getTrue(Context)}),
- TTI::TCK_SizeAndLatency);
-
- if (CttzIntrinsicCost > TTI::TCC_Basic)
- // Inserting intrinsic is too expensive.
+ // Ensure trailing zeroes count intrinsic emission is not too expensive.
+ IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
+ {Condition, ConstantInt::getTrue(Context)});
+ if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
+ TTI::TCC_Basic * 2)
return false;
// Only bother with this optimization if there are more than 3 switch cases.
- // SDAG will only bother creating jump tables for 4 or more cases.
+ // SDAG will start emitting jump tables for 4 or more cases.
if (SI->getNumCases() < 4)
return false;
- // We perform this optimization only for switches with
- // unreachable default case.
- // This assumtion will save us from checking if `Condition` is a power of two.
- if (!SI->defaultDestUnreachable())
- return false;
-
// Check that switch cases are powers of two.
SmallVector<uint64_t, 4> Values;
for (const auto &Case : SI->cases()) {
diff --git a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
index 2ac94afd95910..37e771fc33b64 100644
--- a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
+++ b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
@@ -60,30 +60,44 @@ return:
ret i32 %p
}
-; Check that switch's of powers of two range is not reduced if default case is reachable
+; Check that switch's of powers of two range is not reduced if default case is reachable,
+; unless Zbb extension is on.
define i32 @switch_of_powers_reachable_default(i32 %x) {
-; CHECK-LABEL: @switch_of_powers_reachable_default(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: switch i32 [[X:%.*]], label [[RETURN:%.*]] [
-; CHECK-NEXT: i32 1, label [[BB1:%.*]]
-; CHECK-NEXT: i32 8, label [[BB2:%.*]]
-; CHECK-NEXT: i32 16, label [[BB3:%.*]]
-; CHECK-NEXT: i32 32, label [[BB4:%.*]]
-; CHECK-NEXT: i32 64, label [[BB5:%.*]]
-; CHECK-NEXT: ]
-; CHECK: bb1:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: bb2:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: bb3:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: bb4:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: bb5:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: return:
-; CHECK-NEXT: [[P:%.*]] = phi i32 [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ -1, [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[P]]
+; RV64I-LABEL: @switch_of_powers_reachable_default(
+; RV64I-NEXT: entry:
+; RV64I-NEXT: switch i32 [[X:%.*]], label [[RETURN:%.*]] [
+; RV64I-NEXT: i32 1, label [[BB1:%.*]]
+; RV64I-NEXT: i32 8, label [[BB2:%.*]]
+; RV64I-NEXT: i32 16, label [[BB3:%.*]]
+; RV64I-NEXT: i32 32, label [[BB4:%.*]]
+; RV64I-NEXT: i32 64, label [[BB5:%.*]]
+; RV64I-NEXT: ]
+; RV64I: bb1:
+; RV64I-NEXT: br label [[RETURN]]
+; RV64I: bb2:
+; RV64I-NEXT: br label [[RETURN]]
+; RV64I: bb3:
+; RV64I-NEXT: br label [[RETURN]]
+; RV64I: bb4:
+; RV64I-NEXT: br label [[RETURN]]
+; RV64I: bb5:
+; RV64I-NEXT: br label [[RETURN]]
+; RV64I: return:
+; RV64I-NEXT: [[P:%.*]] = phi i32 [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ -1, [[ENTRY:%.*]] ]
+; RV64I-NEXT: ret i32 [[P]]
+;
+; RV64ZBB-LABEL: @switch_of_powers_reachable_default(
+; RV64ZBB-NEXT: entry:
+; RV64ZBB-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
+; RV64ZBB-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 7
+; RV64ZBB-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; RV64ZBB: switch.lookup:
+; RV64ZBB-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_reachable_default, i32 0, i32 [[TMP0]]
+; RV64ZBB-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; RV64ZBB-NEXT: br label [[RETURN]]
+; RV64ZBB: return:
+; RV64ZBB-NEXT: [[P:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ -1, [[ENTRY:%.*]] ]
+; RV64ZBB-NEXT: ret i32 [[P]]
;
entry:
switch i32 %x, label %default_case [
``````````
</details>
https://github.com/llvm/llvm-project/pull/145159
More information about the llvm-commits mailing list