[llvm] [SimplifyCFG] Relax `cttz` cost check in `simplifySwitchOfPowersOfTwo` (PR #145159)

Sat Jun 21 03:51:09 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Antonio Frighetto (antoniofrighetto)

<details>
<summary>Changes</summary>

We should be able to allow `simplifySwitchOfPowersOfTwo` transform to take place, as, on recent X86 targets, the weighted latency-size appears to be 2. This favours computing trailing zeroes and indexing into a smaller value table, over generating a jump table with an indirect branch (which overall should be more efficient). Also, let the simplification proceed even if the default case may be reachable.

---
Full diff: https://github.com/llvm/llvm-project/pull/145159.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Utils/SimplifyCFG.cpp (+10-16) 
- (modified) llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll (+37-23) 


``````````diff

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index eb52c1b7e6fba..f4f15c8809c94 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7198,8 +7198,10 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
 /// will be transformed to:
 /// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
 ///
-/// This transformation allows better lowering and could allow transforming into
-/// a lookup table.
+/// This transformation allows better lowering and may transform the switch
+/// instruction into a sequence of bit manipulation and a smaller
+/// log2(C)-indexed value table (instead of traditionally emitting a load of the
+/// address of the jump target, and indirectly jump to it).
 static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
                                         const DataLayout &DL,
                                         const TargetTransformInfo &TTI) {
@@ -7211,26 +7213,18 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
       !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
     return false;
 
-  const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
-      IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
-                              {Condition, ConstantInt::getTrue(Context)}),
-      TTI::TCK_SizeAndLatency);
-
-  if (CttzIntrinsicCost > TTI::TCC_Basic)
-    // Inserting intrinsic is too expensive.
+  // Ensure trailing zeroes count intrinsic emission is not too expensive.
+  IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
+                                {Condition, ConstantInt::getTrue(Context)});
+  if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
+      TTI::TCC_Basic * 2)
     return false;
 
   // Only bother with this optimization if there are more than 3 switch cases.
-  // SDAG will only bother creating jump tables for 4 or more cases.
+  // SDAG will start emitting jump tables for 4 or more cases.
   if (SI->getNumCases() < 4)
     return false;
 
-  // We perform this optimization only for switches with
-  // unreachable default case.
-  // This assumtion will save us from checking if `Condition` is a power of two.
-  if (!SI->defaultDestUnreachable())
-    return false;
-
   // Check that switch cases are powers of two.
   SmallVector<uint64_t, 4> Values;
   for (const auto &Case : SI->cases()) {
diff --git a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
index 2ac94afd95910..37e771fc33b64 100644
--- a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
+++ b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
@@ -60,30 +60,44 @@ return:
   ret i32 %p
 }
 
-; Check that switch's of powers of two range is not reduced if default case is reachable
+; Check that switch's of powers of two range is not reduced if default case is reachable,
+; unless Zbb extension is on.
 define i32 @switch_of_powers_reachable_default(i32 %x) {
-; CHECK-LABEL: @switch_of_powers_reachable_default(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[X:%.*]], label [[RETURN:%.*]] [
-; CHECK-NEXT:      i32 1, label [[BB1:%.*]]
-; CHECK-NEXT:      i32 8, label [[BB2:%.*]]
-; CHECK-NEXT:      i32 16, label [[BB3:%.*]]
-; CHECK-NEXT:      i32 32, label [[BB4:%.*]]
-; CHECK-NEXT:      i32 64, label [[BB5:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       bb1:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       bb2:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       bb3:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       bb4:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       bb5:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       return:
-; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ -1, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32 [[P]]
+; RV64I-LABEL: @switch_of_powers_reachable_default(
+; RV64I-NEXT:  entry:
+; RV64I-NEXT:    switch i32 [[X:%.*]], label [[RETURN:%.*]] [
+; RV64I-NEXT:      i32 1, label [[BB1:%.*]]
+; RV64I-NEXT:      i32 8, label [[BB2:%.*]]
+; RV64I-NEXT:      i32 16, label [[BB3:%.*]]
+; RV64I-NEXT:      i32 32, label [[BB4:%.*]]
+; RV64I-NEXT:      i32 64, label [[BB5:%.*]]
+; RV64I-NEXT:    ]
+; RV64I:       bb1:
+; RV64I-NEXT:    br label [[RETURN]]
+; RV64I:       bb2:
+; RV64I-NEXT:    br label [[RETURN]]
+; RV64I:       bb3:
+; RV64I-NEXT:    br label [[RETURN]]
+; RV64I:       bb4:
+; RV64I-NEXT:    br label [[RETURN]]
+; RV64I:       bb5:
+; RV64I-NEXT:    br label [[RETURN]]
+; RV64I:       return:
+; RV64I-NEXT:    [[P:%.*]] = phi i32 [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ -1, [[ENTRY:%.*]] ]
+; RV64I-NEXT:    ret i32 [[P]]
+;
+; RV64ZBB-LABEL: @switch_of_powers_reachable_default(
+; RV64ZBB-NEXT:  entry:
+; RV64ZBB-NEXT:    [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
+; RV64ZBB-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 7
+; RV64ZBB-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; RV64ZBB:       switch.lookup:
+; RV64ZBB-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_reachable_default, i32 0, i32 [[TMP0]]
+; RV64ZBB-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; RV64ZBB-NEXT:    br label [[RETURN]]
+; RV64ZBB:       return:
+; RV64ZBB-NEXT:    [[P:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ -1, [[ENTRY:%.*]] ]
+; RV64ZBB-NEXT:    ret i32 [[P]]
 ;
 entry:
   switch i32 %x, label %default_case [

``````````

</details>


https://github.com/llvm/llvm-project/pull/145159