[llvm] [SimplifyCFG] Relax `cttz` cost check in `simplifySwitchOfPowersOfTwo` (PR #145159)

Antonio Frighetto via llvm-commits llvm-commits at lists.llvm.org
Sat Jun 21 12:15:30 PDT 2025


https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/145159

>From cbcff3d5c987b4859d7848463b09f73804e9e6da Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Sat, 21 Jun 2025 21:12:24 +0200
Subject: [PATCH 1/2] [SimplifyCFG] Precommit test (NFC)

---
 .../X86/switch-of-powers-of-two.ll            | 50 +++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll

diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
new file mode 100644
index 0000000000000..d19ecc4d92eda
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='simplifycfg<switch-to-lookup>' -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @switch_of_powers_two(i32 %arg) {
+; CHECK-LABEL: @switch_of_powers_two(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[ARG:%.*]], label [[DEFAULT_CASE:%.*]] [
+; CHECK-NEXT:      i32 1, label [[RETURN:%.*]]
+; CHECK-NEXT:      i32 8, label [[BB2:%.*]]
+; CHECK-NEXT:      i32 16, label [[BB3:%.*]]
+; CHECK-NEXT:      i32 32, label [[BB4:%.*]]
+; CHECK-NEXT:      i32 64, label [[BB5:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       default_case:
+; CHECK-NEXT:    unreachable
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       bb3:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       bb4:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       bb5:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = phi i32 [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
+;
+entry:
+  switch i32 %arg, label %default_case [
+  i32 1,  label %bb1
+  i32 8,  label %bb2
+  i32 16, label %bb3
+  i32 32, label %bb4
+  i32 64, label %bb5
+  ]
+
+
+default_case: unreachable
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+
+return:
+  %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ]
+  ret i32 %phi
+}

>From 930ae93964e400ecffea060fe37e8b7f347a691b Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Sat, 21 Jun 2025 21:13:16 +0200
Subject: [PATCH 2/2] [SimplifyCFG] Relax `cttz` cost check in
 `simplifySwitchOfPowersOfTwo`

We should be able to allow `simplifySwitchOfPowersOfTwo` transform
to take place, as, on recent X86 targets, the weighted latency-size
appears to be 2. This favours computing trailing zeroes and indexing
into a smaller value table, over generating a jump table with an
indirect branch (which overall should be more efficient).
---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 20 ++++++++---------
 .../X86/switch-of-powers-of-two.ll            | 22 +++----------------
 2 files changed, 13 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index eb52c1b7e6fba..e205551658aa5 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7198,8 +7198,10 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
 /// will be transformed to:
 /// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
 ///
-/// This transformation allows better lowering and could allow transforming into
-/// a lookup table.
+/// This transformation allows better lowering and may transform the switch
+/// instruction into a sequence of bit manipulation and a smaller
+/// log2(C)-indexed value table (instead of traditionally emitting a load of the
+/// address of the jump target, and indirectly jump to it).
 static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
                                         const DataLayout &DL,
                                         const TargetTransformInfo &TTI) {
@@ -7211,17 +7213,15 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
       !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
     return false;
 
-  const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
-      IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
-                              {Condition, ConstantInt::getTrue(Context)}),
-      TTI::TCK_SizeAndLatency);
-
-  if (CttzIntrinsicCost > TTI::TCC_Basic)
-    // Inserting intrinsic is too expensive.
+  // Ensure trailing zeroes count intrinsic emission is not too expensive.
+  IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
+                                {Condition, ConstantInt::getTrue(Context)});
+  if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
+      TTI::TCC_Basic * 2)
     return false;
 
   // Only bother with this optimization if there are more than 3 switch cases.
-  // SDAG will only bother creating jump tables for 4 or more cases.
+  // SDAG will start emitting jump tables for 4 or more cases.
   if (SI->getNumCases() < 4)
     return false;
 
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
index d19ecc4d92eda..0076efd303419 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
@@ -6,25 +6,9 @@ target triple = "x86_64-unknown-linux-gnu"
 define i32 @switch_of_powers_two(i32 %arg) {
 ; CHECK-LABEL: @switch_of_powers_two(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[ARG:%.*]], label [[DEFAULT_CASE:%.*]] [
-; CHECK-NEXT:      i32 1, label [[RETURN:%.*]]
-; CHECK-NEXT:      i32 8, label [[BB2:%.*]]
-; CHECK-NEXT:      i32 16, label [[BB3:%.*]]
-; CHECK-NEXT:      i32 32, label [[BB4:%.*]]
-; CHECK-NEXT:      i32 64, label [[BB5:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       default_case:
-; CHECK-NEXT:    unreachable
-; CHECK:       bb2:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       bb3:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       bb4:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       bb5:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       return:
-; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = phi i32 [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG:%.*]], i1 true)
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two, i32 0, i32 [[TMP0]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
 ; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
 ;
 entry:



More information about the llvm-commits mailing list