[llvm] [SimplifyCFG] Relax `cttz` cost check in `simplifySwitchOfPowersOfTwo` (PR #145159)
Antonio Frighetto via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 21 12:15:30 PDT 2025
https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/145159
>From cbcff3d5c987b4859d7848463b09f73804e9e6da Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Sat, 21 Jun 2025 21:12:24 +0200
Subject: [PATCH 1/2] [SimplifyCFG] Precommit test (NFC)
---
.../X86/switch-of-powers-of-two.ll | 50 +++++++++++++++++++
1 file changed, 50 insertions(+)
create mode 100644 llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
new file mode 100644
index 0000000000000..d19ecc4d92eda
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='simplifycfg<switch-to-lookup>' -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @switch_of_powers_two(i32 %arg) {
+; CHECK-LABEL: @switch_of_powers_two(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i32 [[ARG:%.*]], label [[DEFAULT_CASE:%.*]] [
+; CHECK-NEXT: i32 1, label [[RETURN:%.*]]
+; CHECK-NEXT: i32 8, label [[BB2:%.*]]
+; CHECK-NEXT: i32 16, label [[BB3:%.*]]
+; CHECK-NEXT: i32 32, label [[BB4:%.*]]
+; CHECK-NEXT: i32 64, label [[BB5:%.*]]
+; CHECK-NEXT: ]
+; CHECK: default_case:
+; CHECK-NEXT: unreachable
+; CHECK: bb2:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: bb3:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: bb4:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: bb5:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = phi i32 [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
+;
+entry:
+ switch i32 %arg, label %default_case [
+ i32 1, label %bb1
+ i32 8, label %bb2
+ i32 16, label %bb3
+ i32 32, label %bb4
+ i32 64, label %bb5
+ ]
+
+
+default_case: unreachable
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+
+return:
+ %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ]
+ ret i32 %phi
+}
>From 930ae93964e400ecffea060fe37e8b7f347a691b Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Sat, 21 Jun 2025 21:13:16 +0200
Subject: [PATCH 2/2] [SimplifyCFG] Relax `cttz` cost check in
`simplifySwitchOfPowersOfTwo`
We should be able to allow the `simplifySwitchOfPowersOfTwo` transform
to take place, as, on recent X86 targets, the weighted latency-size
cost of `cttz` appears to be 2. This favours computing trailing zeroes
and indexing into a smaller value table (which overall should be more
efficient) over generating a jump table with an indirect branch.
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 20 ++++++++---------
.../X86/switch-of-powers-of-two.ll | 22 +++----------------
2 files changed, 13 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index eb52c1b7e6fba..e205551658aa5 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7198,8 +7198,10 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
/// will be transformed to:
/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
///
-/// This transformation allows better lowering and could allow transforming into
-/// a lookup table.
+/// This transformation allows better lowering and may transform the switch
+/// instruction into a sequence of bit manipulation and a smaller
+/// log2(C)-indexed value table (instead of traditionally emitting a load of the
+/// address of the jump target, and indirectly jumping to it).
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
const DataLayout &DL,
const TargetTransformInfo &TTI) {
@@ -7211,17 +7213,15 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
!DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
return false;
- const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
- IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
- {Condition, ConstantInt::getTrue(Context)}),
- TTI::TCK_SizeAndLatency);
-
- if (CttzIntrinsicCost > TTI::TCC_Basic)
- // Inserting intrinsic is too expensive.
+ // Ensure trailing zeroes count intrinsic emission is not too expensive.
+ IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
+ {Condition, ConstantInt::getTrue(Context)});
+ if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
+ TTI::TCC_Basic * 2)
return false;
// Only bother with this optimization if there are more than 3 switch cases.
- // SDAG will only bother creating jump tables for 4 or more cases.
+ // SDAG will start emitting jump tables for 4 or more cases.
if (SI->getNumCases() < 4)
return false;
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
index d19ecc4d92eda..0076efd303419 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
@@ -6,25 +6,9 @@ target triple = "x86_64-unknown-linux-gnu"
define i32 @switch_of_powers_two(i32 %arg) {
; CHECK-LABEL: @switch_of_powers_two(
; CHECK-NEXT: entry:
-; CHECK-NEXT: switch i32 [[ARG:%.*]], label [[DEFAULT_CASE:%.*]] [
-; CHECK-NEXT: i32 1, label [[RETURN:%.*]]
-; CHECK-NEXT: i32 8, label [[BB2:%.*]]
-; CHECK-NEXT: i32 16, label [[BB3:%.*]]
-; CHECK-NEXT: i32 32, label [[BB4:%.*]]
-; CHECK-NEXT: i32 64, label [[BB5:%.*]]
-; CHECK-NEXT: ]
-; CHECK: default_case:
-; CHECK-NEXT: unreachable
-; CHECK: bb2:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: bb3:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: bb4:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: bb5:
-; CHECK-NEXT: br label [[RETURN]]
-; CHECK: return:
-; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = phi i32 [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG:%.*]], i1 true)
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two, i32 0, i32 [[TMP0]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
;
entry:
More information about the llvm-commits
mailing list