[llvm] [SimplifyCFG] Relax `cttz` cost check in `simplifySwitchOfPowersOfTwo` (PR #145159)
Antonio Frighetto via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 24 00:06:44 PDT 2025
https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/145159
>From 1247fddf3676543837b71965c88f021072513c63 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Tue, 24 Jun 2025 09:06:18 +0200
Subject: [PATCH] [SimplifyCFG] Relax `cttz` cost check in
`simplifySwitchOfPowersOfTwo`
We should be able to allow `simplifySwitchOfPowersOfTwo` transform
to take place, as, on recent X86 targets, the weighted latency-size
appears to be 2. This favours computing trailing zeroes and indexing
into a smaller value table, over generating a jump table with an
indirect branch, which overall should be more efficient.
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 20 +++++------
.../X86/switch-of-powers-of-two.ll | 35 +++++++++++++++++++
2 files changed, 45 insertions(+), 10 deletions(-)
create mode 100644 llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index eb52c1b7e6fba..e205551658aa5 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7198,8 +7198,10 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
/// will be transformed to:
/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
///
-/// This transformation allows better lowering and could allow transforming into
-/// a lookup table.
+/// This transformation allows better lowering and may transform the switch
+/// instruction into a sequence of bit manipulation and a smaller
+/// log2(C)-indexed value table (instead of traditionally emitting a load of the
+/// address of the jump target, and indirectly jump to it).
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
const DataLayout &DL,
const TargetTransformInfo &TTI) {
@@ -7211,17 +7213,15 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
!DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
return false;
- const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
- IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
- {Condition, ConstantInt::getTrue(Context)}),
- TTI::TCK_SizeAndLatency);
-
- if (CttzIntrinsicCost > TTI::TCC_Basic)
- // Inserting intrinsic is too expensive.
+ // Ensure trailing zeroes count intrinsic emission is not too expensive.
+ IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
+ {Condition, ConstantInt::getTrue(Context)});
+ if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
+ TTI::TCC_Basic * 2)
return false;
// Only bother with this optimization if there are more than 3 switch cases.
- // SDAG will only bother creating jump tables for 4 or more cases.
+ // SDAG will start emitting jump tables for 4 or more cases.
if (SI->getNumCases() < 4)
return false;
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
new file mode 100644
index 0000000000000..529826758c138
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='simplifycfg<switch-to-lookup>' -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @switch_of_powers_two(i32 %arg) {
+; CHECK-LABEL: define i32 @switch_of_powers_two(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true)
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two, i32 0, i32 [[TMP0]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
+;
+entry:
+ switch i32 %arg, label %default_case [
+ i32 1, label %bb1
+ i32 8, label %bb2
+ i32 16, label %bb3
+ i32 32, label %bb4
+ i32 64, label %bb5
+ ]
+
+
+default_case: unreachable
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+
+return:
+ %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ]
+ ret i32 %phi
+}
More information about the llvm-commits
mailing list