[llvm] [SimplifyCFG] Add optimization for switches of powers of two (PR #70977)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 9 17:44:39 PST 2023
https://github.com/DKay7 updated https://github.com/llvm/llvm-project/pull/70977
>From 52e95e4fb4a65381b657c96820bdc8db51e01cd5 Mon Sep 17 00:00:00 2001
From: Daniil <kalinin.de at phystech.edu>
Date: Wed, 1 Nov 2023 22:32:23 +0300
Subject: [PATCH] Added optimization for switches of powers of two
This optimization reduces the range of switches whose cases are all positive powers of two by replacing each case with count_trailing_zero(case).
It is performed only for switches whose default case is unreachable.
Resolves #70756
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 64 ++++++++++++-
.../SimplifyCFG/switch-of-powers-of-two.ll | 90 +++++++++++++++++++
2 files changed, 151 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/Transforms/SimplifyCFG/switch-of-powers-of-two.ll
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 68b5b1a78a3460e..eeb00e9f3bdc6d5 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6792,9 +6792,6 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
// This transform can be done speculatively because it is so cheap - it
// results in a single rotate operation being inserted.
- // FIXME: It's possible that optimizing a switch on powers of two might also
- // be beneficial - flag values are often powers of two and we could use a CLZ
- // as the key function.
// countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
// one element and LLVM disallows duplicate cases, Shift is guaranteed to be
@@ -6839,6 +6836,64 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
return true;
}
+static bool isSwitchOfPowersOfTwo(ArrayRef<APInt> Values) { // True iff no value fails isPowerOf2().
+ for (auto &Value : Values) {
+ if (!Value.isPowerOf2())
+ return false;
+ }
+ // No value failed the check (also reached when Values is empty).
+ return true;
+}
+
+static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
+ const DataLayout &DL) {
+ // Returns true iff the switch was rewritten to key on cttz(condition).
+ auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
+
+ if (CondTy->getIntegerBitWidth() > 64 ||
+ !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
+ return false;
+
+ // Only bother with this optimization if there are more than 3 switch cases;
+ // SDAG will only bother creating jump tables for 4 or more cases.
+ if (SI->getNumCases() < 4)
+ return false;
+ SmallVector<APInt, 4> Values;
+ for (const auto &Case : SI->cases())
+ Values.push_back(Case.getCaseValue()->getValue());
+
+ // We perform this optimization only for switches with an
+ // unreachable default case.
+ // This assumption saves us from checking whether `Condition` is a power of two.
+ bool HasDefault =
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+
+ if (HasDefault || !isSwitchOfPowersOfTwo(Values))
+ return false;
+
+ Builder.SetInsertPoint(SI);
+
+ auto *Condition = SI->getCondition();
+ auto &Context = SI->getContext();
+
+ // FIXME: Maybe we should check whether the cttz intrinsic is cheap on the
+ // target architecture.
+ auto *ConditionTrailingZeros =
+ Builder.CreateIntrinsic(Intrinsic::cttz, {Condition->getType()},
+ {Condition, ConstantInt::getTrue(Context)}); // i1 true: cttz(0) is poison.
+
+ SI->replaceUsesOfWith(Condition, ConditionTrailingZeros);
+
+ // Replace each case value with its number of trailing zeros.
+ for (auto &Case : SI->cases()) {
+ auto *OrigValue = Case.getCaseValue();
+ Case.setValue(cast<ConstantInt>(ConstantInt::get(
+ OrigValue->getType(), OrigValue->getValue().countr_zero())));
+ }
+
+ return true;
+}
+
bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
BasicBlock *BB = SI->getParent();
@@ -6886,6 +6941,9 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
return requestResimplify();
+ if (simplifySwitchOfPowersOfTwo(SI, Builder, DL))
+ return requestResimplify();
+
if (ReduceSwitchRange(SI, Builder, DL, TTI))
return requestResimplify();
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/switch-of-powers-of-two.ll
new file mode 100644
index 000000000000000..7a5f3c9d3d7cf9e
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/switch-of-powers-of-two.ll
@@ -0,0 +1,90 @@
+; RUN: opt %s -passes='simplifycfg<switch-to-lookup>' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Check that the range of switch of powers of two is reduced
+define i32 @switch_of_powers(i32 %x) {
+; CHECK-LABEL: switch_of_powers
+; CHECK-LABEL: entry
+; CHECK: %{{.*}} = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; CHECK-NOT: switch i32 %x
+
+entry:
+ switch i32 %x, label %def [
+ i32 1, label %bb1
+ i32 8, label %bb2
+ i32 16, label %bb3
+ i32 32, label %bb4
+ i32 64, label %bb5
+ ]
+
+
+def: unreachable
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+
+return:
+ %p = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ]
+ ret i32 %p
+}
+
+; Check that the range of a switch of powers of two is not reduced if the default case is reachable
+define i32 @switch_of_powers_reachable_default(i32 %x) {
+; CHECK-LABEL: switch_of_powers_reachable_default
+; CHECK-LABEL: entry
+; CHECK-NOT: %{{.*}} = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; CHECK: switch i32 %x
+
+entry:
+ switch i32 %x, label %def [
+ i32 1, label %bb1
+ i32 8, label %bb2
+ i32 16, label %bb3
+ i32 32, label %bb4
+ i32 64, label %bb5
+ ]
+
+
+def: br label %return
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+
+return:
+ %p = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [-1, %def]
+ ret i32 %p
+}
+
+; Check that a switch with a zero case is not considered a switch of powers of two
+define i32 @switch_of_non_powers(i32 %x) {
+; CHECK-LABEL: switch_of_non_powers
+; CHECK-LABEL: entry
+; CHECK-NOT: %{{.*}} = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; CHECK: switch i32 %x
+
+entry:
+ switch i32 %x, label %def [
+ i32 0, label %bb1
+ i32 1, label %bb2
+ i32 16, label %bb3
+ i32 32, label %bb4
+ i32 64, label %bb5
+ ]
+
+
+def: unreachable
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+
+return:
+ %p = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ]
+ ret i32 %p
+}
\ No newline at end of file
More information about the llvm-commits
mailing list