[llvm] [PPC] Set minimum of largest number of comparisons to use bit test for switch lowering (PR #155910)
Shimin Cui via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 28 13:08:00 PDT 2025
https://github.com/scui-ibm updated https://github.com/llvm/llvm-project/pull/155910
>From fe22a19f07cf38e43764fc44bcdbb799bb3909cc Mon Sep 17 00:00:00 2001
From: Shimin Cui <scui at xlperflep9.rtp.raleigh.ibm.com>
Date: Thu, 28 Aug 2025 19:40:29 +0000
Subject: [PATCH] [PPC] Set minimum of largest number of comparisons to use bit
test for switch
---
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 11 +-
llvm/include/llvm/CodeGen/TargetLowering.h | 27 ++-
llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 21 ++-
llvm/lib/CodeGen/TargetLoweringBase.cpp | 14 ++
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 8 +
llvm/test/CodeGen/PowerPC/bittest.ll | 193 ++++++++++++++++++++
6 files changed, 256 insertions(+), 18 deletions(-)
create mode 100644 llvm/test/CodeGen/PowerPC/bittest.ll
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 0a10b51f97c63..efd7580e10033 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -594,12 +594,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
// Check if suitable for a bit test
if (N <= DL.getIndexSizeInBits(0u)) {
- SmallPtrSet<const BasicBlock *, 4> Dests;
- for (auto I : SI.cases())
- Dests.insert(I.getCaseSuccessor());
+ DenseMap<const BasicBlock *, unsigned int> DestMap;
+ for (auto I : SI.cases()) {
+ const BasicBlock *BB = I.getCaseSuccessor();
+ ++DestMap[BB]; // DenseMap::operator[] value-initializes missing keys to 0.
+ }
- if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
- DL))
+ if (TLI->isSuitableForBitTests(DestMap, MinCaseVal, MaxCaseVal, DL))
return 1;
}
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 438b6ff55c85f..4c94ed34123c7 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1441,9 +1441,10 @@ class LLVM_ABI TargetLoweringBase {
/// \p High as its lowest and highest case values, and expects \p NumCmps
/// case value comparisons. Check if the number of destinations, comparison
/// metric, and range are all suitable.
- bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
- const APInt &Low, const APInt &High,
- const DataLayout &DL) const {
+ bool
+ isSuitableForBitTests(const DenseMap<const BasicBlock *, unsigned int> &DestCmps,
+ const APInt &Low, const APInt &High,
+ const DataLayout &DL) const {
// FIXME: I don't think NumCmps is the correct metric: a single case and a
// range of cases both require only one branch to lower. Just looking at the
// number of clusters and destinations should be enough to decide whether to
@@ -1454,6 +1455,20 @@ class LLVM_ABI TargetLoweringBase {
if (!rangeFitsInWord(Low, High, DL))
return false;
+ unsigned NumDests = DestCmps.size();
+ unsigned NumCmps = 0;
+ unsigned MaxBitTestEntry = 0;
+ for (const auto &DestCmp : DestCmps) {
+ NumCmps += DestCmp.second;
+ if (DestCmp.second > MaxBitTestEntry)
+ MaxBitTestEntry = DestCmp.second;
+ }
+
+ // Separate comparisons might be cheaper when the largest number of
+ // comparisons per destination is small; the threshold can be target specific.
+ if (MaxBitTestEntry < getMinimumBitTestCmps())
+ return false;
+
// Decide whether it's profitable to lower this range with bit tests. Each
// destination requires a bit test and branch, and there is an overall range
// check branch. For a small number of clusters, separate comparisons might
@@ -2063,6 +2078,9 @@ class LLVM_ABI TargetLoweringBase {
virtual bool isJumpTableRelative() const;
+ /// Return the minimum of the largest number of comparisons in BitTest.
+ virtual unsigned getMinimumBitTestCmps() const;
+
/// If a physical register, this specifies the register that
/// llvm.savestack/llvm.restorestack should save and restore.
Register getStackPointerRegisterToSaveRestore() const {
@@ -2579,6 +2597,9 @@ class LLVM_ABI TargetLoweringBase {
/// Set to zero to generate unlimited jump tables.
void setMaximumJumpTableSize(unsigned);
+ /// Set the minimum of the largest number of comparisons to generate BitTest.
+ void setMinimumBitTestCmps(unsigned Val);
+
/// If set to a physical register, this specifies the register that
/// llvm.savestack/llvm.restorestack should save and restore.
void setStackPointerRegisterToSaveRestore(Register R) {
diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index 038c499fe236e..8662f7801e2a6 100644
--- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -206,12 +206,16 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
for (unsigned I = First; I <= Last; ++I)
JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
+ DenseMap<const BasicBlock *, unsigned int> DestMap;
for (unsigned I = First; I <= Last; ++I) {
assert(Clusters[I].Kind == CC_Range);
Prob += Clusters[I].Prob;
const APInt &Low = Clusters[I].Low->getValue();
const APInt &High = Clusters[I].High->getValue();
- NumCmps += (Low == High) ? 1 : 2;
+ unsigned int NumCmp = (Low == High) ? 1 : 2;
+ const BasicBlock *BB = Clusters[I].MBB->getBasicBlock();
+ DestMap[BB] += NumCmp; // DenseMap::operator[] value-initializes to 0.
+
if (I != First) {
// Fill the gap between this and the previous cluster.
const APInt &PreviousHigh = Clusters[I - 1].High->getValue();
@@ -226,9 +230,7 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
}
- unsigned NumDests = JTProbs.size();
- if (TLI->isSuitableForBitTests(NumDests, NumCmps,
- Clusters[First].Low->getValue(),
+ if (TLI->isSuitableForBitTests(DestMap, Clusters[First].Low->getValue(),
Clusters[Last].High->getValue(), *DL)) {
// Clusters[First..Last] should be lowered as bit tests instead.
return false;
@@ -372,20 +374,19 @@ bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,
if (First == Last)
return false;
- BitVector Dests(FuncInfo.MF->getNumBlockIDs());
- unsigned NumCmps = 0;
+ DenseMap<const BasicBlock *, unsigned int> DestMap;
for (int64_t I = First; I <= Last; ++I) {
assert(Clusters[I].Kind == CC_Range);
- Dests.set(Clusters[I].MBB->getNumber());
- NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
+ unsigned NumCmp = (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
+ const BasicBlock *BB = Clusters[I].MBB->getBasicBlock();
+ DestMap[BB] += NumCmp; // DenseMap::operator[] value-initializes to 0.
}
- unsigned NumDests = Dests.count();
APInt Low = Clusters[First].Low->getValue();
APInt High = Clusters[Last].High->getValue();
assert(Low.slt(High));
- if (!TLI->isSuitableForBitTests(NumDests, NumCmps, Low, High, *DL))
+ if (!TLI->isSuitableForBitTests(DestMap, Low, High, *DL))
return false;
APInt LowBound;
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 0549a947600dc..eeafe0605d654 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -90,6 +91,11 @@ static cl::opt<unsigned> OptsizeJumpTableDensity(
cl::desc("Minimum density for building a jump table in "
"an optsize function"));
+static cl::opt<unsigned>
+ MinimumBitTestCmps("min-bit-test-cmps", cl::init(2), cl::Hidden,
+ cl::desc("Set minimum of largest number of comparisons "
+ "to use bit test for switch."));
+
// FIXME: This option is only to test if the strict fp operation processed
// correctly by preventing mutating strict fp operation to normal fp operation
// during development. When the backend supports strict float operation, this
@@ -2120,6 +2126,14 @@ bool TargetLoweringBase::isJumpTableRelative() const {
return getTargetMachine().isPositionIndependent();
}
+unsigned TargetLoweringBase::getMinimumBitTestCmps() const {
+ return MinimumBitTestCmps;
+}
+
+void TargetLoweringBase::setMinimumBitTestCmps(unsigned Val) {
+ MinimumBitTestCmps = Val;
+}
+
Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const {
if (TM.Options.LoopAlignment)
return Align(TM.Options.LoopAlignment);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 5039f5df7a128..0e83599f9c366 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -138,6 +138,11 @@ static cl::opt<unsigned> PPCMinimumJumpTableEntries(
"ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
cl::desc("Set minimum number of entries to use a jump table on PPC"));
+static cl::opt<unsigned> PPCMinimumBitTestCmps(
+ "ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
+ cl::desc("Set minimum of largest number of comparisons to use bit test for "
+ "switch on PPC."));
+
static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
"ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
cl::desc("max depth when checking alias info in GatherAllAliases()"));
@@ -1438,6 +1443,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// Re-evaluate this value on future HWs that can do better with mtctr.
setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
+ // The default minimum of the largest number of comparisons in a bit-test cluster is 3.
+ setMinimumBitTestCmps(PPCMinimumBitTestCmps);
+
setMinFunctionAlignment(Align(4));
setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
diff --git a/llvm/test/CodeGen/PowerPC/bittest.ll b/llvm/test/CodeGen/PowerPC/bittest.ll
new file mode 100644
index 0000000000000..cba56e3d5798f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/bittest.ll
@@ -0,0 +1,193 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs < %s -O3 -mcpu=ppc -mtriple powerpc-ibm-aix \
+; RUN: -ppc-asm-full-reg-names | FileCheck %s
+
+define i32 @foo(i32 noundef signext %x) {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stwu r1, -64(r1)
+; CHECK-NEXT: stw r0, 72(r1)
+; CHECK-NEXT: cmpwi r3, 8
+; CHECK-NEXT: stw r31, 60(r1) # 4-byte Folded Spill
+; CHECK-NEXT: mr r31, r3
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: ble cr0, L..BB0_4
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: cmpwi r31, 11
+; CHECK-NEXT: bge cr0, L..BB0_7
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: cmplwi r31, 9
+; CHECK-NEXT: beq cr0, L..BB0_9
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: cmplwi r31, 10
+; CHECK-NEXT: beq cr0, L..BB0_11
+; CHECK-NEXT: b L..BB0_13
+; CHECK-NEXT: L..BB0_4: # %entry
+; CHECK-NEXT: cmplwi r31, 4
+; CHECK-NEXT: beq cr0, L..BB0_12
+; CHECK-NEXT: # %bb.5: # %entry
+; CHECK-NEXT: cmplwi r31, 7
+; CHECK-NEXT: beq cr0, L..BB0_11
+; CHECK-NEXT: # %bb.6: # %entry
+; CHECK-NEXT: cmplwi r31, 8
+; CHECK-NEXT: beq cr0, L..BB0_10
+; CHECK-NEXT: b L..BB0_13
+; CHECK-NEXT: L..BB0_7: # %entry
+; CHECK-NEXT: beq cr0, L..BB0_10
+; CHECK-NEXT: # %bb.8: # %entry
+; CHECK-NEXT: cmplwi r31, 12
+; CHECK-NEXT: bne cr0, L..BB0_13
+; CHECK-NEXT: L..BB0_9: # %sw.bb2
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: bl .foo3[PR]
+; CHECK-NEXT: nop
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: b L..BB0_13
+; CHECK-NEXT: L..BB0_10: # %sw.bb1
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: bl .foo2[PR]
+; CHECK-NEXT: nop
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: b L..BB0_13
+; CHECK-NEXT: L..BB0_11: # %sw.bb
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: bl .foo1[PR]
+; CHECK-NEXT: nop
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: b L..BB0_13
+; CHECK-NEXT: L..BB0_12: # %sw.bb3
+; CHECK-NEXT: li r3, 4
+; CHECK-NEXT: bl .foo4[PR]
+; CHECK-NEXT: nop
+; CHECK-NEXT: li r3, 4
+; CHECK-NEXT: L..BB0_13: # %return
+; CHECK-NEXT: lwz r31, 60(r1) # 4-byte Folded Reload
+; CHECK-NEXT: addi r1, r1, 64
+; CHECK-NEXT: lwz r0, 8(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+entry:
+ switch i32 %x, label %return [
+ i32 7, label %sw.bb
+ i32 10, label %sw.bb
+ i32 8, label %sw.bb1
+ i32 11, label %sw.bb1
+ i32 9, label %sw.bb2
+ i32 12, label %sw.bb2
+ i32 4, label %sw.bb3
+ ]
+
+sw.bb: ; preds = %entry, %entry
+ tail call void @foo1(i32 noundef signext %x)
+ br label %return
+
+sw.bb1: ; preds = %entry, %entry
+ tail call void @foo2(i32 noundef signext %x)
+ br label %return
+
+sw.bb2: ; preds = %entry, %entry
+ tail call void @foo3(i32 noundef signext %x)
+ br label %return
+
+sw.bb3: ; preds = %entry
+ tail call void @foo4(i32 noundef signext 4)
+ br label %return
+
+return: ; preds = %sw.bb, %sw.bb1, %sw.bb2, %sw.bb3, %entry
+ %retval.0 = phi i32 [ 0, %entry ], [ 4, %sw.bb3 ], [ %x, %sw.bb2 ], [ %x, %sw.bb1 ], [ %x, %sw.bb ]
+ ret i32 %retval.0
+}
+
+define i32 @goo(i32 noundef signext %x) {
+; CHECK-LABEL: goo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stwu r1, -64(r1)
+; CHECK-NEXT: stw r0, 72(r1)
+; CHECK-NEXT: cmplwi r3, 12
+; CHECK-NEXT: stw r31, 60(r1) # 4-byte Folded Spill
+; CHECK-NEXT: mr r31, r3
+; CHECK-NEXT: bgt cr0, L..BB1_7
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: li r3, 1
+; CHECK-NEXT: slw r3, r3, r31
+; CHECK-NEXT: andi. r4, r3, 5632
+; CHECK-NEXT: bne cr0, L..BB1_4
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: andi. r3, r3, 2304
+; CHECK-NEXT: beq cr0, L..BB1_5
+; CHECK-NEXT: # %bb.3: # %sw.bb1
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: bl .foo2[PR]
+; CHECK-NEXT: nop
+; CHECK-NEXT: b L..BB1_9
+; CHECK-NEXT: L..BB1_4: # %sw.bb2
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: bl .foo3[PR]
+; CHECK-NEXT: nop
+; CHECK-NEXT: b L..BB1_9
+; CHECK-NEXT: L..BB1_5: # %entry
+; CHECK-NEXT: cmplwi r31, 7
+; CHECK-NEXT: bne cr0, L..BB1_7
+; CHECK-NEXT: # %bb.6: # %sw.bb
+; CHECK-NEXT: li r3, 7
+; CHECK-NEXT: li r31, 7
+; CHECK-NEXT: bl .foo1[PR]
+; CHECK-NEXT: nop
+; CHECK-NEXT: b L..BB1_9
+; CHECK-NEXT: L..BB1_7: # %entry
+; CHECK-NEXT: cmplwi r31, 4
+; CHECK-NEXT: li r31, 0
+; CHECK-NEXT: bne cr0, L..BB1_9
+; CHECK-NEXT: # %bb.8: # %sw.bb3
+; CHECK-NEXT: li r3, 4
+; CHECK-NEXT: li r31, 4
+; CHECK-NEXT: bl .foo4[PR]
+; CHECK-NEXT: nop
+; CHECK-NEXT: L..BB1_9: # %return
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: lwz r31, 60(r1) # 4-byte Folded Reload
+; CHECK-NEXT: addi r1, r1, 64
+; CHECK-NEXT: lwz r0, 8(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+entry:
+ switch i32 %x, label %return [
+ i32 7, label %sw.bb
+ i32 8, label %sw.bb1
+ i32 11, label %sw.bb1
+ i32 9, label %sw.bb2
+ i32 10, label %sw.bb2
+ i32 12, label %sw.bb2
+ i32 4, label %sw.bb3
+ ]
+
+sw.bb: ; preds = %entry
+ tail call void @foo1(i32 noundef signext 7)
+ br label %return
+
+sw.bb1: ; preds = %entry, %entry
+ tail call void @foo2(i32 noundef signext %x)
+ br label %return
+
+sw.bb2: ; preds = %entry, %entry, %entry
+ tail call void @foo3(i32 noundef signext %x)
+ br label %return
+
+sw.bb3: ; preds = %entry
+ tail call void @foo4(i32 noundef signext 4)
+ br label %return
+
+return: ; preds = %sw.bb, %sw.bb1, %sw.bb2, %sw.bb3, %entry
+ %retval.0 = phi i32 [ 0, %entry ], [ 4, %sw.bb3 ], [ %x, %sw.bb2 ], [ %x, %sw.bb1 ], [ 7, %sw.bb ]
+ ret i32 %retval.0
+}
+
+declare void @foo1(i32 noundef signext)
+
+declare void @foo2(i32 noundef signext)
+
+declare void @foo3(i32 noundef signext)
+
+declare void @foo4(i32 noundef signext)
More information about the llvm-commits
mailing list