[llvm] [GlobalISel][IRTranslator] Port switch binary tree search optimization. (PR #77279)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 7 23:31:15 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-llvm-globalisel
Author: Amara Emerson (aemerson)
Changes:
This re-uses code that was earlier extracted from SelectionDAG into SwitchLoweringUtils.
Much of the code is a straight port from SDAG's splitWorkItem(), with minor changes needed for GISel.
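As a rough illustration (my own example, not part of the patch): a switch along the lines of the one below, with more than three case clusters, built above -O0 and without minsize, is the kind of input that now takes the splitWorkItem path instead of being lowered as one linear chain of compares. The function name and case values are made up.

```llvm
; Hypothetical input: five sparse cases that should stay as separate
; clusters, so NumClusters > 3 and the balanced-tree split kicks in.
define i32 @classify(i32 %x) {
entry:
  switch i32 %x, label %default [
    i32 0,  label %bb0
    i32 10, label %bb1
    i32 20, label %bb2
    i32 30, label %bb3
    i32 40, label %bb4
  ]

bb0:
  ret i32 1
bb1:
  ret i32 2
bb2:
  ret i32 3
bb3:
  ret i32 4
bb4:
  ret i32 5
default:
  ret i32 0
}
```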
---
Full diff: https://github.com/llvm/llvm-project/pull/77279.diff
3 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h (+4)
- (modified) llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp (+77-2)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-split.ll (+18-16)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index bffc03ed0187e6..1b094d9d9fe771 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -366,6 +366,10 @@ class IRTranslator : public MachineFunctionPass {
BranchProbability BranchProbToNext, Register Reg,
SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB);
+ void splitWorkItem(SwitchCG::SwitchWorkList &WorkList,
+ const SwitchCG::SwitchWorkListItem &W, Value *Cond,
+ MachineBasicBlock *SwitchMBB, MachineIRBuilder &MIB);
+
bool lowerJumpTableWorkItem(
SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 9c11113902a24e..6708f2baa5ed5f 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -751,16 +751,91 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
- // FIXME: At the moment we don't do any splitting optimizations here like
- // SelectionDAG does, so this worklist only has one entry.
while (!WorkList.empty()) {
SwitchWorkListItem W = WorkList.pop_back_val();
+
+ unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
+ // For optimized builds, lower large range as a balanced binary tree.
+ if (NumClusters > 3 &&
+ MF->getTarget().getOptLevel() != CodeGenOptLevel::None &&
+ !DefaultMBB->getParent()->getFunction().hasMinSize()) {
+ splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB, MIB);
+ continue;
+ }
+
if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB))
return false;
}
return true;
}
+void IRTranslator::splitWorkItem(SwitchCG::SwitchWorkList &WorkList,
+ const SwitchCG::SwitchWorkListItem &W,
+ Value *Cond, MachineBasicBlock *SwitchMBB,
+ MachineIRBuilder &MIB) {
+ using namespace SwitchCG;
+ assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
+ "Clusters not sorted?");
+ assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
+
+ auto [LastLeft, FirstRight, LeftProb, RightProb] =
+ SL->computeSplitWorkItemInfo(W);
+
+ // Use the first element on the right as pivot since we will make less-than
+ // comparisons against it.
+ CaseClusterIt PivotCluster = FirstRight;
+ assert(PivotCluster > W.FirstCluster);
+ assert(PivotCluster <= W.LastCluster);
+
+ CaseClusterIt FirstLeft = W.FirstCluster;
+ CaseClusterIt LastRight = W.LastCluster;
+
+ const ConstantInt *Pivot = PivotCluster->Low;
+
+ // New blocks will be inserted immediately after the current one.
+ MachineFunction::iterator BBI(W.MBB);
+ ++BBI;
+
+ // We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
+ // we can branch to its destination directly if it's squeezed exactly in
+ // between the known lower bound and Pivot - 1.
+ MachineBasicBlock *LeftMBB;
+ if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
+ FirstLeft->Low == W.GE &&
+ (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
+ LeftMBB = FirstLeft->MBB;
+ } else {
+ LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
+ FuncInfo.MF->insert(BBI, LeftMBB);
+ WorkList.push_back(
+ {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
+ }
+
+ // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
+ // single cluster, RHS.Low == Pivot, and we can branch to its destination
+ // directly if RHS.High equals the current upper bound.
+ MachineBasicBlock *RightMBB;
+ if (FirstRight == LastRight && FirstRight->Kind == CC_Range && W.LT &&
+ (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
+ RightMBB = FirstRight->MBB;
+ } else {
+ RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
+ FuncInfo.MF->insert(BBI, RightMBB);
+ WorkList.push_back(
+ {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
+ }
+
+ // Create the CaseBlock record that will be used to lower the branch.
+ CaseBlock CB(ICmpInst::Predicate::ICMP_SLT, false, Cond, Pivot, nullptr,
+ LeftMBB, RightMBB, W.MBB, MIB.getDebugLoc(), LeftProb,
+ RightProb);
+
+ if (W.MBB == SwitchMBB)
+ emitSwitchCase(CB, SwitchMBB, MIB);
+ else
+ SL->SwitchCases.push_back(CB);
+}
+
void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
MachineBasicBlock *MBB) {
// Emit the code for the jump table
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-split.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-split.ll
index 54c8eb913d5d40..55cf48ed2245fa 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-split.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-split.ll
@@ -17,31 +17,33 @@ define i32 @scanfile(i32 %call148) {
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: cmp w0, #1
; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: cbz w8, LBB0_7
+; CHECK-NEXT: b.ge LBB0_3
; CHECK-NEXT: ; %bb.1: ; %entry
-; CHECK-NEXT: cmp w8, #1
-; CHECK-NEXT: b.eq LBB0_7
-; CHECK-NEXT: ; %bb.2: ; %entry
+; CHECK-NEXT: cbnz w8, LBB0_7
+; CHECK-NEXT: LBB0_2: ; %common.ret1
+; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-NEXT: ret
+; CHECK-NEXT: LBB0_3: ; %entry
+; CHECK-NEXT: b.eq LBB0_2
+; CHECK-NEXT: ; %bb.4: ; %entry
; CHECK-NEXT: cmp w8, #2
-; CHECK-NEXT: b.eq LBB0_4
-; CHECK-NEXT: ; %bb.3: ; %entry
+; CHECK-NEXT: b.eq LBB0_6
+; CHECK-NEXT: ; %bb.5: ; %entry
; CHECK-NEXT: cmp w8, #3
-; CHECK-NEXT: b.ne LBB0_5
-; CHECK-NEXT: LBB0_4: ; %sw.bb300
+; CHECK-NEXT: b.ne LBB0_2
+; CHECK-NEXT: LBB0_6: ; %sw.bb300
; CHECK-NEXT: bl _logg
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-NEXT: ret
-; CHECK-NEXT: LBB0_5: ; %entry
+; CHECK-NEXT: LBB0_7: ; %entry
; CHECK-NEXT: cmn w8, #2
-; CHECK-NEXT: b.eq LBB0_8
-; CHECK-NEXT: ; %bb.6: ; %entry
+; CHECK-NEXT: b.eq LBB0_9
+; CHECK-NEXT: ; %bb.8: ; %entry
; CHECK-NEXT: cmn w8, #1
-; CHECK-NEXT: b.eq LBB0_8
-; CHECK-NEXT: LBB0_7: ; %common.ret1
-; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; CHECK-NEXT: ret
-; CHECK-NEXT: LBB0_8: ; %sw.bb150
+; CHECK-NEXT: b.ne LBB0_2
+; CHECK-NEXT: LBB0_9: ; %sw.bb150
; CHECK-NEXT: bl _logg
; CHECK-NEXT: brk #0x1
entry:
``````````
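To make the effect concrete (again my own sketch, not output from this patch): splitWorkItem takes the low value of the first right-hand cluster as the pivot, emits a single signed less-than compare against it, and pushes the two halves back onto the worklist, where small halves are then lowered by lowerSwitchWorkItem as short compare chains. For the hypothetical switch above, and assuming the weights put the split before 20, the resulting control flow is roughly equivalent to hand-written IR like this:

```llvm
; Approximation of the branch tree built for @classify above. The pivot
; (20 here) is an assumption; the real choice depends on the balanced
; left/right probabilities from computeSplitWorkItemInfo.
define i32 @classify_tree(i32 %x) {
entry:
  %lt.pivot = icmp slt i32 %x, 20   ; CaseBlock with ICMP_SLT against Pivot
  br i1 %lt.pivot, label %left, label %right

left:                               ; clusters {0, 10}, lowered as a chain
  %is0 = icmp eq i32 %x, 0
  br i1 %is0, label %bb0, label %left.1
left.1:
  %is10 = icmp eq i32 %x, 10
  br i1 %is10, label %bb1, label %default

right:                              ; clusters {20, 30, 40}, lowered as a chain
  %is20 = icmp eq i32 %x, 20
  br i1 %is20, label %bb2, label %right.1
right.1:
  %is30 = icmp eq i32 %x, 30
  br i1 %is30, label %bb3, label %right.2
right.2:
  %is40 = icmp eq i32 %x, 40
  br i1 %is40, label %bb4, label %default

bb0:
  ret i32 1
bb1:
  ret i32 2
bb2:
  ret i32 3
bb3:
  ret i32 4
bb4:
  ret i32 5
default:
  ret i32 0
}
```

When one half collapses to a single range cluster pinned against a known bound, the patch branches straight to that cluster's block instead of creating a new one (the LeftMBB/RightMBB special cases above), so some of the intermediate blocks in this sketch can disappear in real output.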
https://github.com/llvm/llvm-project/pull/77279