[llvm] r318202 - [CodeGen] Peel off the dominant case in switch statement in lowering
Rong Xu via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 14 13:44:09 PST 2017
Author: xur
Date: Tue Nov 14 13:44:09 2017
New Revision: 318202
URL: http://llvm.org/viewvc/llvm-project?rev=318202&view=rev
Log:
[CodeGen] Peel off the dominant case in switch statement in lowering
This patch peels off the top case in switch statement into a branch if the
probability exceeds a threshold. This will help the branch prediction and
avoids the extra compares when lowering into chain of branches.
Differential Revision: http://reviews.llvm.org/D39262
Added:
llvm/trunk/test/CodeGen/X86/switch-lower-peel-top-case.ll
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
llvm/trunk/test/CodeGen/Generic/MachineBranchProb.ll
llvm/trunk/test/CodeGen/SystemZ/loop-03.ll
llvm/trunk/test/CodeGen/X86/switch-bt.ll
llvm/trunk/test/CodeGen/X86/switch.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=318202&r1=318201&r2=318202&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue Nov 14 13:44:09 2017
@@ -134,6 +134,12 @@ LimitFPPrecision("limit-float-precision"
cl::location(LimitFloatPrecision),
cl::init(0));
+static cl::opt<unsigned> SwitchPeelThreshold(
+ "switch-peel-threshold", cl::Hidden, cl::init(66),
+ cl::desc("Set the case probability threshold for peeling the case from a "
+ "switch statement. A value greater than 100 will void this "
+ "optimization"));
+
// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
@@ -9834,6 +9840,74 @@ void SelectionDAGBuilder::splitWorkItem(
SwitchCases.push_back(CB);
}
+// Scale CaseProb after peeling a case with the probablity of PeeledCaseProb
+// from the swith statement.
+static BranchProbability scaleCaseProbality(BranchProbability CaseProb,
+ BranchProbability PeeledCaseProb) {
+ if (PeeledCaseProb == BranchProbability::getOne())
+ return BranchProbability::getZero();
+ BranchProbability SwitchProb = PeeledCaseProb.getCompl();
+ return BranchProbability(CaseProb.getNumerator(),
+ SwitchProb.scale(CaseProb.getDenominator()));
+}
+
+// Try to peel the top probability case if it exceeds the threshold.
+// Return current MachineBasicBlock for the switch statement if the peeling
+// does not occur.
+// If the peeling is performed, return the newly created MachineBasicBlock
+// for the peeled switch statement. Also update Clusters to remove the peeled
+// case. PeeledCaseProb is the BranchProbability for the peeled case.
+MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
+ const SwitchInst &SI, CaseClusterVector &Clusters,
+ BranchProbability &PeeledCaseProb) {
+ MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+ // Don't perform if there is only one cluster or optimizing for size.
+ if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
+ TM.getOptLevel() == CodeGenOpt::None ||
+ SwitchMBB->getParent()->getFunction()->optForMinSize())
+ return SwitchMBB;
+
+ BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
+ unsigned PeeledCaseIndex = 0;
+ bool SwitchPeeled = false;
+ for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
+ CaseCluster &CC = Clusters[Index];
+ if (CC.Prob < TopCaseProb)
+ continue;
+ TopCaseProb = CC.Prob;
+ PeeledCaseIndex = Index;
+ SwitchPeeled = true;
+ }
+ if (!SwitchPeeled)
+ return SwitchMBB;
+
+ DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " << TopCaseProb
+ << "\n");
+
+ // Record the MBB for the peeled switch statement.
+ MachineFunction::iterator BBI(SwitchMBB);
+ ++BBI;
+ MachineBasicBlock *PeeledSwitchMBB =
+ FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
+ FuncInfo.MF->insert(BBI, PeeledSwitchMBB);
+
+ ExportFromCurrentBlock(SI.getCondition());
+ auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
+ SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
+ nullptr, nullptr, TopCaseProb.getCompl()};
+ lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
+
+ Clusters.erase(PeeledCaseIt);
+ for (CaseCluster &CC : Clusters) {
+ DEBUG(dbgs() << "Scale the probablity for one cluster, before scaling: "
+ << CC.Prob << "\n");
+ CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
+ DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
+ }
+ PeeledCaseProb = TopCaseProb;
+ return PeeledSwitchMBB;
+}
+
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// Extract cases from the switch.
BranchProbabilityInfo *BPI = FuncInfo.BPI;
@@ -9887,9 +9961,15 @@ void SelectionDAGBuilder::visitSwitch(co
}
}
+ // The branch probablity of the peeled case.
+ BranchProbability PeeledCaseProb = BranchProbability::getZero();
+ MachineBasicBlock *PeeledSwitchMBB =
+ peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);
+
// If there is only the default destination, jump there directly.
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
if (Clusters.empty()) {
+ assert(PeeledSwitchMBB == SwitchMBB);
SwitchMBB->addSuccessor(DefaultMBB);
if (DefaultMBB != NextBlock(SwitchMBB)) {
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
@@ -9921,8 +10001,14 @@ void SelectionDAGBuilder::visitSwitch(co
SwitchWorkList WorkList;
CaseClusterIt First = Clusters.begin();
CaseClusterIt Last = Clusters.end() - 1;
- auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
- WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
+ auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
+ // Scale the branchprobability for DefaultMBB if the peel occurs and
+ // DefaultMBB is not replaced.
+ if (PeeledCaseProb != BranchProbability::getZero() &&
+ DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
+ DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb);
+ WorkList.push_back(
+ {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
while (!WorkList.empty()) {
SwitchWorkListItem W = WorkList.back();
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h?rev=318202&r1=318201&r2=318202&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h Tue Nov 14 13:44:09 2017
@@ -369,6 +369,10 @@ private:
MachineBasicBlock *SwitchMBB,
MachineBasicBlock *DefaultMBB);
+ /// Peel the top probability case if it exceeds the threshold
+ MachineBasicBlock *peelDominantCaseCluster(const SwitchInst &SI,
+ CaseClusterVector &Clusters,
+ BranchProbability &PeeledCaseProb);
/// A class which encapsulates all of the information needed to generate a
/// stack protector check and signals to isel via its state being initialized
Modified: llvm/trunk/test/CodeGen/Generic/MachineBranchProb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Generic/MachineBranchProb.ll?rev=318202&r1=318201&r2=318202&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Generic/MachineBranchProb.ll (original)
+++ llvm/trunk/test/CodeGen/Generic/MachineBranchProb.ll Tue Nov 14 13:44:09 2017
@@ -19,12 +19,15 @@ entry:
i64 1, label %sw.bb
i64 4, label %sw.bb
i64 5, label %sw.bb1
+ i64 15, label %sw.bb
], !prof !0
; CHECK: BB#0: derived from LLVM BB %entry
-; CHECK: Successors according to CFG: BB#2({{[0-9a-fx/= ]+}}75.29%) BB#4({{[0-9a-fx/= ]+}}24.71%)
+; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}92.17%) BB#4({{[0-9a-fx/= ]+}}7.83%)
; CHECK: BB#4: derived from LLVM BB %entry
-; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}47.62%) BB#5({{[0-9a-fx/= ]+}}52.38%)
+; CHECK: Successors according to CFG: BB#2({{[0-9a-fx/= ]+}}75.29%) BB#5({{[0-9a-fx/= ]+}}24.71%)
; CHECK: BB#5: derived from LLVM BB %entry
+; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}47.62%) BB#6({{[0-9a-fx/= ]+}}52.38%)
+; CHECK: BB#6: derived from LLVM BB %entry
; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}36.36%) BB#3({{[0-9a-fx/= ]+}}63.64%)
sw.bb:
@@ -40,7 +43,7 @@ return:
ret i32 %retval.0
}
-!0 = !{!"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64}
+!0 = !{!"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64, i21 1000}
declare void @g(i32)
Modified: llvm/trunk/test/CodeGen/SystemZ/loop-03.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/loop-03.ll?rev=318202&r1=318201&r2=318202&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/loop-03.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/loop-03.ll Tue Nov 14 13:44:09 2017
@@ -3,7 +3,7 @@
; FP128 registers part of the callee saved registers list in order to avoid
; spilling / reloading.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+; RUN: llc -switch-peel-threshold=101 < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
%0 = type { %0*, %0*, %0*, i32, %1*, i64, i64, i64, i64, i64, i64, %2, %5, %7 }
%1 = type { i32, i32, i32 (%1*, i64, i32)*, i32 (%1*, i64, i64, i32, i8**)*, i32 (%1*, i64, i64, i64, i32)*, i32 (%1*)*, void (i8*)*, i8*, i8* }
Modified: llvm/trunk/test/CodeGen/X86/switch-bt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/switch-bt.ll?rev=318202&r1=318201&r2=318202&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/switch-bt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/switch-bt.ll Tue Nov 14 13:44:09 2017
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-- -asm-verbose=false < %s -jump-table-density=40 | FileCheck %s
+; RUN: llc -mtriple=x86_64-- -asm-verbose=false < %s -jump-table-density=40 -switch-peel-threshold=101 | FileCheck %s
; This switch should use bit tests, and the third bit test case is just
; testing for one possible value, so it doesn't need a bt.
Added: llvm/trunk/test/CodeGen/X86/switch-lower-peel-top-case.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/switch-lower-peel-top-case.ll?rev=318202&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/switch-lower-peel-top-case.ll (added)
+++ llvm/trunk/test/CodeGen/X86/switch-lower-peel-top-case.ll Tue Nov 14 13:44:09 2017
@@ -0,0 +1,135 @@
+; RUN: llc -stop-after=isel < %s | FileCheck %s
+
+define i32 @foo(i32 %n) !prof !1 {
+entry:
+ switch i32 %n, label %bb_default [
+ i32 8, label %bb1
+ i32 -8826, label %bb2
+ i32 18312, label %bb3
+ i32 18568, label %bb4
+ i32 129, label %bb5
+ ], !prof !2
+
+; CHECK: successors: %[[PEELED_CASE_LABEL:.*]](0x5999999a), %[[PEELED_SWITCH_LABEL:.*]](0x26666666)
+; CHECK: %[[VAL:[0-9]+]]:gr32 = COPY %edi
+; CHECK: %{{[0-9]+}}:gr32 = SUB32ri %[[VAL]], 18568, implicit-def %eflags
+; CHECK: JE_1 %[[PEELED_CASE_LABEL]], implicit %eflags
+; CHECK: JMP_1 %[[PEELED_SWITCH_LABEL]]
+; CHECK: [[PEELED_SWITCH_LABEL]]:
+; CHECK: successors: %[[BB1_LABEL:.*]](0x0206d3a0), %[[BB2_LABEL:.*]](0x7df92c60)
+; CHECK: %{{[0-9]+}}:gr32 = SUB32ri %[[VAL]], 18311, implicit-def %eflags
+; CHECK: JG_1 %[[BB2_LABEL]], implicit %eflags
+; CHECK: JMP_1 %[[BB1_LABEL]]
+; CHECK: [[BB1_LABEL]]:
+; CHECK: successors: %[[CASE2_LABEL:.*]](0x35e50d5b), %[[BB3_LABEL:.*]](0x4a1af2a5)
+; CHECK: %{{[0-9]+}}:gr32 = SUB32ri %[[VAL]], -8826, implicit-def %eflags
+; CHECK: JE_1 %[[CASE2_LABEL]], implicit %eflags
+; CHECK: JMP_1 %[[BB3_LABEL]]
+; CHECK: [[BB3_LABEL]]
+; CHECK: successors: %[[CASE5_LABEL:.*]](0x45d173c8), %[[BB4_LABEL:.*]](0x3a2e8c38)
+; CHECK: %{{[0-9]+}}:gr32 = SUB32ri %[[VAL]], 129, implicit-def %eflags
+; CHECK: JE_1 %[[CASE5_LABEL]], implicit %eflags
+; CHECK: JMP_1 %[[BB4_LABEL]]
+; CHECK: [[BB4_LABEL:.*]]:
+; CHECK: successors: %[[CASE1_LABEL:.*]](0x66666666), %[[DEFAULT_BB_LABEL:.*]](0x1999999a)
+; CHECK: %{{[0-9]+}}:gr32 = SUB32ri8 %[[VAL]], 8, implicit-def %eflags
+; CHECK: JE_1 %[[CASE1_LABEL]], implicit %eflags
+; CHECK: JMP_1 %[[DEFAULT_BB_LABEL]]
+; CHECK: [[BB2_LABEL]]:
+; CHECK: successors: %[[CASE3_LABEL:.*]](0x7fe44107), %[[DEFAULT_BB_LABEL]](0x001bbef9)
+; CHECK: %{{[0-9]+}}:gr32 = SUB32ri %[[VAL]], 18312, implicit-def %eflags
+; CHECK: JE_1 %[[CASE3_LABEL]], implicit %eflags
+; CHECK: JMP_1 %[[DEFAULT_BB_LABEL]]
+
+bb1:
+ br label %return
+bb2:
+ br label %return
+bb3:
+ br label %return
+bb4:
+ br label %return
+bb5:
+ br label %return
+bb_default:
+ br label %return
+
+return:
+ %retval = phi i32 [ 0, %bb_default ], [ 5, %bb5 ], [ 4, %bb4 ], [ 3, %bb3 ], [ 2, %bb2 ], [ 1, %bb1 ]
+ ret i32 %retval
+}
+
+; Test the peeling of the merged cases value 85 and 86.
+define i32 @foo1(i32 %n) !prof !1 {
+entry:
+ switch i32 %n, label %bb_default [
+ i32 -40, label %bb1
+ i32 86, label %bb2
+ i32 85, label %bb2
+ i32 1, label %bb3
+ i32 5, label %bb4
+ i32 7, label %bb5
+ i32 49, label %bb6
+ ], !prof !3
+
+; CHECK: successors: %[[PEELED_CASE_LABEL:.*]](0x59999999), %[[PEELED_SWITCH_LABEL:.*]](0x26666667)
+; CHECK: %[[VAL:[0-9]+]]:gr32 = COPY %edi
+; CHECK: %{{[0-9]+}}:gr32 = ADD32ri8 %{{[0-9]+}}, -85, implicit-def dead %eflags
+; CHECK: %{{[0-9]+}}:gr32 = SUB32ri8 %{{[0-9]+}}, 2, implicit-def %eflags
+; CHECK: JB_1 %[[PEELED_CASE_LABEL]], implicit %eflags
+; CHECK: JMP_1 %[[PEELED_SWITCH_LABEL]]
+; CHECK: [[PEELED_SWITCH_LABEL]]:
+; CHECK: successors: %[[BB1_LABEL:.*]](0x0088888a), %[[BB2_LABEL:.*]](0x7f777776)
+; CHECK: %{{[0-9]+}}:gr32 = SUB32ri8 %[[VAL]], 4, implicit-def %eflags
+; CHECK: JG_1 %[[BB2_LABEL]], implicit %eflags
+; CHECK: JMP_1 %[[BB1_LABEL]]
+; CHECK: [[BB1_LABEL]]:
+; CHECK: successors: %[[CASE4_LABEL:.*]](0x7f775a4f), %[[BB3_LABEL:.*]](0x0088a5b1)
+; CHECK: %{{[0-9]+}}:gr32 = SUB32ri8 %[[VAL]], 1, implicit-def %eflags
+; CHECK: JE_1 %[[CASE4_LABEL]], implicit %eflags
+; CHECK: JMP_1 %[[BB3_LABEL]]
+; CHECK: [[BB3_LABEL]]:
+; CHECK: successors: %[[CASE1_LABEL:.*]](0x66666666), %[[DEFAULT_BB_LABEL:.*]](0x1999999a)
+; CHECK: %{{[0-9]+}}:gr32 = SUB32ri8 %[[VAL]], -40, implicit-def %eflags
+; CHECK: JE_1 %[[CASE1_LABEL]], implicit %eflags
+; CHECK: JMP_1 %[[DEFAULT_BB_LABEL]]
+; CHECK: [[BB2_LABEL]]:
+; CHECK: successors: %[[CASE5_LABEL:.*]](0x00000000), %[[BB4_LABEL:.*]](0x80000000)
+; CHECK: %{{[0-9]+}}:gr32 = SUB32ri8 %[[VAL]], 5, implicit-def %eflags
+; CHECK: JE_1 %[[CASE5_LABEL]], implicit %eflags
+; CHECK: JMP_1 %[[BB4_LABEL]]
+; CHECK: [[BB4_LABEL]]:
+; CHECK: successors: %[[CASE6_LABEL:.*]](0x00000000), %[[BB5_LABEL:.*]](0x80000000)
+; CHECK: %{{[0-9]+}}:gr32 = SUB32ri8 %[[VAL]], 7, implicit-def %eflags
+; CHECK: JE_1 %[[CASE6_LABEL]], implicit %eflags
+; CHECK: JMP_1 %[[BB5_LABEL]]
+; CHECK: [[BB5_LABEL]]:
+; CHECK: successors: %[[CASE7_LABEL:.*]](0x00000000), %[[DEFAULT_BB_LABEL]](0x80000000)
+; CHECK: %{{[0-9]+}}:gr32 = SUB32ri8 %[[VAL]], 49, implicit-def %eflags
+; CHECK: JE_1 %[[CASE7_LABEL]], implicit %eflags
+; CHECK: JMP_1 %[[DEFAULT_BB_LABEL]]
+
+
+bb1:
+ br label %return
+bb2:
+ br label %return
+bb3:
+ br label %return
+bb4:
+ br label %return
+bb5:
+ br label %return
+bb6:
+ br label %return
+bb_default:
+ br label %return
+
+return:
+ %retval = phi i32 [ 0, %bb_default ], [ 6, %bb6 ], [ 5, %bb5 ], [ 4, %bb4 ], [ 3, %bb3 ], [ 2, %bb2 ], [ 1, %bb1 ]
+ ret i32 %retval
+}
+!1 = !{!"function_entry_count", i64 100000}
+!2 = !{!"branch_weights", i32 50, i32 100, i32 200, i32 29500, i32 70000, i32 150}
+!3 = !{!"branch_weights", i32 50, i32 100, i32 500, i32 69500, i32 29850, i32 0, i32 0, i32 0}
+
Modified: llvm/trunk/test/CodeGen/X86/switch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/switch.ll?rev=318202&r1=318201&r2=318202&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/switch.ll (original)
+++ llvm/trunk/test/CodeGen/X86/switch.ll Tue Nov 14 13:44:09 2017
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -jump-table-density=40 -verify-machineinstrs | FileCheck %s
+; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -jump-table-density=40 -switch-peel-threshold=101 -verify-machineinstrs | FileCheck %s
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -O0 -jump-table-density=40 -verify-machineinstrs | FileCheck --check-prefix=NOOPT %s
declare void @g(i32)
More information about the llvm-commits
mailing list