[llvm] 2a2720a - [CSSPGO] Move pseudo probes to the beginning of a block to unblock SelectionDAG combine.
Hongtao Yu via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 7 22:45:50 PDT 2021
Author: Hongtao Yu
Date: 2021-04-07T22:45:35-07:00
New Revision: 2a2720a2dec4ad4fdc7ae58939448e51824a12c4
URL: https://github.com/llvm/llvm-project/commit/2a2720a2dec4ad4fdc7ae58939448e51824a12c4
DIFF: https://github.com/llvm/llvm-project/commit/2a2720a2dec4ad4fdc7ae58939448e51824a12c4.diff
LOG: [CSSPGO] Move pseudo probes to the beginning of a block to unblock SelectionDAG combine.
Pseudo probes, when scattered within a block, can become chained dependencies of other regular DAG nodes and thereby prevent DAG combine optimizations from firing. To fix this, the scattered probes in a block are grouped together and placed at the beginning of the block. This shouldn't affect profile quality.
Test Plan:
Reviewed By: wenlei, wmi
Differential Revision: https://reviews.llvm.org/D100002
Added:
llvm/test/Transforms/SampleProfile/pseudo-probe-selectionDAG.ll
Modified:
llvm/lib/CodeGen/CodeGenPrepare.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index d4702820aa3b..156250aec9f4 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -405,6 +405,7 @@ class TypePromotionTransaction;
bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
bool fixupDbgValue(Instruction *I);
bool placeDbgValues(Function &F);
+ bool placePseudoProbes(Function &F);
bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
bool tryToPromoteExts(TypePromotionTransaction &TPT,
@@ -611,6 +612,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Do this last to clean up use-before-def scenarios introduced by other
// preparatory transforms.
EverMadeChange |= placeDbgValues(F);
+ EverMadeChange |= placePseudoProbes(F);
#ifndef NDEBUG
if (VerifyBFIUpdates)
@@ -7989,6 +7991,28 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
return MadeChange;
}
+/// Group the pseudo probes scattered through each block of \p F at the
+/// beginning of that block to favor SelectionDAG. A probe sitting between
+/// regular instructions can become a chained dependency of other regular DAG
+/// nodes and thereby block DAG combine optimizations.
+///
+/// \returns true if at least one probe was moved.
+bool CodeGenPrepare::placePseudoProbes(Function &F) {
+  bool MadeChange = false;
+  for (auto &Block : F) {
+    // Find the anchor: the first instruction at or after the first insertion
+    // point that is neither a debug nor a pseudo instruction. Probes that
+    // already precede the anchor are left where they are.
+    auto FirstInst = Block.getFirstInsertionPt();
+    while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
+      ++FirstInst;
+    // Without an anchor there is nothing to move any probe in front of. Bail
+    // out instead of advancing (and later dereferencing) the end() iterator.
+    if (FirstInst == Block.end())
+      continue;
+    // Move the remaining probes, i.e. those after the anchor, in front of it.
+    BasicBlock::iterator I(FirstInst);
+    ++I;
+    while (I != Block.end()) {
+      // The iterator is advanced before the probe is relinked so the
+      // traversal is not disturbed by the move.
+      if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
+        II->moveBefore(&*FirstInst);
+        MadeChange = true;
+      }
+    }
+  }
+  return MadeChange;
+}
+
/// Scale down both weights to fit into uint32_t.
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-selectionDAG.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-selectionDAG.ll
new file mode 100644
index 000000000000..1d16b853acaf
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-selectionDAG.ll
@@ -0,0 +1,39 @@
+; REQUIRES: x86_64-linux
+; RUN: opt < %s -codegenprepare -mtriple=x86_64 -S -o %t
+; RUN: FileCheck %s < %t --check-prefix=IR
+; RUN: llc -mtriple=x86_64-- -stop-after=finalize-isel %t -o - | FileCheck %s --check-prefix=MIR
+
+define internal i32 @arc_compare() {
+entry:
+  %0 = load i64, i64* undef, align 8
+  br i1 undef, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+;; Check that the pseudo probes are next to each other at the beginning of
+;; this block. FileCheck only recognizes the upper-case -LABEL suffix.
+; IR-LABEL: if.end:
+; IR: call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
+; IR: call void @llvm.pseudoprobe(i64 5116412291814990879, i64 3, i32 0, i64 -1)
+  call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
+  %1 = load i16, i16* undef, align 8
+  call void @llvm.pseudoprobe(i64 5116412291814990879, i64 3, i32 0, i64 -1)
+  %2 = and i16 %1, 16
+  %3 = icmp eq i16 %2, 0
+;; Check that the load-and-cmp sequence is folded into a test instruction.
+; MIR-LABEL: bb.1.if.end:
+; MIR: %[[#REG:]]:gr64 = IMPLICIT_DEF
+; MIR: TEST8mi killed %[[#REG]], 1, $noreg, 0, $noreg, 16
+; MIR: JCC_1
+  br i1 %3, label %return, label %if.end6
+
+if.end6:                                          ; preds = %if.end
+  call void @llvm.pseudoprobe(i64 5116412291814990879, i64 5, i32 0, i64 -1)
+  br label %return
+
+return:                                           ; preds = %if.end6, %if.end, %entry
+  ret i32 undef
+}
+
+; Function Attrs: inaccessiblememonly nounwind willreturn
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
+
+attributes #0 = { inaccessiblememonly nounwind willreturn }
More information about the llvm-commits
mailing list