[llvm] 80b78a4 - [MachinePipeliner] Add ORE for MachinePipeliner
Jinsong Ji via llvm-commits
llvm-commits at lists.llvm.org
Tue May 5 09:12:34 PDT 2020
Author: Jinsong Ji
Date: 2020-05-05T16:04:53Z
New Revision: 80b78a47e5370ca3a0d2b3cc8e30c3ce1674b298
URL: https://github.com/llvm/llvm-project/commit/80b78a47e5370ca3a0d2b3cc8e30c3ce1674b298
DIFF: https://github.com/llvm/llvm-project/commit/80b78a47e5370ca3a0d2b3cc8e30c3ce1674b298.diff
LOG: [MachinePipeliner] Add ORE for MachinePipeliner
This patch adds ORE for MachinePipeliner, so that people can analyze
their code using opt-viewer or other tools, then optimize the code to
catch more pipelining opportunities.
Reviewed By: bcahoon
Differential Revision: https://reviews.llvm.org/D79368
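
For readers who want to emit similar remarks from their own machine passes, the
pattern this patch follows is the standard MachineOptimizationRemarkEmitter
idiom: declare a dependency on MachineOptimizationRemarkEmitterPass in
getAnalysisUsage, fetch the emitter in runOnMachineFunction, and construct each
remark inside a lambda passed to ORE.emit so no work is done unless remarks are
requested. Below is a minimal sketch of that idiom in a standalone pass; the
names MyMachinePass, "my-pass" and "LoopVisited" are illustrative placeholders,
not part of this patch, and pass registration boilerplate is omitted.

// Illustrative sketch only: a MachineFunctionPass that emits machine remarks
// the same way this patch does in MachinePipeliner. MyMachinePass, "my-pass"
// and "LoopVisited" are made-up names.
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"

using namespace llvm;

namespace {
class MyMachinePass : public MachineFunctionPass {
public:
  static char ID;
  MyMachinePass() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Request the remark emitter, exactly as the pipeliner now does.
    AU.addRequired<MachineLoopInfo>();
    AU.addRequired<MachineOptimizationRemarkEmitterPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    MachineOptimizationRemarkEmitter &ORE =
        getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
    for (MachineLoop *L : getAnalysis<MachineLoopInfo>()) {
      // The lambda keeps remark construction free when remarks are disabled.
      ORE.emit([&]() {
        return MachineOptimizationRemarkAnalysis("my-pass", "LoopVisited",
                                                 L->getStartLoc(),
                                                 L->getHeader())
               << "visited loop with "
               << ore::NV("NumBlocks", L->getNumBlocks()) << " blocks";
      });
    }
    return false;
  }
};
} // end anonymous namespace

char MyMachinePass::ID = 0;

Once a pass emits remarks this way, they can be printed by llc with the
-pass-remarks=<pass>, -pass-remarks-missed=<pass> and
-pass-remarks-analysis=<pass> filters (the new test below uses
-pass-remarks-analysis=pipeliner -pass-remarks=pipeliner), or serialized with
-pass-remarks-output and inspected with opt-viewer as the log message
describes.
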
Added:
llvm/test/CodeGen/PowerPC/sms-remark.ll
Modified:
llvm/include/llvm/CodeGen/MachinePipeliner.h
llvm/lib/CodeGen/MachinePipeliner.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
index 49276fb1a94d..8b2c27e7b888 100644
--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -43,6 +43,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -60,6 +61,7 @@ extern cl::opt<bool> SwpEnableCopyToPhi;
class MachinePipeliner : public MachineFunctionPass {
public:
MachineFunction *MF = nullptr;
+ MachineOptimizationRemarkEmitter *ORE = nullptr;
const MachineLoopInfo *MLI = nullptr;
const MachineDominatorTree *MDT = nullptr;
const InstrItineraryData *InstrItins;
@@ -96,6 +98,7 @@ class MachinePipeliner : public MachineFunctionPass {
AU.addRequired<MachineLoopInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<LiveIntervals>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 3465aaada873..ef4b02ca9e3e 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -217,6 +217,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
MLI = &getAnalysis<MachineLoopInfo>();
MDT = &getAnalysis<MachineDominatorTree>();
+ ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
TII = MF->getSubtarget().getInstrInfo();
RegClassInfo.runOnMachineFunction(*MF);
@@ -248,6 +249,12 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
setPragmaPipelineOptions(L);
if (!canPipelineLoop(L)) {
LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkMissed(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "Failed to pipeline loop";
+ });
+
return Changed;
}
@@ -309,11 +316,24 @@ void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
/// restricted to loops with a single basic block. Make sure that the
/// branch in the loop can be analyzed.
bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
- if (L.getNumBlocks() != 1)
+ if (L.getNumBlocks() != 1) {
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "Not a single basic block: "
+ << ore::NV("NumBlocks", L.getNumBlocks());
+ });
return false;
+ }
- if (disabledByPragma)
+ if (disabledByPragma) {
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "Disabled by Pragma.";
+ });
return false;
+ }
// Check if the branch can't be understood because we can't do pipelining
// if that's the case.
@@ -321,25 +341,37 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
LI.FBB = nullptr;
LI.BrCond.clear();
if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
- LLVM_DEBUG(
- dbgs() << "Unable to analyzeBranch, can NOT pipeline current Loop\n");
+ LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");
NumFailBranch++;
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "The branch can't be understood";
+ });
return false;
}
LI.LoopInductionVar = nullptr;
LI.LoopCompare = nullptr;
if (!TII->analyzeLoopForPipelining(L.getTopBlock())) {
- LLVM_DEBUG(
- dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
+ LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
NumFailLoop++;
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "The loop structure is not supported";
+ });
return false;
}
if (!L.getLoopPreheader()) {
- LLVM_DEBUG(
- dbgs() << "Preheader not found, can NOT pipeline current Loop\n");
+ LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");
NumFailPreheader++;
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "No loop preheader found";
+ });
return false;
}
@@ -457,10 +489,13 @@ void SwingSchedulerDAG::schedule() {
// Can't schedule a loop without a valid MII.
if (MII == 0) {
- LLVM_DEBUG(
- dbgs()
- << "0 is not a valid Minimal Initiation Interval, can NOT schedule\n");
+ LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n");
NumFailZeroMII++;
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "Invalid Minimal Initiation Interval: 0";
+ });
return;
}
@@ -469,6 +504,14 @@ void SwingSchedulerDAG::schedule() {
LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
<< ", we don't pipleline large loops\n");
NumFailLargeMaxMII++;
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "Minimal Initiation Interval too large: "
+ << ore::NV("MII", (int)MII) << " > "
+ << ore::NV("SwpMaxMii", SwpMaxMii) << "."
+ << "Refer to -pipeliner-max-mii.";
+ });
return;
}
@@ -511,15 +554,24 @@ void SwingSchedulerDAG::schedule() {
if (!Scheduled){
LLVM_DEBUG(dbgs() << "No schedule found, return\n");
NumFailNoSchedule++;
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "Unable to find schedule";
+ });
return;
}
unsigned numStages = Schedule.getMaxStageCount();
// No need to generate pipeline if there are no overlapped iterations.
if (numStages == 0) {
- LLVM_DEBUG(
- dbgs() << "No overlapped iterations, no need to generate pipeline\n");
+ LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n");
NumFailZeroStage++;
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "No need to pipeline - no overlapped iterations in schedule.";
+ });
return;
}
// Check that the maximum stage count is less than user-defined limit.
@@ -527,9 +579,23 @@ void SwingSchedulerDAG::schedule() {
LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
<< " : too many stages, abort\n");
NumFailLargeMaxStage++;
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "Too many stages in schedule: "
+ << ore::NV("numStages", (int)numStages) << " > "
+ << ore::NV("SwpMaxStages", SwpMaxStages)
+ << ". Refer to -pipeliner-max-stages.";
+ });
return;
}
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemark(DEBUG_TYPE, "schedule", Loop.getStartLoc(),
+ Loop.getHeader())
+ << "Pipelined succesfully!";
+ });
+
// Generate the schedule as a ModuloSchedule.
DenseMap<MachineInstr *, int> Cycles, Stages;
std::vector<MachineInstr *> OrderedInsts;
@@ -1080,7 +1146,7 @@ unsigned SwingSchedulerDAG::calculateResMII() {
}
}
int Resmii = Resources.size();
- LLVM_DEBUG(dbgs() << "Retrun Res MII:" << Resmii << "\n");
+ LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
// Delete the memory for each of the DFAs that were created earlier.
for (ResourceManager *RI : Resources) {
ResourceManager *D = RI;
@@ -2052,9 +2118,16 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II
<< ")\n");
- if (scheduleFound)
+ if (scheduleFound) {
Schedule.finalizeSchedule(this);
- else
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "Schedule found with Initiation Interval: " << ore::NV("II", II)
+ << ", MaxStageCount: "
+ << ore::NV("MaxStageCount", Schedule.getMaxStageCount());
+ });
+ } else
Schedule.reset();
return scheduleFound && Schedule.getMaxStageCount() > 0;
diff --git a/llvm/test/CodeGen/PowerPC/sms-remark.ll b/llvm/test/CodeGen/PowerPC/sms-remark.ll
new file mode 100644
index 000000000000..647b56fa7fcd
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/sms-remark.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr9 --ppc-enable-pipeliner \
+; RUN: -pass-remarks-analysis=pipeliner -pass-remarks=pipeliner -o /dev/null 2>&1 \
+; RUN: | FileCheck %s
+
+@x = dso_local local_unnamed_addr global <{ i32, i32, i32, i32, [1020 x i32] }> <{ i32 1, i32 2, i32 3, i32 4, [1020 x i32] zeroinitializer }>, align 4
+@y = dso_local global [1024 x i32] zeroinitializer, align 4
+
+define dso_local i32* @foo() local_unnamed_addr {
+;CHECK: Schedule found with Initiation Interval
+;CHECK: Pipelined successfully!
+entry:
+ %.pre = load i32, i32* getelementptr inbounds ([1024 x i32], [1024 x i32]* @y, i64 0, i64 0), align 4
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret i32* getelementptr inbounds ([1024 x i32], [1024 x i32]* @y, i64 0, i64 0)
+
+for.body: ; preds = %for.body, %entry
+ %0 = phi i32 [ %.pre, %entry ], [ %add.2, %for.body ]
+ %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next.2, %for.body ]
+ %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* bitcast (<{ i32, i32, i32, i32, [1020 x i32] }>* @x to [1024 x i32]*), i64 0, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx2, align 4
+ %mul = mul nsw i32 %1, %1
+ %add = add nsw i32 %mul, %0
+ %arrayidx6 = getelementptr inbounds [1024 x i32], [1024 x i32]* @y, i64 0, i64 %indvars.iv
+ store i32 %add, i32* %arrayidx6, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx2.1 = getelementptr inbounds [1024 x i32], [1024 x i32]* bitcast (<{ i32, i32, i32, i32, [1020 x i32] }>* @x to [1024 x i32]*), i64 0, i64 %indvars.iv.next
+ %2 = load i32, i32* %arrayidx2.1, align 4
+ %mul.1 = mul nsw i32 %2, %2
+ %add.1 = add nsw i32 %mul.1, %add
+ %arrayidx6.1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @y, i64 0, i64 %indvars.iv.next
+ store i32 %add.1, i32* %arrayidx6.1, align 4
+ %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv, 2
+ %arrayidx2.2 = getelementptr inbounds [1024 x i32], [1024 x i32]* bitcast (<{ i32, i32, i32, i32, [1020 x i32] }>* @x to [1024 x i32]*), i64 0, i64 %indvars.iv.next.1
+ %3 = load i32, i32* %arrayidx2.2, align 4
+ %mul.2 = mul nsw i32 %3, %3
+ %add.2 = add nsw i32 %mul.2, %add.1
+ %arrayidx6.2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @y, i64 0, i64 %indvars.iv.next.1
+ store i32 %add.2, i32* %arrayidx6.2, align 4
+ %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv, 3
+ %exitcond.2 = icmp eq i64 %indvars.iv.next.2, 1024
+ br i1 %exitcond.2, label %for.cond.cleanup, label %for.body
+}