[llvm] [AMDGPU] Optionally Use GCNRPTrackers during scheduling (PR #93090)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Wed May 22 12:00:15 PDT 2024
https://github.com/jrbyrnes created https://github.com/llvm/llvm-project/pull/93090
This is part of a series of PRs which enable using the AMDGPU/GCN RPTrackers during scheduling. I've split them up to (hopefully) make reviewing easier. For context see https://github.com/llvm/llvm-project/pull/88797 . Since this is the final PR in the series, any high level comments should go here.
This PR adds the scheduling changes to: maintain the GCNRPTrackers during scheduling, and use them when making per-instruction scheduling decisions.
>From f5423aba333b33d2c2515817bb9c6c1bdb260597 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 21 May 2024 12:55:07 -0700
Subject: [PATCH 1/3] [AMDGPU] NFC: Add BBLiveOutMap & LiveOut Cache
Change-Id: I63cfd44e635cc4bee0e6780ca43b692c46e940b7
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 56 ++++++++++++++++++++-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 7 +++
2 files changed, 62 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 94d93390d0916..a4d05f62a7f74 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -58,6 +58,11 @@ static cl::opt<bool>
"Wave Limited (amdgpu-limit-wave-threshold)."),
cl::init(false));
+static cl::opt<bool> GCNTrackers(
+ "amdgpu-use-amdgpu-trackers", cl::Hidden,
+ cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
+ cl::init(false));
+
const unsigned ScheduleMetrics::ScaleFactor = 100;
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
@@ -526,6 +531,19 @@ GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
return RPTracker.moveMaxPressure();
}
+static MachineInstr *getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
+ MachineBasicBlock::iterator RegionEnd) {
+ MachineInstr *LastMI;
+ auto *BB = RegionBegin->getParent();
+ if (RegionEnd != BB->end() && !RegionEnd->isDebugInstr())
+ LastMI = &*RegionEnd;
+ else if (RegionEnd == BB->end())
+ LastMI = &*prev_nodbg(RegionEnd, RegionBegin);
+ else
+ LastMI = &*skipDebugInstructionsBackward(RegionEnd, RegionBegin);
+ return LastMI;
+}
+
void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
const MachineBasicBlock *MBB) {
GCNDownwardRPTracker RPTracker(*LIS);
@@ -597,6 +615,16 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
RPTracker.advanceBeforeNext();
MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
}
+
+ if (GCNTrackers) {
+ assert(LiveOuts.size() == Regions.size());
+ for (unsigned RegionIdx = 0; RegionIdx < Regions.size(); RegionIdx++) {
+ auto RegionBegin = Regions[RegionIdx].first;
+ auto RegionEnd = Regions[RegionIdx].second;
+ MachineInstr *LastMI = getLastMIForRegion(RegionBegin, RegionEnd);
+ LiveOuts[RegionIdx] = BBLiveOutMap.lookup(LastMI);
+ }
+ }
}
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
@@ -616,11 +644,24 @@ GCNScheduleDAGMILive::getBBLiveInMap() const {
return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}
+DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
+GCNScheduleDAGMILive::getBBLiveOutMap() const {
+ assert(!Regions.empty());
+ std::vector<MachineInstr *> BBEnders;
+ BBEnders.reserve(Regions.size());
+ auto I = Regions.rbegin(), E = Regions.rend();
+ for (; I != E; I++)
+ BBEnders.push_back(getLastMIForRegion(I->first, I->second));
+
+ return getLiveRegMap(BBEnders, true /*After*/, *LIS);
+}
+
void GCNScheduleDAGMILive::finalizeSchedule() {
// Start actual scheduling here. This function is called by the base
// MachineScheduler after all regions have been recorded by
// GCNScheduleDAGMILive::schedule().
LiveIns.resize(Regions.size());
+ LiveOuts.resize(Regions.size());
Pressure.resize(Regions.size());
RescheduleRegions.resize(Regions.size());
RegionsWithHighRP.resize(Regions.size());
@@ -639,8 +680,12 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
void GCNScheduleDAGMILive::runSchedStages() {
LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
- if (!Regions.empty())
+ if (!Regions.empty()) {
BBLiveInMap = getBBLiveInMap();
+ if (GCNTrackers) {
+ BBLiveOutMap = getBBLiveOutMap();
+ }
+ }
GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
while (S.advanceStage()) {
@@ -1499,6 +1544,15 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
DAG.Regions = NewRegions;
DAG.RescheduleRegions = NewRescheduleRegions;
+ if (GCNTrackers) {
+ DAG.BBLiveOutMap = DAG.getBBLiveOutMap();
+ auto I = DAG.Regions.begin(), E = DAG.Regions.end();
+ for (; I != E; I++) {
+ MachineInstr *LastMI = getLastMIForRegion(I->first, I->second);
+ DAG.LiveOuts.push_back(DAG.BBLiveOutMap.lookup(LastMI));
+ }
+ }
+
SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 2084aae4128ff..243bb7f0c094d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -205,6 +205,9 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
// Region live-in cache.
SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;
+ // Region live-out cache.
+ SmallVector<GCNRPTracker::LiveRegSet, 32> LiveOuts;
+
// Region pressure cache.
SmallVector<GCNRegPressure, 32> Pressure;
@@ -215,6 +218,10 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
+ DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveOutMap;
+
+ DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveOutMap() const;
+
// Return current region pressure.
GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
>From f151fcbf83383d9efc19bce18e23a908cdef0a31 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 21 May 2024 13:34:59 -0700
Subject: [PATCH 2/3] [AMDGPU] NFC: Provide RPTracker interface for external
iterators
Change-Id: I79b54722e6e858961486248d94766c3f3c161160
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 70 +++++++++++++++--------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 18 +++---
2 files changed, 56 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 5c394e6d6296d..f1c4c8b397ddc 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -343,24 +343,25 @@ void GCNRPTracker::reset(const MachineInstr &MI,
MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
}
-////////////////////////////////////////////////////////////////////////////////
-// GCNUpwardRPTracker
-
-void GCNUpwardRPTracker::reset(const MachineRegisterInfo &MRI_,
- const LiveRegSet &LiveRegs_) {
+void GCNRPTracker::reset(const MachineRegisterInfo &MRI_,
+ const LiveRegSet &LiveRegs_) {
MRI = &MRI_;
LiveRegs = LiveRegs_;
LastTrackedMI = nullptr;
MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_);
}
-void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
+////////////////////////////////////////////////////////////////////////////////
+// GCNUpwardRPTracker
+
+bool GCNUpwardRPTracker::recede(const MachineInstr &MI, bool ShouldTrackIt) {
assert(MRI && "call reset first");
- LastTrackedMI = &MI;
+ if (ShouldTrackIt)
+ LastTrackedMI = &MI;
if (MI.isDebugInstr())
- return;
+ return false;
// Kill all defs.
GCNRegPressure DefPressure, ECDefPressure;
@@ -412,6 +413,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
: max(CurPressure, MaxPressure);
assert(CurPressure == getRegPressure(*MRI, LiveRegs));
+ return false;
}
////////////////////////////////////////////////////////////////////////////////
@@ -430,28 +432,44 @@ bool GCNDownwardRPTracker::reset(const MachineInstr &MI,
return true;
}
-bool GCNDownwardRPTracker::advanceBeforeNext() {
+bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
+ bool ShouldTrackIt,
+ LiveIntervals *TheLIS) {
assert(MRI && "call reset first");
- if (!LastTrackedMI)
- return NextMI == MBBEnd;
+ SlotIndex SI;
+ LiveIntervals *CurrLIS;
+ MachineInstr *CurrMI;
+ if (ShouldTrackIt) {
+ if (!LastTrackedMI)
+ return NextMI == MBBEnd;
+
+ assert(NextMI == MBBEnd || !NextMI->isDebugInstr());
+ CurrLIS = const_cast<LiveIntervals *>(&LIS);
+ CurrMI = const_cast<MachineInstr *>(LastTrackedMI);
+
+ SI = NextMI == MBBEnd
+ ? CurrLIS->getInstructionIndex(*LastTrackedMI).getDeadSlot()
+ : CurrLIS->getInstructionIndex(*NextMI).getBaseIndex();
+ }
- assert(NextMI == MBBEnd || !NextMI->isDebugInstr());
+ else { // !ShouldTrackIt
+ CurrLIS = TheLIS;
+ SI = CurrLIS->getInstructionIndex(*MI).getBaseIndex();
+ CurrMI = MI;
+ }
- SlotIndex SI = NextMI == MBBEnd
- ? LIS.getInstructionIndex(*LastTrackedMI).getDeadSlot()
- : LIS.getInstructionIndex(*NextMI).getBaseIndex();
assert(SI.isValid());
// Remove dead registers or mask bits.
SmallSet<Register, 8> SeenRegs;
- for (auto &MO : LastTrackedMI->operands()) {
+ for (auto &MO : CurrMI->operands()) {
if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
if (MO.isUse() && !MO.readsReg())
continue;
if (!SeenRegs.insert(MO.getReg()).second)
continue;
- const LiveInterval &LI = LIS.getInterval(MO.getReg());
+ const LiveInterval &LI = CurrLIS->getInterval(MO.getReg());
if (LI.hasSubRanges()) {
auto It = LiveRegs.end();
for (const auto &S : LI.subranges()) {
@@ -481,15 +499,18 @@ bool GCNDownwardRPTracker::advanceBeforeNext() {
LastTrackedMI = nullptr;
- return NextMI == MBBEnd;
+ return ShouldTrackIt && (NextMI == MBBEnd);
}
-void GCNDownwardRPTracker::advanceToNext() {
+void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI, bool ShouldTrackIt) {
LastTrackedMI = &*NextMI++;
NextMI = skipDebugInstructionsForward(NextMI, MBBEnd);
+ MachineInstr *CurrMI =
+ ShouldTrackIt ? const_cast<MachineInstr *>(LastTrackedMI) : MI;
+
// Add new registers or mask bits.
- for (const auto &MO : LastTrackedMI->all_defs()) {
+ for (const auto &MO : CurrMI->all_defs()) {
Register Reg = MO.getReg();
if (!Reg.isVirtual())
continue;
@@ -502,11 +523,12 @@ void GCNDownwardRPTracker::advanceToNext() {
MaxPressure = max(MaxPressure, CurPressure);
}
-bool GCNDownwardRPTracker::advance() {
- if (NextMI == MBBEnd)
+bool GCNDownwardRPTracker::advance(MachineInstr *MI, bool ShouldTrackIt,
+ LiveIntervals *TheLIS) {
+ if (ShouldTrackIt && NextMI == MBBEnd)
return false;
- advanceBeforeNext();
- advanceToNext();
+ advanceBeforeNext(MI, ShouldTrackIt, TheLIS);
+ advanceToNext(MI, ShouldTrackIt);
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 752f53752fa68..8abbce138cf16 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -160,6 +160,9 @@ class GCNRPTracker {
bool After);
public:
+ // reset tracker and set live register set to the specified value.
+ void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
+
// live regs for the current state
const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; }
const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
@@ -180,12 +183,9 @@ class GCNUpwardRPTracker : public GCNRPTracker {
public:
GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {}
- // reset tracker and set live register set to the specified value.
- void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
-
// reset tracker at the specified slot index.
void reset(const MachineRegisterInfo &MRI, SlotIndex SI) {
- reset(MRI, llvm::getLiveRegs(SI, LIS, MRI));
+ GCNRPTracker::reset(MRI, llvm::getLiveRegs(SI, LIS, MRI));
}
// reset tracker to the end of the MBB.
@@ -200,7 +200,7 @@ class GCNUpwardRPTracker : public GCNRPTracker {
}
// move to the state just before the MI (in program order).
- void recede(const MachineInstr &MI);
+ bool recede(const MachineInstr &MI, bool ShouldTrackIt = true);
// checks whether the tracker's state after receding MI corresponds
// to reported by LIS.
@@ -242,13 +242,15 @@ class GCNDownwardRPTracker : public GCNRPTracker {
// Move to the state right before the next MI or after the end of MBB.
// Returns false if reached end of the block.
- bool advanceBeforeNext();
+ bool advanceBeforeNext(MachineInstr *MI = nullptr, bool ShouldTrackIt = true,
+ LiveIntervals *TheLIS = nullptr);
// Move to the state at the MI, advanceBeforeNext has to be called first.
- void advanceToNext();
+ void advanceToNext(MachineInstr *MI = nullptr, bool ShouldTrackIt = true);
// Move to the state at the next MI. Returns false if reached end of block.
- bool advance();
+ bool advance(MachineInstr *MI = nullptr, bool ShouldTrackIt = true,
+ LiveIntervals *TheLIS = nullptr);
// Advance instructions until before End.
bool advance(MachineBasicBlock::const_iterator End);
>From 1955aca4fdc0564e21acbc7ab31d8b7cb2952626 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 21 May 2024 18:04:25 -0700
Subject: [PATCH 3/3] [AMDGPU] Optionally Use AMDGPU RPTrackers during
scheduling
Change-Id: I6ae56149c1eb49ea85362267174cc6274c416330
---
.../Target/AMDGPU/GCNIterativeScheduler.cpp | 2 +-
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 1 -
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 101 ++++++++++++++----
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 19 +++-
4 files changed, 98 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index aebfe154b3139..ccee5db9a3bb6 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -480,7 +480,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
"target occupancy = "
<< TgtOcc << '\n');
- GCNMaxOccupancySchedStrategy LStrgy(Context);
+ GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler=*/true);
unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
for (int I = 0; I < NumPasses; ++I) {
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 8abbce138cf16..044d03154f19b 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -162,7 +162,6 @@ class GCNRPTracker {
public:
// reset tracker and set live register set to the specified value.
void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
-
// live regs for the current state
const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; }
const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index a4d05f62a7f74..d568b58a075c8 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -67,6 +67,7 @@ const unsigned ScheduleMetrics::ScaleFactor = 100;
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
: GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
+ TheTracker(*C->LIS), TheUpwardTracker(*C->LIS),
HasHighPressure(false) {}
void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
@@ -133,23 +134,46 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
if (!DAG->isTrackingPressure())
return;
- // getDownwardPressure() and getUpwardPressure() make temporary changes to
- // the tracker, so we need to pass those function a non-const copy.
- RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+ unsigned NewSGPRPressure, NewVGPRPressure;
+ if (!GCNTrackers) {
+ // getDownwardPressure() and getUpwardPressure() make temporary changes to
+ // the tracker, so we need to pass those function a non-const copy.
+ RegPressureTracker &TempTracker =
+ const_cast<RegPressureTracker &>(RPTracker);
+
+ Pressure.clear();
+ MaxPressure.clear();
+
+ if (AtTop)
+ TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
+ else {
+ // FIXME: I think for bottom up scheduling, the register pressure is
+ // cached and can be retrieved by DAG->getPressureDiff(SU).
+ TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
+ }
+ NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
+ NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+ }
- Pressure.clear();
- MaxPressure.clear();
+ if (GCNTrackers) {
+ if (AtTop) {
+ GCNDownwardRPTracker TempTopTracker(TheTracker);
+ auto MI = SU->getInstr();
+ TempTopTracker.advance(MI, true, DAG->getLIS());
- if (AtTop)
- TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
- else {
- // FIXME: I think for bottom up scheduling, the register pressure is cached
- // and can be retrieved by DAG->getPressureDif(SU).
- TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
- }
+ NewSGPRPressure = TempTopTracker.getPressure().getSGPRNum();
+ NewVGPRPressure = TempTopTracker.getPressure().getVGPRNum(false);
+ }
- unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
- unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+ else {
+ GCNUpwardRPTracker TempBotTracker(TheUpwardTracker);
+ auto MI = SU->getInstr();
+ TempBotTracker.recede(*MI, true);
+
+ NewSGPRPressure = TempBotTracker.getPressure().getSGPRNum();
+ NewVGPRPressure = TempBotTracker.getPressure().getVGPRNum(false);
+ }
+ }
// If two instructions increase the pressure of different register sets
// by the same amount, the generic scheduler will prefer to schedule the
@@ -218,8 +242,16 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
unsigned SGPRPressure = 0;
unsigned VGPRPressure = 0;
if (DAG->isTrackingPressure()) {
- SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
- VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+ SGPRPressure =
+ GCNTrackers
+ ? (Zone.isTop() ? TheTracker.getPressure().getSGPRNum()
+ : TheUpwardTracker.getPressure().getSGPRNum())
+ : Pressure[AMDGPU::RegisterPressureSets::SReg_32];
+ VGPRPressure =
+ GCNTrackers
+ ? (Zone.isTop() ? TheTracker.getPressure().getVGPRNum(false)
+ : TheUpwardTracker.getPressure().getVGPRNum(false))
+ : Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
}
ReadyQueue &Q = Zone.Available;
for (SUnit *SU : Q) {
@@ -362,6 +394,16 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
return SU;
}
+void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+ if (GCNTrackers) {
+ MachineInstr *MI = SU->getInstr();
+ IsTopNode ? TheTracker.advance(MI, true, DAG->getLIS())
+ : TheUpwardTracker.recede(*MI, true);
+ }
+
+ return GenericScheduler::schedNode(SU, IsTopNode);
+}
+
GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
assert(CurrentStage && CurrentStage != SchedStages.end());
return *CurrentStage;
@@ -388,12 +430,13 @@ GCNSchedStageID GCNSchedStrategy::getNextStage() const {
}
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
- const MachineSchedContext *C)
+ const MachineSchedContext *C, bool IsLegacyScheduler)
: GCNSchedStrategy(C) {
SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
+ GCNTrackers = GCNTrackers & !IsLegacyScheduler;
}
GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
@@ -682,9 +725,8 @@ void GCNScheduleDAGMILive::runSchedStages() {
if (!Regions.empty()) {
BBLiveInMap = getBBLiveInMap();
- if (GCNTrackers) {
+ if (GCNTrackers)
BBLiveOutMap = getBBLiveOutMap();
- }
}
GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
@@ -703,6 +745,21 @@ void GCNScheduleDAGMILive::runSchedStages() {
continue;
}
+ if (GCNTrackers) {
+ GCNDownwardRPTracker *TheTracker = S.getTracker();
+ GCNUpwardRPTracker *TheUpwardTracker = S.getUpwardTracker();
+ GCNRPTracker::LiveRegSet *RegionLiveIns = &LiveIns[Stage->getRegionIdx()];
+ GCNRPTracker::LiveRegSet *RegionLiveOuts = &LiveOuts[Stage->getRegionIdx()];
+
+ reinterpret_cast<GCNRPTracker *>(TheTracker)->reset(
+ Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+ *RegionLiveIns);
+ reinterpret_cast<GCNRPTracker *>(TheUpwardTracker)->reset(
+ Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+ *RegionLiveOuts);
+
+ }
+
ScheduleDAGMILive::schedule();
Stage->finalizeGCNRegion();
}
@@ -973,6 +1030,7 @@ void GCNSchedStage::finalizeGCNRegion() {
void GCNSchedStage::checkScheduling() {
// Check the results of scheduling.
PressureAfter = DAG.getRealRegPressure(RegionIdx);
+
LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
@@ -1524,9 +1582,6 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
MachineInstr *MI = Entry.first;
MachineInstr *OldMI = Entry.second;
- // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
- DAG.BBLiveInMap.erase(OldMI);
-
// Remove OldMI and update LIS
Register Reg = MI->getOperand(0).getReg();
LIS->RemoveMachineInstrFromMaps(*OldMI);
@@ -1544,6 +1599,8 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
DAG.Regions = NewRegions;
DAG.RescheduleRegions = NewRescheduleRegions;
+ DAG.BBLiveInMap = DAG.getBBLiveInMap();
+
if (GCNTrackers) {
DAG.BBLiveOutMap = DAG.getBBLiveOutMap();
auto I = DAG.Regions.begin(), E = DAG.Regions.end();
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 243bb7f0c094d..b666da267d117 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -70,6 +70,12 @@ class GCNSchedStrategy : public GenericScheduler {
// Pointer to the current SchedStageID.
SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;
+ // GCN RP Tracker for top-down scheduling
+ mutable GCNDownwardRPTracker TheTracker;
+
+ // GCN RP Tracker for bottom-up scheduling
+ mutable GCNUpwardRPTracker TheUpwardTracker;
+
public:
// schedule() have seen register pressure over the critical limits and had to
// track register pressure for actual scheduling heuristics.
@@ -102,6 +108,8 @@ class GCNSchedStrategy : public GenericScheduler {
SUnit *pickNode(bool &IsTopNode) override;
+ void schedNode(SUnit *SU, bool IsTopNode) override;
+
void initialize(ScheduleDAGMI *DAG) override;
unsigned getTargetOccupancy() { return TargetOccupancy; }
@@ -116,13 +124,19 @@ class GCNSchedStrategy : public GenericScheduler {
bool hasNextStage() const;
GCNSchedStageID getNextStage() const;
+
+ GCNDownwardRPTracker *getTracker() { return &TheTracker; }
+
+ GCNUpwardRPTracker *getUpwardTracker() { return &TheUpwardTracker; }
+
};
/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
/// maximum number of waves per simd).
class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
public:
- GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
+ GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
+ bool IsLegacyScheduler = false);
};
/// The goal of this scheduling strategy is to maximize ILP for a single wave
@@ -317,6 +331,9 @@ class GCNSchedStage {
bool isRegionWithExcessRP() const {
return DAG.RegionsWithExcessRP[RegionIdx];
}
+
+ // The region number this stage is currently working on
+ unsigned getRegionIdx() { return RegionIdx; }
// Returns true if the new schedule may result in more spilling.
bool mayCauseSpilling(unsigned WavesAfter);
More information about the llvm-commits
mailing list