[llvm] [AMDGPU] Add and optionally use GCNIterativeRPTrackers (PR #88797)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Tue May 7 15:02:14 PDT 2024
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/88797
>From afb45a55609e3e8386cc8c2a011ef91e76565a1d Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 10 Apr 2024 11:57:26 -0700
Subject: [PATCH 1/9] [AMDGPU] NFCI: Use RegionIdx as key of BBLiveInMap
Change-Id: I433c6d19d79e7bf8ee1fa1d99ca948d5e1411ff8
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 42 +++++++++++++++++++++
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 40 +-------------------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 25 +++++-------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 4 +-
4 files changed, 55 insertions(+), 56 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 5c394e6d6296d..5085b540459df 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -343,6 +343,48 @@ void GCNRPTracker::reset(const MachineInstr &MI,
MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
}
+DenseMap<int, GCNRPTracker::LiveRegSet>
+llvm::getLiveRegMap(DenseMap<MachineInstr *, int> &R, bool After,
+ LiveIntervals &LIS) {
+ std::vector<SlotIndex> Indexes;
+ // Indexes.reserve(R.size());
+ auto &SII = *LIS.getSlotIndexes();
+ for (std::pair<MachineInstr *, int> &Entry : R) {
+ auto SI = SII.getInstructionIndex(*Entry.first);
+ Indexes.push_back(After ? SI.getDeadSlot() : SI.getBaseIndex());
+ }
+ llvm::sort(Indexes);
+
+ auto &MRI = (*R.begin()).first->getParent()->getParent()->getRegInfo();
+ DenseMap<int, GCNRPTracker::LiveRegSet> LiveRegMap;
+ SmallVector<SlotIndex, 32> LiveIdxs, SRLiveIdxs;
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ auto Reg = Register::index2VirtReg(I);
+ if (!LIS.hasInterval(Reg))
+ continue;
+ auto &LI = LIS.getInterval(Reg);
+ LiveIdxs.clear();
+ if (!LI.findIndexesLiveAt(Indexes, std::back_inserter(LiveIdxs)))
+ continue;
+ if (!LI.hasSubRanges()) {
+ for (auto SI : LiveIdxs) {
+ auto Idx = R[SII.getInstructionFromIndex(SI)];
+ LiveRegMap[Idx][Reg] = MRI.getMaxLaneMaskForVReg(Reg);
+ }
+ } else
+ for (const auto &S : LI.subranges()) {
+ // constrain search for subranges by indexes live at main range
+ SRLiveIdxs.clear();
+ S.findIndexesLiveAt(LiveIdxs, std::back_inserter(SRLiveIdxs));
+ for (auto SI : SRLiveIdxs) {
+ auto Idx = R[SII.getInstructionFromIndex(SI)];
+ LiveRegMap[Idx][Reg] |= S.LaneMask;
+ }
+ }
+ }
+ return LiveRegMap;
+}
+
////////////////////////////////////////////////////////////////////////////////
// GCNUpwardRPTracker
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 752f53752fa68..465ec2f7c3278 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -275,44 +275,8 @@ GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
/// After - upon entry or exit of every instruction
/// Note: there is no entry in the map for instructions with empty live reg set
/// Complexity = O(NumVirtRegs * averageLiveRangeSegmentsPerReg * lg(R))
-template <typename Range>
-DenseMap<MachineInstr*, GCNRPTracker::LiveRegSet>
-getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
- std::vector<SlotIndex> Indexes;
- Indexes.reserve(std::distance(R.begin(), R.end()));
- auto &SII = *LIS.getSlotIndexes();
- for (MachineInstr *I : R) {
- auto SI = SII.getInstructionIndex(*I);
- Indexes.push_back(After ? SI.getDeadSlot() : SI.getBaseIndex());
- }
- llvm::sort(Indexes);
-
- auto &MRI = (*R.begin())->getParent()->getParent()->getRegInfo();
- DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> LiveRegMap;
- SmallVector<SlotIndex, 32> LiveIdxs, SRLiveIdxs;
- for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- auto Reg = Register::index2VirtReg(I);
- if (!LIS.hasInterval(Reg))
- continue;
- auto &LI = LIS.getInterval(Reg);
- LiveIdxs.clear();
- if (!LI.findIndexesLiveAt(Indexes, std::back_inserter(LiveIdxs)))
- continue;
- if (!LI.hasSubRanges()) {
- for (auto SI : LiveIdxs)
- LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] =
- MRI.getMaxLaneMaskForVReg(Reg);
- } else
- for (const auto &S : LI.subranges()) {
- // constrain search for subranges by indexes live at main range
- SRLiveIdxs.clear();
- S.findIndexesLiveAt(LiveIdxs, std::back_inserter(SRLiveIdxs));
- for (auto SI : SRLiveIdxs)
- LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] |= S.LaneMask;
- }
- }
- return LiveRegMap;
-}
+DenseMap<int, GCNRPTracker::LiveRegSet>
+getLiveRegMap(DenseMap<MachineInstr *, int> &R, bool After, LiveIntervals &LIS);
inline GCNRPTracker::LiveRegSet getLiveRegsAfter(const MachineInstr &MI,
const LiveIntervals &LIS) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 94d93390d0916..301e00cb3567d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -565,7 +565,7 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
MBBLiveIns.erase(LiveInIt);
} else {
I = Rgn.first;
- auto LRS = BBLiveInMap.lookup(NonDbgMI);
+ auto LRS = BBLiveInMap.lookup(CurRegion);
#ifdef EXPENSIVE_CHECKS
assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
@@ -599,20 +599,15 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
}
}
-DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
+DenseMap<int, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
assert(!Regions.empty());
- std::vector<MachineInstr *> BBStarters;
- BBStarters.reserve(Regions.size());
- auto I = Regions.rbegin(), E = Regions.rend();
- auto *BB = I->first->getParent();
- do {
- auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
- BBStarters.push_back(MI);
- do {
- ++I;
- } while (I != E && I->first->getParent() == BB);
- } while (I != E);
+ DenseMap<MachineInstr *, int> BBStarters;
+ for (int I = Regions.size() - 1; I >= 0; I--) {
+ auto Rgn = Regions[I];
+ auto *MI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
+ BBStarters.insert({MI, I});
+ }
return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}
@@ -1479,9 +1474,6 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
MachineInstr *MI = Entry.first;
MachineInstr *OldMI = Entry.second;
- // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
- DAG.BBLiveInMap.erase(OldMI);
-
// Remove OldMI and update LIS
Register Reg = MI->getOperand(0).getReg();
LIS->RemoveMachineInstrFromMaps(*OldMI);
@@ -1493,6 +1485,7 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
// Update live-ins, register pressure, and regions caches.
for (auto Idx : ImpactedRegions) {
DAG.LiveIns[Idx] = NewLiveIns[Idx];
+ DAG.BBLiveInMap[Idx] = NewLiveIns[Idx];
DAG.Pressure[Idx] = NewPressure[Idx];
DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 2084aae4128ff..fccb787d49672 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -211,9 +211,9 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
// Temporary basic block live-in cache.
DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;
- DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;
+ DenseMap<int, GCNRPTracker::LiveRegSet> BBLiveInMap;
- DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
+ DenseMap<int, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
// Return current region pressure.
GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
>From 92a0531b1c8c648f529d4905a938ba5c52e575b7 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 12 Apr 2024 16:09:49 -0700
Subject: [PATCH 2/9] [AMDGPU] Add GCNIterativeRPTrackers
Change-Id: I3e184df1ca349433db6abbeb9d28eed2fea5640b
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 118 ++++++++++++++++++++++
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 35 +++++++
2 files changed, 153 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 5085b540459df..fb82f88d25642 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -612,6 +612,124 @@ bool GCNUpwardRPTracker::isValid() const {
return true;
}
+////////////////////////////////////////////////////////////////////////////////
+// GCNIterativeRPTrackers
+
+void GCNIterativeRPTracker::reset(const MachineRegisterInfo *MRI_,
+ const LiveRegSet *LiveRegsCopy) {
+
+ MRI = MRI_;
+ if (LiveRegsCopy && &LiveRegs != LiveRegsCopy)
+ LiveRegs = *LiveRegsCopy;
+ if (!LiveRegsCopy)
+ LiveRegs.clear();
+ MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
+}
+
+void GCNIterativeUpwardRPTracker::recede(const MachineInstr &MI,
+ LiveIntervals *LIS) {
+ assert(MRI && "call reset first");
+
+ if (MI.isDebugInstr())
+ return;
+
+ SmallVector<RegisterMaskPair, 8> RegUses;
+ collectVirtualRegUses(RegUses, MI, *LIS, *MRI);
+
+ // calc pressure at the MI (defs + uses)
+ auto AtMIPressure = CurPressure;
+ for (const auto &U : RegUses) {
+ auto LiveMask = LiveRegs[U.RegUnit];
+ AtMIPressure.inc(U.RegUnit, LiveMask, LiveMask | U.LaneMask, *MRI);
+ }
+ // update max pressure
+ MaxPressure = max(AtMIPressure, MaxPressure);
+
+ for (const auto &MO : MI.all_defs()) {
+ if (!MO.getReg().isVirtual() || MO.isDead())
+ continue;
+
+ auto Reg = MO.getReg();
+ auto I = LiveRegs.find(Reg);
+ if (I == LiveRegs.end())
+ continue;
+ auto &LiveMask = I->second;
+ auto PrevMask = LiveMask;
+ LiveMask &= ~getDefRegMask(MO, *MRI);
+ CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
+ if (LiveMask.none())
+ LiveRegs.erase(I);
+ }
+ for (const auto &U : RegUses) {
+ auto &LiveMask = LiveRegs[U.RegUnit];
+ auto PrevMask = LiveMask;
+ LiveMask |= U.LaneMask;
+ CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI);
+ }
+ assert(CurPressure == getRegPressure(*MRI, LiveRegs));
+}
+
+void GCNIterativeDownwardRPTracker::advance(const MachineInstr &MI,
+ LiveIntervals *LIS) {
+ assert(MRI && "call reset first");
+ // Add new registers or mask bits.
+ for (const auto &MO : MI.all_defs()) {
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual())
+ continue;
+ if (MO.isDead())
+ continue;
+ auto &LiveMask = LiveRegs[Reg];
+ auto PrevMask = LiveMask;
+ LiveMask |= getDefRegMask(MO, *MRI);
+ CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
+ }
+
+ SlotIndex SI = LIS->getInstructionIndex(MI).getBoundaryIndex();
+ assert(SI.isValid());
+
+ // Remove dead registers or mask bits.
+ SmallSet<Register, 8> SeenRegs;
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.getReg().isVirtual())
+ continue;
+ if (MO.isUse() && !MO.readsReg())
+ continue;
+ if (MO.isDef())
+ continue;
+ if (!SeenRegs.insert(MO.getReg()).second)
+ continue;
+
+ const LiveInterval &LI = LIS->getInterval(MO.getReg());
+ if (LI.hasSubRanges()) {
+ auto It = LiveRegs.end();
+ for (const auto &S : LI.subranges()) {
+ if (S.expiredAt(SI)) {
+ if (It == LiveRegs.end()) {
+ It = LiveRegs.find(MO.getReg());
+ if (It == LiveRegs.end())
+ llvm_unreachable("register isn't live");
+ }
+ auto PrevMask = It->second;
+ It->second &= ~S.LaneMask;
+ CurPressure.inc(MO.getReg(), PrevMask, It->second, *MRI);
+ }
+ }
+ if (It != LiveRegs.end() && It->second.none()) {
+ LiveRegs.erase(It);
+ }
+ } else if (LI.expiredAt(SI)) {
+ auto It = LiveRegs.find(MO.getReg());
+ if (It == LiveRegs.end())
+ llvm_unreachable("register isn't live");
+ CurPressure.inc(MO.getReg(), It->second, LaneBitmask::getNone(), *MRI);
+ LiveRegs.erase(It);
+ }
+ }
+
+ MaxPressure = max(MaxPressure, CurPressure);
+}
+
Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs,
const MachineRegisterInfo &MRI) {
return Printable([&LiveRegs, &MRI](raw_ostream &OS) {
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 465ec2f7c3278..3991a51ff09a3 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -259,6 +259,41 @@ class GCNDownwardRPTracker : public GCNRPTracker {
const LiveRegSet *LiveRegsCopy = nullptr);
};
+class GCNIterativeRPTracker {
+public:
+ using LiveRegSet = DenseMap<unsigned, LaneBitmask>;
+
+protected:
+ LiveRegSet LiveRegs;
+ GCNRegPressure CurPressure, MaxPressure;
+
+ mutable const MachineRegisterInfo *MRI = nullptr;
+
+ GCNIterativeRPTracker() {};
+
+public:
+ void reset(const MachineRegisterInfo *MRI_, const LiveRegSet *LiveRegsCopy);
+
+ GCNRegPressure getPressure() const { return CurPressure; }
+ GCNRegPressure getMaxPressure() const { return MaxPressure; }
+};
+
+class GCNIterativeUpwardRPTracker : public GCNIterativeRPTracker {
+public:
+ GCNIterativeUpwardRPTracker() {};
+
+ // Move to the state just before the MI.
+ void recede(const MachineInstr &MI, LiveIntervals *TheLIS);
+};
+
+class GCNIterativeDownwardRPTracker : public GCNIterativeRPTracker {
+public:
+ GCNIterativeDownwardRPTracker() {};
+
+ // Move to the state just after the MI.
+ void advance(const MachineInstr &MI, LiveIntervals *TheLIS);
+};
+
LaneBitmask getLiveLaneMask(unsigned Reg,
SlotIndex SI,
const LiveIntervals &LIS,
>From 325b39e50023339f52300c0c84e19fa37e7d00ae Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 12 Apr 2024 16:14:42 -0700
Subject: [PATCH 3/9] [AMDGPU] Introduce getLiveOutMap
Change-Id: I194d9121d725d55cb5fba609267b86031dc3a179
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 24 +++++++++++++++++++++
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 8 +++++++
2 files changed, 32 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 301e00cb3567d..b8b197cbf9b0a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -611,6 +611,30 @@ GCNScheduleDAGMILive::getBBLiveInMap() const {
return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}
+DenseMap<int, GCNRPTracker::LiveRegSet>
+GCNScheduleDAGMILive::getBBLiveOutMap() const {
+ assert(!Regions.empty());
+ DenseMap<MachineInstr *, int> BBEnders;
+ for (int I = Regions.size() - 1; I >= 0; I--) {
+ auto Rgn = Regions[I];
+ auto TheBB = Rgn.first->getParent();
+ if (Rgn.second != TheBB->end() && !Rgn.second->isDebugInstr()) {
+ BBEnders.insert({&*Rgn.second, I});
+ continue;
+ }
+ if (Rgn.second == TheBB->end()) {
+ auto *MI = &*prev_nodbg(Rgn.second, Rgn.first);
+ BBEnders.insert({&*MI, I});
+ continue;
+ }
+
+ auto *MI = &*skipDebugInstructionsBackward(Rgn.second, Rgn.first);
+ BBEnders.insert({MI, I});
+ }
+
+ return getLiveRegMap(BBEnders, true /*After*/, *LIS);
+}
+
void GCNScheduleDAGMILive::finalizeSchedule() {
// Start actual scheduling here. This function is called by the base
// MachineScheduler after all regions have been recorded by
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index fccb787d49672..87b615007e168 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -211,10 +211,18 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
// Temporary basic block live-in cache.
DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;
+ // Map of RegionIdx->LiveIns
DenseMap<int, GCNRPTracker::LiveRegSet> BBLiveInMap;
+ // Calculate and return the per region map: RegionIdx->LiveIns
DenseMap<int, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
+ // Map of RegionIdx->LiveOuts
+ DenseMap<int, GCNRPTracker::LiveRegSet> BBLiveOutMap;
+
+ // Calculate and return the per region map: RegionIdx->LiveOuts
+ DenseMap<int, GCNRPTracker::LiveRegSet> getBBLiveOutMap() const;
+
// Return current region pressure.
GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
>From be2ba38dc9e4bdf124b6d3dd73c8f37e6efe77a8 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 12 Apr 2024 16:17:55 -0700
Subject: [PATCH 4/9] [AMDGPU] Optionally use new trackers
Change-Id: I1f2d93f75881a87434033e7bb387b3342309118b
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 111 ++++++++++++++++----
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 23 ++++
2 files changed, 116 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index b8b197cbf9b0a..a2d966b299493 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -58,6 +58,11 @@ static cl::opt<bool>
"Wave Limited (amdgpu-limit-wave-threshold)."),
cl::init(false));
+static cl::opt<bool> GCNTrackers(
+ "amdgpu-use-gcn-iterative-trackers", cl::Hidden,
+ cl::desc("Use the GCN specific iterative RPTrackers during scheduling"),
+ cl::init(false));
+
const unsigned ScheduleMetrics::ScaleFactor = 100;
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
@@ -128,23 +133,46 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
if (!DAG->isTrackingPressure())
return;
- // getDownwardPressure() and getUpwardPressure() make temporary changes to
- // the tracker, so we need to pass those function a non-const copy.
- RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+ unsigned NewSGPRPressure, NewVGPRPressure;
+ if (!GCNTrackers) {
+ // getDownwardPressure() and getUpwardPressure() make temporary changes to
+ // the tracker, so we need to pass those function a non-const copy.
+ RegPressureTracker &TempTracker =
+ const_cast<RegPressureTracker &>(RPTracker);
+
+ Pressure.clear();
+ MaxPressure.clear();
+
+ if (AtTop)
+ TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
+ else {
+ // FIXME: I think for bottom up scheduling, the register pressure is
+ // cached and can be retrieved by DAG->getPressureDif(SU).
+ TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
+ }
+ NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
+ NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+ }
- Pressure.clear();
- MaxPressure.clear();
+ if (GCNTrackers) {
+ if (AtTop) {
+ GCNIterativeDownwardRPTracker TempTopTracker(TheTracker);
+ auto MI = SU->getInstr();
+ TempTopTracker.advance(*MI, DAG->getLIS());
- if (AtTop)
- TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
- else {
- // FIXME: I think for bottom up scheduling, the register pressure is cached
- // and can be retrieved by DAG->getPressureDif(SU).
- TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
- }
+ NewSGPRPressure = TempTopTracker.getPressure().getSGPRNum();
+ NewVGPRPressure = TempTopTracker.getPressure().getVGPRNum(false);
+ }
- unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
- unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+ else {
+ GCNIterativeUpwardRPTracker TempBotTracker(TheUpwardTracker);
+ auto MI = SU->getInstr();
+ TempBotTracker.recede(*MI, DAG->getLIS());
+
+ NewSGPRPressure = TempBotTracker.getPressure().getSGPRNum();
+ NewVGPRPressure = TempBotTracker.getPressure().getVGPRNum(false);
+ }
+ }
// If two instructions increase the pressure of different register sets
// by the same amount, the generic scheduler will prefer to schedule the
@@ -213,12 +241,20 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
unsigned SGPRPressure = 0;
unsigned VGPRPressure = 0;
if (DAG->isTrackingPressure()) {
- SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
- VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+ SGPRPressure =
+ GCNTrackers
+ ? (Zone.isTop() ? TheTracker.getPressure().getSGPRNum()
+ : TheUpwardTracker.getPressure().getSGPRNum())
+ : Pressure[AMDGPU::RegisterPressureSets::SReg_32];
+ VGPRPressure =
+ GCNTrackers
+ ? (Zone.isTop() ? TheTracker.getPressure().getVGPRNum(false)
+ : TheUpwardTracker.getPressure().getVGPRNum(false))
+ : Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
}
+
ReadyQueue &Q = Zone.Available;
for (SUnit *SU : Q) {
-
SchedCandidate TryCand(ZonePolicy);
initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
SGPRPressure, VGPRPressure);
@@ -312,6 +348,16 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
return Cand.SU;
}
+void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+ if (GCNTrackers) {
+ MachineInstr *MI = SU->getInstr();
+ IsTopNode ? TheTracker.advance(*MI, DAG->getLIS())
+ : TheUpwardTracker.recede(*MI, DAG->getLIS());
+ }
+
+ return GenericScheduler::schedNode(SU, IsTopNode);
+}
+
// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
@@ -658,9 +704,14 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
void GCNScheduleDAGMILive::runSchedStages() {
LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
- if (!Regions.empty())
+ if (!Regions.empty()) {
BBLiveInMap = getBBLiveInMap();
+ if (GCNTrackers) {
+ BBLiveOutMap = getBBLiveOutMap();
+ }
+ }
+
GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
while (S.advanceStage()) {
auto Stage = createSchedStage(S.getCurrentStage());
@@ -677,6 +728,27 @@ void GCNScheduleDAGMILive::runSchedStages() {
continue;
}
+ if (GCNTrackers) {
+ GCNIterativeDownwardRPTracker *TheTracker = S.getTracker();
+ GCNIterativeUpwardRPTracker *TheUpwardTracker = S.getUpwardTracker();
+ auto LiveInEntry = BBLiveInMap.find(Stage->getRegionIdx());
+ GCNRPTracker::LiveRegSet *LiveIns =
+ LiveInEntry != BBLiveInMap.end() ? &LiveInEntry->second : nullptr;
+ auto LiveOutEntry = BBLiveOutMap.find(Stage->getRegionIdx());
+ GCNRPTracker::LiveRegSet *LiveOuts = LiveOutEntry != BBLiveOutMap.end()
+ ? &LiveOutEntry->second
+ : nullptr;
+ TheTracker->reset(
+ &Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+ LiveIns);
+ TheUpwardTracker->reset(
+ &Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+ LiveOuts);
+
+ S.setTracker(*TheTracker);
+ S.setUpwardTracker(*TheUpwardTracker);
+ }
+
ScheduleDAGMILive::schedule();
Stage->finalizeGCNRegion();
}
@@ -1513,6 +1585,9 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
DAG.Pressure[Idx] = NewPressure[Idx];
DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
}
+
+ DAG.BBLiveOutMap = DAG.getBBLiveOutMap();
+
DAG.Regions = NewRegions;
DAG.RescheduleRegions = NewRescheduleRegions;
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 87b615007e168..e463bd76c1124 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -70,6 +70,12 @@ class GCNSchedStrategy : public GenericScheduler {
// Pointer to the current SchedStageID.
SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;
+ // GCN RP Tracker for top-down scheduling
+ GCNIterativeDownwardRPTracker TheTracker;
+
+ // GCN RP Tracker for bottom-up scheduling
+ GCNIterativeUpwardRPTracker TheUpwardTracker;
+
public:
// schedule() have seen register pressure over the critical limits and had to
// track register pressure for actual scheduling heuristics.
@@ -102,6 +108,8 @@ class GCNSchedStrategy : public GenericScheduler {
SUnit *pickNode(bool &IsTopNode) override;
+ void schedNode(SUnit *SU, bool IsTopNode) override;
+
void initialize(ScheduleDAGMI *DAG) override;
unsigned getTargetOccupancy() { return TargetOccupancy; }
@@ -116,6 +124,18 @@ class GCNSchedStrategy : public GenericScheduler {
bool hasNextStage() const;
GCNSchedStageID getNextStage() const;
+
+ GCNIterativeDownwardRPTracker *getTracker() { return &TheTracker; }
+
+ GCNIterativeUpwardRPTracker *getUpwardTracker() { return &TheUpwardTracker; }
+
+ void setTracker(GCNIterativeDownwardRPTracker &Tracker) {
+ TheTracker = Tracker;
+ }
+
+ void setUpwardTracker(GCNIterativeUpwardRPTracker &Tracker) {
+ TheUpwardTracker = Tracker;
+ }
};
/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
@@ -319,6 +339,9 @@ class GCNSchedStage {
return DAG.RegionsWithExcessRP[RegionIdx];
}
+ // The region number this stage is currently working on
+ unsigned getRegionIdx() { return RegionIdx; }
+
// Returns true if the new schedule may result in more spilling.
bool mayCauseSpilling(unsigned WavesAfter);
>From 45ddcd8b8206125c6648f5c2fc15e4aa67c60b6a Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 15 Apr 2024 13:43:43 -0700
Subject: [PATCH 5/9] Add note about recede / advance
Change-Id: Ia353f09d201e2b5a472b73930e3dde1fc51da363
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index fb82f88d25642..06be02d1dc9cb 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -626,6 +626,7 @@ void GCNIterativeRPTracker::reset(const MachineRegisterInfo *MRI_,
MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
}
+// Mostly copy+paste from GCNUpwardRPTracker::recede
void GCNIterativeUpwardRPTracker::recede(const MachineInstr &MI,
LiveIntervals *LIS) {
assert(MRI && "call reset first");
@@ -669,6 +670,8 @@ void GCNIterativeUpwardRPTracker::recede(const MachineInstr &MI,
assert(CurPressure == getRegPressure(*MRI, LiveRegs));
}
+// Mostly copy+paste from GCNDownwardRPTracker::(advanceBeforeNext +
+// advanceToNext)
void GCNIterativeDownwardRPTracker::advance(const MachineInstr &MI,
LiveIntervals *LIS) {
assert(MRI && "call reset first");
@@ -693,9 +696,9 @@ void GCNIterativeDownwardRPTracker::advance(const MachineInstr &MI,
for (auto &MO : MI.operands()) {
if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
- if (MO.isUse() && !MO.readsReg())
+ if (!MO.isUse())
continue;
- if (MO.isDef())
+ if (!MO.readsReg())
continue;
if (!SeenRegs.insert(MO.getReg()).second)
continue;
>From 4dcec51b2f46ba6dfdba3b6e4fedfbec758cca35 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 15 Apr 2024 13:59:21 -0700
Subject: [PATCH 6/9] Guard recomputation of liveOuts in SinkRemat stage
Change-Id: I15025afa46ae092aa67f42984ee52154c5f4dba4
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index a2d966b299493..4019dcebd8d5c 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1586,7 +1586,8 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
}
- DAG.BBLiveOutMap = DAG.getBBLiveOutMap();
+ if (GCNTrackers)
+ DAG.BBLiveOutMap = DAG.getBBLiveOutMap();
DAG.Regions = NewRegions;
DAG.RescheduleRegions = NewRescheduleRegions;
>From 1c6ff626130f9aaf6b5a61bc84ab29c17a3fc2ca Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 18 Apr 2024 09:55:10 -0700
Subject: [PATCH 7/9] Compatibility w/ legacy maxOccupancy scheduler + review
comment
Change-Id: I5522bc12692d9c0d406f0d6e3fe002ff6ff40494
---
llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp | 2 +-
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 4 ++--
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 3 ++-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 3 ++-
4 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index aebfe154b3139..ccee5db9a3bb6 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -480,7 +480,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
"target occupancy = "
<< TgtOcc << '\n');
- GCNMaxOccupancySchedStrategy LStrgy(Context);
+ GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler*/ true);
unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
for (int I = 0; I < NumPasses; ++I) {
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 06be02d1dc9cb..6b0b267ef4bae 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -696,10 +696,10 @@ void GCNIterativeDownwardRPTracker::advance(const MachineInstr &MI,
for (auto &MO : MI.operands()) {
if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
- if (!MO.isUse())
- continue;
if (!MO.readsReg())
continue;
+ if (!MO.isUse())
+ continue;
if (!SeenRegs.insert(MO.getReg()).second)
continue;
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 4019dcebd8d5c..f9df36a125749 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -429,12 +429,13 @@ GCNSchedStageID GCNSchedStrategy::getNextStage() const {
}
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
- const MachineSchedContext *C)
+ const MachineSchedContext *C, bool IsLegacyScheduler)
: GCNSchedStrategy(C) {
SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
+ GCNTrackers = GCNTrackers && !IsLegacyScheduler;
}
GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index e463bd76c1124..a748300641517 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -142,7 +142,8 @@ class GCNSchedStrategy : public GenericScheduler {
/// maximum number of waves per simd).
class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
public:
- GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
+ GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
+ bool IsLegacyScheduler = false);
};
/// The goal of this scheduling strategy is to maximize ILP for a single wave
>From b0a75e447e7eac7ad84cd005445e6096cab04a3a Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 18 Apr 2024 11:34:10 -0700
Subject: [PATCH 8/9] formatting
Change-Id: I1ce9d57c2971f0b498144f353de0e112a9b60a78
---
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 3991a51ff09a3..8b23981dcf56c 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -269,7 +269,7 @@ class GCNIterativeRPTracker {
mutable const MachineRegisterInfo *MRI = nullptr;
- GCNIterativeRPTracker() {};
+ GCNIterativeRPTracker(){};
public:
void reset(const MachineRegisterInfo *MRI_, const LiveRegSet *LiveRegsCopy);
@@ -280,7 +280,7 @@ class GCNIterativeRPTracker {
class GCNIterativeUpwardRPTracker : public GCNIterativeRPTracker {
public:
- GCNIterativeUpwardRPTracker() {};
+ GCNIterativeUpwardRPTracker(){};
// Move to the state just before the MI.
void recede(const MachineInstr &MI, LiveIntervals *TheLIS);
@@ -288,7 +288,7 @@ class GCNIterativeUpwardRPTracker : public GCNIterativeRPTracker {
class GCNIterativeDownwardRPTracker : public GCNIterativeRPTracker {
public:
- GCNIterativeDownwardRPTracker() {};
+ GCNIterativeDownwardRPTracker(){};
// Move to the state just after the MI.
void advance(const MachineInstr &MI, LiveIntervals *TheLIS);
>From dd877ee1048acb7f3a3fb5ea27494e9b58c23f57 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 7 May 2024 14:52:06 -0700
Subject: [PATCH 9/9] Switch to unsigned + fix bug
Change-Id: Ibde513bc45e08966b471b7189b6dc8953bd2d198
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 8 +++---
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 4 +--
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 28 +++++++++++----------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 8 +++---
4 files changed, 25 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 6b0b267ef4bae..307b8477041c5 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -343,20 +343,20 @@ void GCNRPTracker::reset(const MachineInstr &MI,
MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
}
-DenseMap<int, GCNRPTracker::LiveRegSet>
-llvm::getLiveRegMap(DenseMap<MachineInstr *, int> &R, bool After,
+DenseMap<unsigned, GCNRPTracker::LiveRegSet>
+llvm::getLiveRegMap(DenseMap<MachineInstr *, unsigned> &R, bool After,
LiveIntervals &LIS) {
std::vector<SlotIndex> Indexes;
// Indexes.reserve(R.size());
auto &SII = *LIS.getSlotIndexes();
- for (std::pair<MachineInstr *, int> &Entry : R) {
+ for (std::pair<MachineInstr *, unsigned> &Entry : R) {
auto SI = SII.getInstructionIndex(*Entry.first);
Indexes.push_back(After ? SI.getDeadSlot() : SI.getBaseIndex());
}
llvm::sort(Indexes);
auto &MRI = (*R.begin()).first->getParent()->getParent()->getRegInfo();
- DenseMap<int, GCNRPTracker::LiveRegSet> LiveRegMap;
+ DenseMap<unsigned, GCNRPTracker::LiveRegSet> LiveRegMap;
SmallVector<SlotIndex, 32> LiveIdxs, SRLiveIdxs;
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
auto Reg = Register::index2VirtReg(I);
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 8b23981dcf56c..69a04e18185d7 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -310,8 +310,8 @@ GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
/// After - upon entry or exit of every instruction
/// Note: there is no entry in the map for instructions with empty live reg set
/// Complexity = O(NumVirtRegs * averageLiveRangeSegmentsPerReg * lg(R))
-DenseMap<int, GCNRPTracker::LiveRegSet>
-getLiveRegMap(DenseMap<MachineInstr *, int> &R, bool After, LiveIntervals &LIS);
+DenseMap<unsigned, GCNRPTracker::LiveRegSet>
+getLiveRegMap(DenseMap<MachineInstr *, unsigned> &R, bool After, LiveIntervals &LIS);
inline GCNRPTracker::LiveRegSet getLiveRegsAfter(const MachineInstr &MI,
const LiveIntervals &LIS) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index f9df36a125749..29904d813d9f4 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -646,27 +646,29 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
}
}
-DenseMap<int, GCNRPTracker::LiveRegSet>
+DenseMap<unsigned, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
assert(!Regions.empty());
- DenseMap<MachineInstr *, int> BBStarters;
- for (int I = Regions.size() - 1; I >= 0; I--) {
- auto Rgn = Regions[I];
+ DenseMap<MachineInstr *, unsigned> BBStarters;
+ for (unsigned I = Regions.size(); I > 0; I--) {
+ unsigned Idx = I - 1;
+ auto Rgn = Regions[Idx];
auto *MI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
- BBStarters.insert({MI, I});
+ BBStarters.insert({MI, Idx});
}
return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}
-DenseMap<int, GCNRPTracker::LiveRegSet>
+DenseMap<unsigned, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveOutMap() const {
assert(!Regions.empty());
- DenseMap<MachineInstr *, int> BBEnders;
- for (int I = Regions.size() - 1; I >= 0; I--) {
- auto Rgn = Regions[I];
+ DenseMap<MachineInstr *, unsigned> BBEnders;
+ for (unsigned I = Regions.size(); I > 0; I--) {
+ unsigned Idx = I - 1;
+ auto Rgn = Regions[Idx];
auto TheBB = Rgn.first->getParent();
if (Rgn.second != TheBB->end() && !Rgn.second->isDebugInstr()) {
- BBEnders.insert({&*Rgn.second, I});
+ BBEnders.insert({&*Rgn.second, Idx});
continue;
}
if (Rgn.second == TheBB->end()) {
@@ -1587,12 +1589,12 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
}
- if (GCNTrackers)
- DAG.BBLiveOutMap = DAG.getBBLiveOutMap();
-
DAG.Regions = NewRegions;
DAG.RescheduleRegions = NewRescheduleRegions;
+ if (GCNTrackers)
+ DAG.BBLiveOutMap = DAG.getBBLiveOutMap();
+
SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index a748300641517..19c841df88b77 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -233,16 +233,16 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;
// Map of RegionIdx->LiveIns
- DenseMap<int, GCNRPTracker::LiveRegSet> BBLiveInMap;
+ DenseMap<unsigned, GCNRPTracker::LiveRegSet> BBLiveInMap;
// Calculate and return the per region map: RegionIdx->LiveIns
- DenseMap<int, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
+ DenseMap<unsigned, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
// Map of RegionIdx->LiveOuts
- DenseMap<int, GCNRPTracker::LiveRegSet> BBLiveOutMap;
+ DenseMap<unsigned, GCNRPTracker::LiveRegSet> BBLiveOutMap;
// Calculate and return the per region map: RegionIdx->LiveOuts
- DenseMap<int, GCNRPTracker::LiveRegSet> getBBLiveOutMap() const;
+ DenseMap<unsigned, GCNRPTracker::LiveRegSet> getBBLiveOutMap() const;
// Return current region pressure.
GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
More information about the llvm-commits
mailing list