[llvm] [AMDGPU] Add and optionally use GCNIterativeRPTrackers (PR #88797)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 15 14:07:00 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jeffrey Byrnes (jrbyrnes)
<details>
<summary>Changes</summary>
Having walked through several interesting lit changes, I think it's a good time to put up a WIP PR for any high level comments.
There are still a few things left to do, namely:
1. Verify more lit changes when enabled
2. Add tests
3. Compatibility with schedulers in GCNIterativeScheduler.cpp
Nonetheless, this patch adds the GCNIterativeRPTrackers. We can optionally use these instead of generic RPTrackers during scheduling. The benefits include: 1. better subreg liveness modeling, 2. actually tracking %av registers (counted as VGPRs), and 3. having accurate global live-through RP.
I've introduced these as separate classes since there is other outstanding work on RPTrackers. The plan is to merge the two projects once both are at a steady state.
---
Patch is 21.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88797.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/GCNRegPressure.cpp (+163)
- (modified) llvm/lib/Target/AMDGPU/GCNRegPressure.h (+37-38)
- (modified) llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp (+127-34)
- (modified) llvm/lib/Target/AMDGPU/GCNSchedStrategy.h (+33-2)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 5c394e6d6296d0..06be02d1dc9cb6 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -343,6 +343,48 @@ void GCNRPTracker::reset(const MachineInstr &MI,
MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
}
+DenseMap<int, GCNRPTracker::LiveRegSet>
+llvm::getLiveRegMap(DenseMap<MachineInstr *, int> &R, bool After,
+ LiveIntervals &LIS) {
+ std::vector<SlotIndex> Indexes;
+ // Indexes.reserve(R.size());
+ auto &SII = *LIS.getSlotIndexes();
+ for (std::pair<MachineInstr *, int> &Entry : R) {
+ auto SI = SII.getInstructionIndex(*Entry.first);
+ Indexes.push_back(After ? SI.getDeadSlot() : SI.getBaseIndex());
+ }
+ llvm::sort(Indexes);
+
+ auto &MRI = (*R.begin()).first->getParent()->getParent()->getRegInfo();
+ DenseMap<int, GCNRPTracker::LiveRegSet> LiveRegMap;
+ SmallVector<SlotIndex, 32> LiveIdxs, SRLiveIdxs;
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ auto Reg = Register::index2VirtReg(I);
+ if (!LIS.hasInterval(Reg))
+ continue;
+ auto &LI = LIS.getInterval(Reg);
+ LiveIdxs.clear();
+ if (!LI.findIndexesLiveAt(Indexes, std::back_inserter(LiveIdxs)))
+ continue;
+ if (!LI.hasSubRanges()) {
+ for (auto SI : LiveIdxs) {
+ auto Idx = R[SII.getInstructionFromIndex(SI)];
+ LiveRegMap[Idx][Reg] = MRI.getMaxLaneMaskForVReg(Reg);
+ }
+ } else
+ for (const auto &S : LI.subranges()) {
+ // constrain search for subranges by indexes live at main range
+ SRLiveIdxs.clear();
+ S.findIndexesLiveAt(LiveIdxs, std::back_inserter(SRLiveIdxs));
+ for (auto SI : SRLiveIdxs) {
+ auto Idx = R[SII.getInstructionFromIndex(SI)];
+ LiveRegMap[Idx][Reg] |= S.LaneMask;
+ }
+ }
+ }
+ return LiveRegMap;
+}
+
////////////////////////////////////////////////////////////////////////////////
// GCNUpwardRPTracker
@@ -570,6 +612,127 @@ bool GCNUpwardRPTracker::isValid() const {
return true;
}
+////////////////////////////////////////////////////////////////////////////////
+// GCNIterativeRPTrackers
+
+void GCNIterativeRPTracker::reset(const MachineRegisterInfo *MRI_,
+ const LiveRegSet *LiveRegsCopy) {
+
+ MRI = MRI_;
+ if (LiveRegsCopy && &LiveRegs != LiveRegsCopy)
+ LiveRegs = *LiveRegsCopy;
+ if (!LiveRegsCopy)
+ LiveRegs.clear();
+ MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
+}
+
+// Mostly copy+paste from GCNUpwardRPTracker::recede
+void GCNIterativeUpwardRPTracker::recede(const MachineInstr &MI,
+ LiveIntervals *LIS) {
+ assert(MRI && "call reset first");
+
+ if (MI.isDebugInstr())
+ return;
+
+ SmallVector<RegisterMaskPair, 8> RegUses;
+ collectVirtualRegUses(RegUses, MI, *LIS, *MRI);
+
+ // calc pressure at the MI (defs + uses)
+ auto AtMIPressure = CurPressure;
+ for (const auto &U : RegUses) {
+ auto LiveMask = LiveRegs[U.RegUnit];
+ AtMIPressure.inc(U.RegUnit, LiveMask, LiveMask | U.LaneMask, *MRI);
+ }
+ // update max pressure
+ MaxPressure = max(AtMIPressure, MaxPressure);
+
+ for (const auto &MO : MI.all_defs()) {
+ if (!MO.getReg().isVirtual() || MO.isDead())
+ continue;
+
+ auto Reg = MO.getReg();
+ auto I = LiveRegs.find(Reg);
+ if (I == LiveRegs.end())
+ continue;
+ auto &LiveMask = I->second;
+ auto PrevMask = LiveMask;
+ LiveMask &= ~getDefRegMask(MO, *MRI);
+ CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
+ if (LiveMask.none())
+ LiveRegs.erase(I);
+ }
+ for (const auto &U : RegUses) {
+ auto &LiveMask = LiveRegs[U.RegUnit];
+ auto PrevMask = LiveMask;
+ LiveMask |= U.LaneMask;
+ CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI);
+ }
+ assert(CurPressure == getRegPressure(*MRI, LiveRegs));
+}
+
+// Mostly copy+paste from GCNDownwardRPTracker::(advanceBeforeNext +
+// advanceToNext)
+void GCNIterativeDownwardRPTracker::advance(const MachineInstr &MI,
+ LiveIntervals *LIS) {
+ assert(MRI && "call reset first");
+ // Add new registers or mask bits.
+ for (const auto &MO : MI.all_defs()) {
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual())
+ continue;
+ if (MO.isDead())
+ continue;
+ auto &LiveMask = LiveRegs[Reg];
+ auto PrevMask = LiveMask;
+ LiveMask |= getDefRegMask(MO, *MRI);
+ CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
+ }
+
+ SlotIndex SI = LIS->getInstructionIndex(MI).getBoundaryIndex();
+ assert(SI.isValid());
+
+ // Remove dead registers or mask bits.
+ SmallSet<Register, 8> SeenRegs;
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.getReg().isVirtual())
+ continue;
+ if (!MO.isUse())
+ continue;
+ if (!MO.readsReg())
+ continue;
+ if (!SeenRegs.insert(MO.getReg()).second)
+ continue;
+
+ const LiveInterval &LI = LIS->getInterval(MO.getReg());
+ if (LI.hasSubRanges()) {
+ auto It = LiveRegs.end();
+ for (const auto &S : LI.subranges()) {
+ if (S.expiredAt(SI)) {
+ if (It == LiveRegs.end()) {
+ It = LiveRegs.find(MO.getReg());
+ if (It == LiveRegs.end())
+ llvm_unreachable("register isn't live");
+ }
+ auto PrevMask = It->second;
+ It->second &= ~S.LaneMask;
+ CurPressure.inc(MO.getReg(), PrevMask, It->second, *MRI);
+ }
+ }
+ if (It != LiveRegs.end() && It->second.none()) {
+ LiveRegs.erase(It);
+ }
+ } else if (LI.expiredAt(SI)) {
+ auto It = LiveRegs.find(MO.getReg());
+ if (It == LiveRegs.end())
+ llvm_unreachable("register isn't live");
+ CurPressure.inc(MO.getReg(), It->second, LaneBitmask::getNone(), *MRI);
+ LiveRegs.erase(It);
+ }
+ }
+
+ MaxPressure = max(MaxPressure, CurPressure);
+}
+
Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs,
const MachineRegisterInfo &MRI) {
return Printable([&LiveRegs, &MRI](raw_ostream &OS) {
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 752f53752fa68b..3991a51ff09a3b 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -259,6 +259,41 @@ class GCNDownwardRPTracker : public GCNRPTracker {
const LiveRegSet *LiveRegsCopy = nullptr);
};
+class GCNIterativeRPTracker {
+public:
+ using LiveRegSet = DenseMap<unsigned, LaneBitmask>;
+
+protected:
+ LiveRegSet LiveRegs;
+ GCNRegPressure CurPressure, MaxPressure;
+
+ mutable const MachineRegisterInfo *MRI = nullptr;
+
+ GCNIterativeRPTracker() {};
+
+public:
+ void reset(const MachineRegisterInfo *MRI_, const LiveRegSet *LiveRegsCopy);
+
+ GCNRegPressure getPressure() const { return CurPressure; }
+ GCNRegPressure getMaxPressure() const { return MaxPressure; }
+};
+
+class GCNIterativeUpwardRPTracker : public GCNIterativeRPTracker {
+public:
+ GCNIterativeUpwardRPTracker() {};
+
+ // Move to the state just before the MI.
+ void recede(const MachineInstr &MI, LiveIntervals *TheLIS);
+};
+
+class GCNIterativeDownwardRPTracker : public GCNIterativeRPTracker {
+public:
+ GCNIterativeDownwardRPTracker() {};
+
+ // Move to the state just after the MI.
+ void advance(const MachineInstr &MI, LiveIntervals *TheLIS);
+};
+
LaneBitmask getLiveLaneMask(unsigned Reg,
SlotIndex SI,
const LiveIntervals &LIS,
@@ -275,44 +310,8 @@ GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
/// After - upon entry or exit of every instruction
/// Note: there is no entry in the map for instructions with empty live reg set
/// Complexity = O(NumVirtRegs * averageLiveRangeSegmentsPerReg * lg(R))
-template <typename Range>
-DenseMap<MachineInstr*, GCNRPTracker::LiveRegSet>
-getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
- std::vector<SlotIndex> Indexes;
- Indexes.reserve(std::distance(R.begin(), R.end()));
- auto &SII = *LIS.getSlotIndexes();
- for (MachineInstr *I : R) {
- auto SI = SII.getInstructionIndex(*I);
- Indexes.push_back(After ? SI.getDeadSlot() : SI.getBaseIndex());
- }
- llvm::sort(Indexes);
-
- auto &MRI = (*R.begin())->getParent()->getParent()->getRegInfo();
- DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> LiveRegMap;
- SmallVector<SlotIndex, 32> LiveIdxs, SRLiveIdxs;
- for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- auto Reg = Register::index2VirtReg(I);
- if (!LIS.hasInterval(Reg))
- continue;
- auto &LI = LIS.getInterval(Reg);
- LiveIdxs.clear();
- if (!LI.findIndexesLiveAt(Indexes, std::back_inserter(LiveIdxs)))
- continue;
- if (!LI.hasSubRanges()) {
- for (auto SI : LiveIdxs)
- LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] =
- MRI.getMaxLaneMaskForVReg(Reg);
- } else
- for (const auto &S : LI.subranges()) {
- // constrain search for subranges by indexes live at main range
- SRLiveIdxs.clear();
- S.findIndexesLiveAt(LiveIdxs, std::back_inserter(SRLiveIdxs));
- for (auto SI : SRLiveIdxs)
- LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] |= S.LaneMask;
- }
- }
- return LiveRegMap;
-}
+DenseMap<int, GCNRPTracker::LiveRegSet>
+getLiveRegMap(DenseMap<MachineInstr *, int> &R, bool After, LiveIntervals &LIS);
inline GCNRPTracker::LiveRegSet getLiveRegsAfter(const MachineInstr &MI,
const LiveIntervals &LIS) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 94d93390d0916f..4019dcebd8d5c5 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -58,6 +58,11 @@ static cl::opt<bool>
"Wave Limited (amdgpu-limit-wave-threshold)."),
cl::init(false));
+static cl::opt<bool> GCNTrackers(
+ "amdgpu-use-gcn-iterative-trackers", cl::Hidden,
+ cl::desc("Use the GCN specific iterative RPTrackers during scheduling"),
+ cl::init(false));
+
const unsigned ScheduleMetrics::ScaleFactor = 100;
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
@@ -128,23 +133,46 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
if (!DAG->isTrackingPressure())
return;
- // getDownwardPressure() and getUpwardPressure() make temporary changes to
- // the tracker, so we need to pass those function a non-const copy.
- RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+ unsigned NewSGPRPressure, NewVGPRPressure;
+ if (!GCNTrackers) {
+ // getDownwardPressure() and getUpwardPressure() make temporary changes to
+ // the tracker, so we need to pass those function a non-const copy.
+ RegPressureTracker &TempTracker =
+ const_cast<RegPressureTracker &>(RPTracker);
+
+ Pressure.clear();
+ MaxPressure.clear();
+
+ if (AtTop)
+ TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
+ else {
+ // FIXME: I think for bottom up scheduling, the register pressure is
+ // cached and can be retrieved by DAG->getPressureDiff(SU).
+ TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
+ }
+ NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
+ NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+ }
- Pressure.clear();
- MaxPressure.clear();
+ if (GCNTrackers) {
+ if (AtTop) {
+ GCNIterativeDownwardRPTracker TempTopTracker(TheTracker);
+ auto MI = SU->getInstr();
+ TempTopTracker.advance(*MI, DAG->getLIS());
- if (AtTop)
- TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
- else {
- // FIXME: I think for bottom up scheduling, the register pressure is cached
- // and can be retrieved by DAG->getPressureDif(SU).
- TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
- }
+ NewSGPRPressure = TempTopTracker.getPressure().getSGPRNum();
+ NewVGPRPressure = TempTopTracker.getPressure().getVGPRNum(false);
+ }
+
+ else {
+ GCNIterativeUpwardRPTracker TempBotTracker(TheUpwardTracker);
+ auto MI = SU->getInstr();
+ TempBotTracker.recede(*MI, DAG->getLIS());
- unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
- unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+ NewSGPRPressure = TempBotTracker.getPressure().getSGPRNum();
+ NewVGPRPressure = TempBotTracker.getPressure().getVGPRNum(false);
+ }
+ }
// If two instructions increase the pressure of different register sets
// by the same amount, the generic scheduler will prefer to schedule the
@@ -213,12 +241,20 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
unsigned SGPRPressure = 0;
unsigned VGPRPressure = 0;
if (DAG->isTrackingPressure()) {
- SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
- VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+ SGPRPressure =
+ GCNTrackers
+ ? (Zone.isTop() ? TheTracker.getPressure().getSGPRNum()
+ : TheUpwardTracker.getPressure().getSGPRNum())
+ : Pressure[AMDGPU::RegisterPressureSets::SReg_32];
+ VGPRPressure =
+ GCNTrackers
+ ? (Zone.isTop() ? TheTracker.getPressure().getVGPRNum(false)
+ : TheUpwardTracker.getPressure().getVGPRNum(false))
+ : Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
}
+
ReadyQueue &Q = Zone.Available;
for (SUnit *SU : Q) {
-
SchedCandidate TryCand(ZonePolicy);
initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
SGPRPressure, VGPRPressure);
@@ -312,6 +348,16 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
return Cand.SU;
}
+void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+ if (GCNTrackers) {
+ MachineInstr *MI = SU->getInstr();
+ IsTopNode ? TheTracker.advance(*MI, DAG->getLIS())
+ : TheUpwardTracker.recede(*MI, DAG->getLIS());
+ }
+
+ return GenericScheduler::schedNode(SU, IsTopNode);
+}
+
// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
@@ -565,7 +611,7 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
MBBLiveIns.erase(LiveInIt);
} else {
I = Rgn.first;
- auto LRS = BBLiveInMap.lookup(NonDbgMI);
+ auto LRS = BBLiveInMap.lookup(CurRegion);
#ifdef EXPENSIVE_CHECKS
assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
@@ -599,23 +645,42 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
}
}
-DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
+DenseMap<int, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
assert(!Regions.empty());
- std::vector<MachineInstr *> BBStarters;
- BBStarters.reserve(Regions.size());
- auto I = Regions.rbegin(), E = Regions.rend();
- auto *BB = I->first->getParent();
- do {
- auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
- BBStarters.push_back(MI);
- do {
- ++I;
- } while (I != E && I->first->getParent() == BB);
- } while (I != E);
+ DenseMap<MachineInstr *, int> BBStarters;
+ for (int I = Regions.size() - 1; I >= 0; I--) {
+ auto Rgn = Regions[I];
+ auto *MI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
+ BBStarters.insert({MI, I});
+ }
return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}
+DenseMap<int, GCNRPTracker::LiveRegSet>
+GCNScheduleDAGMILive::getBBLiveOutMap() const {
+ assert(!Regions.empty());
+ DenseMap<MachineInstr *, int> BBEnders;
+ for (int I = Regions.size() - 1; I >= 0; I--) {
+ auto Rgn = Regions[I];
+ auto TheBB = Rgn.first->getParent();
+ if (Rgn.second != TheBB->end() && !Rgn.second->isDebugInstr()) {
+ BBEnders.insert({&*Rgn.second, I});
+ continue;
+ }
+ if (Rgn.second == TheBB->end()) {
+ auto *MI = &*prev_nodbg(Rgn.second, Rgn.first);
+ BBEnders.insert({&*MI, I});
+ continue;
+ }
+
+ auto *MI = &*skipDebugInstructionsBackward(Rgn.second, Rgn.first);
+ BBEnders.insert({MI, I});
+ }
+
+ return getLiveRegMap(BBEnders, true /*After*/, *LIS);
+}
+
void GCNScheduleDAGMILive::finalizeSchedule() {
// Start actual scheduling here. This function is called by the base
// MachineScheduler after all regions have been recorded by
@@ -639,9 +704,14 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
void GCNScheduleDAGMILive::runSchedStages() {
LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
- if (!Regions.empty())
+ if (!Regions.empty()) {
BBLiveInMap = getBBLiveInMap();
+ if (GCNTrackers) {
+ BBLiveOutMap = getBBLiveOutMap();
+ }
+ }
+
GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
while (S.advanceStage()) {
auto Stage = createSchedStage(S.getCurrentStage());
@@ -658,6 +728,27 @@ void GCNScheduleDAGMILive::runSchedStages() {
continue;
}
+ if (GCNTrackers) {
+ GCNIterativeDownwardRPTracker *TheTracker = S.getTracker();
+ GCNIterativeUpwardRPTracker *TheUpwardTracker = S.getUpwardTracker();
+ auto LiveInEntry = BBLiveInMap.find(Stage->getRegionIdx());
+ GCNRPTracker::LiveRegSet *LiveIns =
+ LiveInEntry != BBLiveInMap.end() ? &LiveInEntry->second : nullptr;
+ auto LiveOutEntry = BBLiveOutMap.find(Stage->getRegionIdx());
+ GCNRPTracker::LiveRegSet *LiveOuts = LiveOutEntry != BBLiveOutMap.end()
+ ? &LiveOutEntry->second
+ : nullptr;
+ TheTracker->reset(
+ &Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+ LiveIns);
+ TheUpwardTracker->reset(
+ &Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+ LiveOuts);
+
+ S.setTracker(*TheTracker);
+ S.setUpwardTracker(*TheUpwardTracker);
+ }
+
ScheduleDAGMILive::schedule();
Stage->finalizeGCNRegion();
}
@@ -1479,9 +1570,6 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
MachineInstr *MI = Entry.first;
MachineInstr *OldMI = Entry.second;
- // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
- DAG.BBLiveInMap.erase(OldMI);
-
// Remove OldMI and update LIS
Register Reg = MI->getOperand(0).getReg();
LIS->RemoveMachineInstrFromMaps(*OldMI);
@@ -1493,9 +1581,14 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
// Update live-ins, register pressure, and regions caches.
for (auto Idx : ImpactedRegions) {
DAG.LiveIns[Idx] = NewLiveIns[Idx];
+ DAG.BBLiveInMap[Idx] = NewLiveIns[Idx];
DAG.Pressure[Idx] = NewPressure[Idx];
DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
}
+
+ if (GCNTrackers)
+ DAG.BBLiveOutMap = DAG.getBBLiveOutMap();
+
DAG.Regions = NewRegions;
DAG.RescheduleRegions = NewRescheduleRegions;
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 2084aae4128ff3..e463bd76c1124e 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -70,6 +70,12 @@ class GCNSchedStrategy : public GenericScheduler {
// Pointer to the current SchedStageID.
SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;
+ // GCN RP Tracker for top-down scheduling
+ GCNIterativeDownwardRPTracker TheTracker;
+
+ // GCN RP Tracker for bottom-up scheduling
+ GCNIterativeUpwardRPTracker TheUpwardTracker;
+
public:
// schedule() have seen register pressure over the critical limits and had to
// track register pressure for actual scheduling heuristics.
@@ -102,6 +108,8 @@ class GCNSchedStrategy : public GenericScheduler {
SUnit *pickNode(bool &IsTopNode) override;
+ void schedNode(SUnit *SU, bool IsTopNode) override;
+
void initialize(ScheduleDAGMI *DAG) override...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/88797
More information about the llvm-commits
mailing list