[llvm] [AMDGPU] Optionally Use GCNRPTrackers during scheduling (PR #93090)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Mon May 27 10:58:47 PDT 2024


https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/93090

>From 24f2d093aa0f1bdf2f42d874223275e73d98eebd Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 21 May 2024 12:55:07 -0700
Subject: [PATCH 1/9] [AMDGPU] NFC: Add BBLiveOutMap & LiveOut Cache

Change-Id: I63cfd44e635cc4bee0e6780ca43b692c46e940b7
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 56 ++++++++++++++++++++-
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.h   |  7 +++
 2 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 94d93390d0916..a4d05f62a7f74 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -58,6 +58,11 @@ static cl::opt<bool>
                         "Wave Limited (amdgpu-limit-wave-threshold)."),
                cl::init(false));
 
+static cl::opt<bool> GCNTrackers(
+    "amdgpu-use-amdgpu-trackers", cl::Hidden,
+    cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
+    cl::init(false));
+
 const unsigned ScheduleMetrics::ScaleFactor = 100;
 
 GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
@@ -526,6 +531,19 @@ GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
   return RPTracker.moveMaxPressure();
 }
 
+static MachineInstr *getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
+                                        MachineBasicBlock::iterator RegionEnd) {
+  MachineInstr *LastMI;
+  auto *BB = RegionBegin->getParent();
+  if (RegionEnd != BB->end() && !RegionEnd->isDebugInstr())
+    LastMI = &*RegionEnd;
+  else if (RegionEnd == BB->end())
+    LastMI = &*prev_nodbg(RegionEnd, RegionBegin);
+  else
+    LastMI = &*skipDebugInstructionsBackward(RegionEnd, RegionBegin);
+  return LastMI;
+}
+
 void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                 const MachineBasicBlock *MBB) {
   GCNDownwardRPTracker RPTracker(*LIS);
@@ -597,6 +615,16 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
     RPTracker.advanceBeforeNext();
     MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
   }
+
+  if (GCNTrackers) {
+    assert(LiveOuts.size() == Regions.size());
+    for (unsigned RegionIdx = 0; RegionIdx < Regions.size(); RegionIdx++) {
+      auto RegionBegin = Regions[RegionIdx].first;
+      auto RegionEnd = Regions[RegionIdx].second;
+      MachineInstr *LastMI = getLastMIForRegion(RegionBegin, RegionEnd);
+      LiveOuts[RegionIdx] = BBLiveOutMap.lookup(LastMI);
+    }
+  }
 }
 
 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
@@ -616,11 +644,24 @@ GCNScheduleDAGMILive::getBBLiveInMap() const {
   return getLiveRegMap(BBStarters, false /*After*/, *LIS);
 }
 
+DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
+GCNScheduleDAGMILive::getBBLiveOutMap() const {
+  assert(!Regions.empty());
+  std::vector<MachineInstr *> BBEnders;
+  BBEnders.reserve(Regions.size());
+  auto I = Regions.rbegin(), E = Regions.rend();
+  for (; I != E; I++)
+    BBEnders.push_back(getLastMIForRegion(I->first, I->second));
+
+  return getLiveRegMap(BBEnders, true /*After*/, *LIS);
+}
+
 void GCNScheduleDAGMILive::finalizeSchedule() {
   // Start actual scheduling here. This function is called by the base
   // MachineScheduler after all regions have been recorded by
   // GCNScheduleDAGMILive::schedule().
   LiveIns.resize(Regions.size());
+  LiveOuts.resize(Regions.size());
   Pressure.resize(Regions.size());
   RescheduleRegions.resize(Regions.size());
   RegionsWithHighRP.resize(Regions.size());
@@ -639,8 +680,12 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
 void GCNScheduleDAGMILive::runSchedStages() {
   LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
 
-  if (!Regions.empty())
+  if (!Regions.empty()) {
     BBLiveInMap = getBBLiveInMap();
+    if (GCNTrackers) {
+      BBLiveOutMap = getBBLiveOutMap();
+    }
+  }
 
   GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
   while (S.advanceStage()) {
@@ -1499,6 +1544,15 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
   DAG.Regions = NewRegions;
   DAG.RescheduleRegions = NewRescheduleRegions;
 
+  if (GCNTrackers) {
+    DAG.BBLiveOutMap = DAG.getBBLiveOutMap();
+    auto I = DAG.Regions.begin(), E = DAG.Regions.end();
+    for (; I != E; I++) {
+      MachineInstr *LastMI = getLastMIForRegion(I->first, I->second);
+      DAG.LiveOuts.push_back(DAG.BBLiveOutMap.lookup(LastMI));
+    }
+  }
+
   SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
   MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
 
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 2084aae4128ff..243bb7f0c094d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -205,6 +205,9 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
   // Region live-in cache.
   SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;
 
+  // Region live-out cache.
+  SmallVector<GCNRPTracker::LiveRegSet, 32> LiveOuts;
+
   // Region pressure cache.
   SmallVector<GCNRegPressure, 32> Pressure;
 
@@ -215,6 +218,10 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
 
   DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
 
+  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveOutMap;
+
+  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveOutMap() const;
+
   // Return current region pressure.
   GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
 

>From 8deb94f63a6f1fae8ebbb52b9f91a92e8b4ce192 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 23 May 2024 11:03:27 -0700
Subject: [PATCH 2/9] Review Comments

Change-Id: Iaeaa9bc5b037d78ab965c3bc1778d424e37eb546
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index a4d05f62a7f74..6f792ce24350a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -533,15 +533,10 @@ GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
 
 static MachineInstr *getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
                                         MachineBasicBlock::iterator RegionEnd) {
-  MachineInstr *LastMI;
-  auto *BB = RegionBegin->getParent();
-  if (RegionEnd != BB->end() && !RegionEnd->isDebugInstr())
-    LastMI = &*RegionEnd;
-  else if (RegionEnd == BB->end())
-    LastMI = &*prev_nodbg(RegionEnd, RegionBegin);
-  else
-    LastMI = &*skipDebugInstructionsBackward(RegionEnd, RegionBegin);
-  return LastMI;
+  auto REnd = RegionEnd == RegionBegin->getParent()->end()
+                  ? std::prev(RegionEnd)
+                  : RegionEnd;
+  return &*skipDebugInstructionsBackward(REnd, RegionBegin);
 }
 
 void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
@@ -641,7 +636,7 @@ GCNScheduleDAGMILive::getBBLiveInMap() const {
       ++I;
     } while (I != E && I->first->getParent() == BB);
   } while (I != E);
-  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
+  return getLiveRegMap(BBStarters, /*After=*/false, *LIS);
 }
 
 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
@@ -653,7 +648,7 @@ GCNScheduleDAGMILive::getBBLiveOutMap() const {
   for (; I != E; I++)
     BBEnders.push_back(getLastMIForRegion(I->first, I->second));
 
-  return getLiveRegMap(BBEnders, true /*After*/, *LIS);
+  return getLiveRegMap(BBEnders, /*After= */true, *LIS);
 }
 
 void GCNScheduleDAGMILive::finalizeSchedule() {

>From b2417c31bd90c8d48a00e425aa3c441952924961 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 23 May 2024 11:11:50 -0700
Subject: [PATCH 3/9] Formatting

Change-Id: I8418e9dd9571feb8cdbb32623f21ecb2ff41aa9e
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 6f792ce24350a..215fe79cfc728 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -648,7 +648,7 @@ GCNScheduleDAGMILive::getBBLiveOutMap() const {
   for (; I != E; I++)
     BBEnders.push_back(getLastMIForRegion(I->first, I->second));
 
-  return getLiveRegMap(BBEnders, /*After= */true, *LIS);
+  return getLiveRegMap(BBEnders, /*After= */ true, *LIS);
 }
 
 void GCNScheduleDAGMILive::finalizeSchedule() {

>From 715b1dde9adc8414dd7c752bea631cee00203798 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 21 May 2024 13:34:59 -0700
Subject: [PATCH 4/9] [AMDGPU] NFC: Provide RPTracker interface for external
 iterators

Change-Id: I79b54722e6e858961486248d94766c3f3c161160
---
 llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 70 +++++++++++++++--------
 llvm/lib/Target/AMDGPU/GCNRegPressure.h   | 18 +++---
 2 files changed, 56 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 5c394e6d6296d..f1c4c8b397ddc 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -343,24 +343,25 @@ void GCNRPTracker::reset(const MachineInstr &MI,
   MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
 }
 
-////////////////////////////////////////////////////////////////////////////////
-// GCNUpwardRPTracker
-
-void GCNUpwardRPTracker::reset(const MachineRegisterInfo &MRI_,
-                               const LiveRegSet &LiveRegs_) {
+void GCNRPTracker::reset(const MachineRegisterInfo &MRI_,
+                         const LiveRegSet &LiveRegs_) {
   MRI = &MRI_;
   LiveRegs = LiveRegs_;
   LastTrackedMI = nullptr;
   MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_);
 }
 
-void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
+////////////////////////////////////////////////////////////////////////////////
+// GCNUpwardRPTracker
+
+bool GCNUpwardRPTracker::recede(const MachineInstr &MI, bool ShouldTrackIt) {
   assert(MRI && "call reset first");
 
-  LastTrackedMI = &MI;
+  if (ShouldTrackIt)
+    LastTrackedMI = &MI;
 
   if (MI.isDebugInstr())
-    return;
+    return false;
 
   // Kill all defs.
   GCNRegPressure DefPressure, ECDefPressure;
@@ -412,6 +413,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
                           : max(CurPressure, MaxPressure);
 
   assert(CurPressure == getRegPressure(*MRI, LiveRegs));
+  return false;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -430,28 +432,44 @@ bool GCNDownwardRPTracker::reset(const MachineInstr &MI,
   return true;
 }
 
-bool GCNDownwardRPTracker::advanceBeforeNext() {
+bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
+                                             bool ShouldTrackIt,
+                                             LiveIntervals *TheLIS) {
   assert(MRI && "call reset first");
-  if (!LastTrackedMI)
-    return NextMI == MBBEnd;
+  SlotIndex SI;
+  LiveIntervals *CurrLIS;
+  MachineInstr *CurrMI;
+  if (ShouldTrackIt) {
+    if (!LastTrackedMI)
+      return NextMI == MBBEnd;
+
+    assert(NextMI == MBBEnd || !NextMI->isDebugInstr());
+    CurrLIS = const_cast<LiveIntervals *>(&LIS);
+    CurrMI = const_cast<MachineInstr *>(LastTrackedMI);
+
+    SI = NextMI == MBBEnd
+             ? CurrLIS->getInstructionIndex(*LastTrackedMI).getDeadSlot()
+             : CurrLIS->getInstructionIndex(*NextMI).getBaseIndex();
+  }
 
-  assert(NextMI == MBBEnd || !NextMI->isDebugInstr());
+  else { //! ShouldTrackIt
+    CurrLIS = TheLIS;
+    SI = CurrLIS->getInstructionIndex(*MI).getBaseIndex();
+    CurrMI = MI;
+  }
 
-  SlotIndex SI = NextMI == MBBEnd
-                     ? LIS.getInstructionIndex(*LastTrackedMI).getDeadSlot()
-                     : LIS.getInstructionIndex(*NextMI).getBaseIndex();
   assert(SI.isValid());
 
   // Remove dead registers or mask bits.
   SmallSet<Register, 8> SeenRegs;
-  for (auto &MO : LastTrackedMI->operands()) {
+  for (auto &MO : CurrMI->operands()) {
     if (!MO.isReg() || !MO.getReg().isVirtual())
       continue;
     if (MO.isUse() && !MO.readsReg())
       continue;
     if (!SeenRegs.insert(MO.getReg()).second)
       continue;
-    const LiveInterval &LI = LIS.getInterval(MO.getReg());
+    const LiveInterval &LI = CurrLIS->getInterval(MO.getReg());
     if (LI.hasSubRanges()) {
       auto It = LiveRegs.end();
       for (const auto &S : LI.subranges()) {
@@ -481,15 +499,18 @@ bool GCNDownwardRPTracker::advanceBeforeNext() {
 
   LastTrackedMI = nullptr;
 
-  return NextMI == MBBEnd;
+  return ShouldTrackIt && (NextMI == MBBEnd);
 }
 
-void GCNDownwardRPTracker::advanceToNext() {
+void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI, bool ShouldTrackIt) {
   LastTrackedMI = &*NextMI++;
   NextMI = skipDebugInstructionsForward(NextMI, MBBEnd);
 
+  MachineInstr *CurrMI =
+      ShouldTrackIt ? const_cast<MachineInstr *>(LastTrackedMI) : MI;
+
   // Add new registers or mask bits.
-  for (const auto &MO : LastTrackedMI->all_defs()) {
+  for (const auto &MO : CurrMI->all_defs()) {
     Register Reg = MO.getReg();
     if (!Reg.isVirtual())
       continue;
@@ -502,11 +523,12 @@ void GCNDownwardRPTracker::advanceToNext() {
   MaxPressure = max(MaxPressure, CurPressure);
 }
 
-bool GCNDownwardRPTracker::advance() {
-  if (NextMI == MBBEnd)
+bool GCNDownwardRPTracker::advance(MachineInstr *MI, bool ShouldTrackIt,
+                                   LiveIntervals *TheLIS) {
+  if (ShouldTrackIt && NextMI == MBBEnd)
     return false;
-  advanceBeforeNext();
-  advanceToNext();
+  advanceBeforeNext(MI, ShouldTrackIt, TheLIS);
+  advanceToNext(MI, ShouldTrackIt);
   return true;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 752f53752fa68..8abbce138cf16 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -160,6 +160,9 @@ class GCNRPTracker {
              bool After);
 
 public:
+  // reset tracker and set live register set to the specified value.
+  void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
+
   // live regs for the current state
   const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; }
   const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
@@ -180,12 +183,9 @@ class GCNUpwardRPTracker : public GCNRPTracker {
 public:
   GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {}
 
-  // reset tracker and set live register set to the specified value.
-  void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
-
   // reset tracker at the specified slot index.
   void reset(const MachineRegisterInfo &MRI, SlotIndex SI) {
-    reset(MRI, llvm::getLiveRegs(SI, LIS, MRI));
+    GCNRPTracker::reset(MRI, llvm::getLiveRegs(SI, LIS, MRI));
   }
 
   // reset tracker to the end of the MBB.
@@ -200,7 +200,7 @@ class GCNUpwardRPTracker : public GCNRPTracker {
   }
 
   // move to the state just before the MI (in program order).
-  void recede(const MachineInstr &MI);
+  bool recede(const MachineInstr &MI, bool ShouldTrackIt = true);
 
   // checks whether the tracker's state after receding MI corresponds
   // to reported by LIS.
@@ -242,13 +242,15 @@ class GCNDownwardRPTracker : public GCNRPTracker {
 
   // Move to the state right before the next MI or after the end of MBB.
   // Returns false if reached end of the block.
-  bool advanceBeforeNext();
+  bool advanceBeforeNext(MachineInstr *MI = nullptr, bool ShouldTrackIt = true,
+                         LiveIntervals *TheLIS = nullptr);
 
   // Move to the state at the MI, advanceBeforeNext has to be called first.
-  void advanceToNext();
+  void advanceToNext(MachineInstr *MI = nullptr, bool ShouldTrackIt = true);
 
   // Move to the state at the next MI. Returns false if reached end of block.
-  bool advance();
+  bool advance(MachineInstr *MI = nullptr, bool ShouldTrackIt = true,
+               LiveIntervals *TheLIS = nullptr);
 
   // Advance instructions until before End.
   bool advance(MachineBasicBlock::const_iterator End);

>From abcc19e2127751d47e5b9edd215eed09a4bf2e24 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 23 May 2024 11:11:38 -0700
Subject: [PATCH 5/9] Review comments

Change-Id: Ib798a9d6add7d6dd1ccd0e01bea09ac2f4eeb94f
---
 llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 15 +++---
 llvm/lib/Target/AMDGPU/GCNRegPressure.h   | 64 +++++++++++++++++------
 2 files changed, 54 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index f1c4c8b397ddc..6957bf8da6713 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -354,14 +354,14 @@ void GCNRPTracker::reset(const MachineRegisterInfo &MRI_,
 ////////////////////////////////////////////////////////////////////////////////
 // GCNUpwardRPTracker
 
-bool GCNUpwardRPTracker::recede(const MachineInstr &MI, bool ShouldTrackIt) {
+void GCNUpwardRPTracker::recede(const MachineInstr &MI, bool ShouldTrackIt) {
   assert(MRI && "call reset first");
 
   if (ShouldTrackIt)
     LastTrackedMI = &MI;
 
   if (MI.isDebugInstr())
-    return false;
+    return;
 
   // Kill all defs.
   GCNRegPressure DefPressure, ECDefPressure;
@@ -413,7 +413,6 @@ bool GCNUpwardRPTracker::recede(const MachineInstr &MI, bool ShouldTrackIt) {
                           : max(CurPressure, MaxPressure);
 
   assert(CurPressure == getRegPressure(*MRI, LiveRegs));
-  return false;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -450,9 +449,7 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
     SI = NextMI == MBBEnd
              ? CurrLIS->getInstructionIndex(*LastTrackedMI).getDeadSlot()
              : CurrLIS->getInstructionIndex(*NextMI).getBaseIndex();
-  }
-
-  else { //! ShouldTrackIt
+  } else { //! ShouldTrackIt
     CurrLIS = TheLIS;
     SI = CurrLIS->getInstructionIndex(*MI).getBaseIndex();
     CurrMI = MI;
@@ -503,8 +500,10 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
 }
 
 void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI, bool ShouldTrackIt) {
-  LastTrackedMI = &*NextMI++;
-  NextMI = skipDebugInstructionsForward(NextMI, MBBEnd);
+  if (ShouldTrackIt) {
+    LastTrackedMI = &*NextMI++;
+    NextMI = skipDebugInstructionsForward(NextMI, MBBEnd);
+  }
 
   MachineInstr *CurrMI =
       ShouldTrackIt ? const_cast<MachineInstr *>(LastTrackedMI) : MI;
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 8abbce138cf16..54e320cf08707 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -143,6 +143,9 @@ inline GCNRegPressure operator-(const GCNRegPressure &P1,
   return Diff;
 }
 
+///////////////////////////////////////////////////////////////////////////////
+// GCNRPTracker
+
 class GCNRPTracker {
 public:
   using LiveRegSet = DenseMap<unsigned, LaneBitmask>;
@@ -179,31 +182,36 @@ class GCNRPTracker {
 GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
                                      const MachineRegisterInfo &MRI);
 
+////////////////////////////////////////////////////////////////////////////////
+// GCNUpwardRPTracker
+
 class GCNUpwardRPTracker : public GCNRPTracker {
 public:
   GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {}
 
-  // reset tracker at the specified slot index.
+  /// reset tracker at the specified slot index \p SI.
   void reset(const MachineRegisterInfo &MRI, SlotIndex SI) {
     GCNRPTracker::reset(MRI, llvm::getLiveRegs(SI, LIS, MRI));
   }
 
-  // reset tracker to the end of the MBB.
+  /// reset tracker to the end of the \p MBB.
   void reset(const MachineBasicBlock &MBB) {
     reset(MBB.getParent()->getRegInfo(),
           LIS.getSlotIndexes()->getMBBEndIdx(&MBB));
   }
 
-  // reset tracker to the point just after MI (in program order).
+  /// reset tracker to the point just after \p MI (in program order).
   void reset(const MachineInstr &MI) {
     reset(MI.getMF()->getRegInfo(), LIS.getInstructionIndex(MI).getDeadSlot());
   }
 
-  // move to the state just before the MI (in program order).
-  bool recede(const MachineInstr &MI, bool ShouldTrackIt = true);
+  /// Move to the state of RP just before the \p MI . If \p ShouldTrackIt is
+  /// set, also update the internal iterators. Setting \p ShouldTrackIt to false
+  /// allows for an externally managed iterator / program order.
+  void recede(const MachineInstr &MI, bool ShouldTrackIt = true);
 
-  // checks whether the tracker's state after receding MI corresponds
-  // to reported by LIS.
+  /// \returns whether the tracker's state after receding MI corresponds
+  /// to that reported by LIS.
   bool isValid() const;
 
   const GCNRegPressure &getMaxPressure() const { return MaxPressure; }
@@ -217,6 +225,9 @@ class GCNUpwardRPTracker : public GCNRPTracker {
   }
 };
 
+////////////////////////////////////////////////////////////////////////////////
+// GCNDownwardRPTracker
+
 class GCNDownwardRPTracker : public GCNRPTracker {
   // Last position of reset or advanceBeforeNext
   MachineBasicBlock::const_iterator NextMI;
@@ -228,34 +239,53 @@ class GCNDownwardRPTracker : public GCNRPTracker {
 
   MachineBasicBlock::const_iterator getNext() const { return NextMI; }
 
-  // Return MaxPressure and clear it.
+  /// \returns MaxPressure and clears it.
   GCNRegPressure moveMaxPressure() {
     auto Res = MaxPressure;
     MaxPressure.clear();
     return Res;
   }
 
-  // Reset tracker to the point before the MI
-  // filling live regs upon this point using LIS.
-  // Returns false if block is empty except debug values.
+  /// Reset tracker to the point before the \p MI
+  /// filling \p LiveRegs upon this point using LIS.
+  /// \returns false if block is empty except debug values.
   bool reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr);
 
-  // Move to the state right before the next MI or after the end of MBB.
-  // Returns false if reached end of the block.
+  /// Move to the state right before the next MI or after the end of MBB.
+  /// Returns true only if tracking internally and the block end was reached.
+  /// If \p ShouldTrackIt is true, then internal iterators are used and set to
+  /// process in program order.
+  /// If \p ShouldTrackIt is false, then it is assumed that the tracker is using
+  /// an externally managed iterator, and advance* calls will not update the
+  /// state of the iterator. In such cases, the tracker will move to the state
+  /// right before the provided \p MI and use the provided \p TheLIS for RP
+  /// calculations.
   bool advanceBeforeNext(MachineInstr *MI = nullptr, bool ShouldTrackIt = true,
                          LiveIntervals *TheLIS = nullptr);
 
-  // Move to the state at the MI, advanceBeforeNext has to be called first.
+  /// Move to the state at the MI, advanceBeforeNext has to be called first.
+  /// If \p ShouldTrackIt is true, then internal iterators are used and set to
+  /// process in program order.
+  /// If \p ShouldTrackIt is false, then it is assumed that the tracker is using
+  /// an externally managed iterator, and advance* calls will not update the
+  /// state of the iterator. In such cases, the tracker will move to the state
+  /// at the provided \p MI .
   void advanceToNext(MachineInstr *MI = nullptr, bool ShouldTrackIt = true);
 
-  // Move to the state at the next MI. Returns false if reached end of block.
+  /// Move to the state at the next MI. Returns false if reached end of
+  /// block. If \p ShouldTrackIt is true, then internal iterators are used and
+  /// set to process in program order. If \p ShouldTrackIt is false, then it is
+  /// assumed that the tracker is using an externally managed iterator, and
+  /// advance* calls will not update the state of the iterator. In such cases,
+  /// the tracker will move to the state right before the provided \p MI and use
+  /// the provided \p TheLIS for RP calculations.
   bool advance(MachineInstr *MI = nullptr, bool ShouldTrackIt = true,
                LiveIntervals *TheLIS = nullptr);
 
-  // Advance instructions until before End.
+  /// Advance instructions until before \p End.
   bool advance(MachineBasicBlock::const_iterator End);
 
-  // Reset to Begin and advance to End.
+  /// Reset to \p Begin and advance to \p End.
   bool advance(MachineBasicBlock::const_iterator Begin,
                MachineBasicBlock::const_iterator End,
                const LiveRegSet *LiveRegsCopy = nullptr);

>From b528e171be3295ef15d238b0fcf6801f70c98a47 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 27 May 2024 10:40:10 -0700
Subject: [PATCH 6/9] Fix RP calculation behavior

Change-Id: I10242186f538359ff09110dd70b23e5136655849
---
 llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 6957bf8da6713..e420ed06be6b0 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -464,6 +464,8 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
       continue;
     if (MO.isUse() && !MO.readsReg())
       continue;
+    if (!ShouldTrackIt && MO.isDef())
+      continue;
     if (!SeenRegs.insert(MO.getReg()).second)
       continue;
     const LiveInterval &LI = CurrLIS->getInterval(MO.getReg());

>From fbd0e54f749475c91a6fea60e84298cfdb043b38 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 21 May 2024 18:04:25 -0700
Subject: [PATCH 7/9] [AMDGPU] Optionally Use AMDGPU RPTrackers during
 scheduling

Change-Id: I6ae56149c1eb49ea85362267174cc6274c416330
---
 .../Target/AMDGPU/GCNIterativeScheduler.cpp   |   2 +-
 llvm/lib/Target/AMDGPU/GCNRegPressure.h       |   1 -
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp   | 101 ++++++++++++++----
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.h     |  19 +++-
 4 files changed, 98 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index aebfe154b3139..ccee5db9a3bb6 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -480,7 +480,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
   LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
                        "target occupancy = "
                     << TgtOcc << '\n');
-  GCNMaxOccupancySchedStrategy LStrgy(Context);
+  GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler*/ true);
   unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
 
   for (int I = 0; I < NumPasses; ++I) {
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 54e320cf08707..d764d6a041dad 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -165,7 +165,6 @@ class GCNRPTracker {
 public:
   // reset tracker and set live register set to the specified value.
   void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
-
   // live regs for the current state
   const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; }
   const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 215fe79cfc728..0b6235190856b 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -67,6 +67,7 @@ const unsigned ScheduleMetrics::ScaleFactor = 100;
 
 GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
     : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
+      TheTracker(*C->LIS), TheUpwardTracker(*C->LIS),
       HasHighPressure(false) {}
 
 void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
@@ -133,23 +134,46 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
   if (!DAG->isTrackingPressure())
     return;
 
-  // getDownwardPressure() and getUpwardPressure() make temporary changes to
-  // the tracker, so we need to pass those function a non-const copy.
-  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+  unsigned NewSGPRPressure, NewVGPRPressure;
+  if (!GCNTrackers) {
+    // getDownwardPressure() and getUpwardPressure() make temporary changes to
+    // the tracker, so we need to pass those functions a non-const reference.
+    RegPressureTracker &TempTracker =
+        const_cast<RegPressureTracker &>(RPTracker);
+
+    Pressure.clear();
+    MaxPressure.clear();
+
+    if (AtTop)
+      TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
+    else {
+      // FIXME: I think for bottom up scheduling, the register pressure is
+      // cached and can be retrieved by DAG->getPressureDif(SU).
+      TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
+    }
+    NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
+    NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+  }
 
-  Pressure.clear();
-  MaxPressure.clear();
+  if (GCNTrackers) {
+    if (AtTop) {
+      GCNDownwardRPTracker TempTopTracker(TheTracker);
+      auto MI = SU->getInstr();
+      TempTopTracker.advance(MI, true, DAG->getLIS());
 
-  if (AtTop)
-    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
-  else {
-    // FIXME: I think for bottom up scheduling, the register pressure is cached
-    // and can be retrieved by DAG->getPressureDif(SU).
-    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
-  }
+      NewSGPRPressure = TempTopTracker.getPressure().getSGPRNum();
+      NewVGPRPressure = TempTopTracker.getPressure().getVGPRNum(false);
+    }
 
-  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
-  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+    else {
+      GCNUpwardRPTracker TempBotTracker(TheUpwardTracker);
+      auto MI = SU->getInstr();
+      TempBotTracker.recede(*MI, true);
+
+      NewSGPRPressure = TempBotTracker.getPressure().getSGPRNum();
+      NewVGPRPressure = TempBotTracker.getPressure().getVGPRNum(false);
+    }
+  }
 
   // If two instructions increase the pressure of different register sets
   // by the same amount, the generic scheduler will prefer to schedule the
@@ -218,8 +242,16 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
   unsigned SGPRPressure = 0;
   unsigned VGPRPressure = 0;
   if (DAG->isTrackingPressure()) {
-    SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
-    VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+    SGPRPressure =
+        GCNTrackers
+            ? (Zone.isTop() ? TheTracker.getPressure().getSGPRNum()
+                            : TheUpwardTracker.getPressure().getSGPRNum())
+            : Pressure[AMDGPU::RegisterPressureSets::SReg_32];
+    VGPRPressure =
+        GCNTrackers
+            ? (Zone.isTop() ? TheTracker.getPressure().getVGPRNum(false)
+                            : TheUpwardTracker.getPressure().getVGPRNum(false))
+            : Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
   }
   ReadyQueue &Q = Zone.Available;
   for (SUnit *SU : Q) {
@@ -362,6 +394,16 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
   return SU;
 }
 
+void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+  if (GCNTrackers) {
+    MachineInstr *MI = SU->getInstr();
+    IsTopNode ? TheTracker.advance(MI, true, DAG->getLIS())
+              : TheUpwardTracker.recede(*MI, true);
+  }
+
+  return GenericScheduler::schedNode(SU, IsTopNode);
+}
+
 GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
   assert(CurrentStage && CurrentStage != SchedStages.end());
   return *CurrentStage;
@@ -388,12 +430,13 @@ GCNSchedStageID GCNSchedStrategy::getNextStage() const {
 }
 
 GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
-    const MachineSchedContext *C)
+    const MachineSchedContext *C, bool IsLegacyScheduler)
     : GCNSchedStrategy(C) {
   SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
   SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
   SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
   SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
+  GCNTrackers = GCNTrackers & !IsLegacyScheduler;
 }
 
 GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
@@ -677,9 +720,8 @@ void GCNScheduleDAGMILive::runSchedStages() {
 
   if (!Regions.empty()) {
     BBLiveInMap = getBBLiveInMap();
-    if (GCNTrackers) {
+    if (GCNTrackers)
       BBLiveOutMap = getBBLiveOutMap();
-    }
   }
 
   GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
@@ -698,6 +740,21 @@ void GCNScheduleDAGMILive::runSchedStages() {
         continue;
       }
 
+      if (GCNTrackers) {
+        GCNDownwardRPTracker *TheTracker = S.getTracker();
+        GCNUpwardRPTracker *TheUpwardTracker = S.getUpwardTracker();
+        GCNRPTracker::LiveRegSet *RegionLiveIns = &LiveIns[Stage->getRegionIdx()];
+        GCNRPTracker::LiveRegSet *RegionLiveOuts = &LiveOuts[Stage->getRegionIdx()];
+
+        reinterpret_cast<GCNRPTracker *>(TheTracker)->reset(
+            Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+            *RegionLiveIns);
+        reinterpret_cast<GCNRPTracker *>(TheUpwardTracker)->reset(
+            Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+            *RegionLiveOuts);
+
+      }
+
       ScheduleDAGMILive::schedule();
       Stage->finalizeGCNRegion();
     }
@@ -968,6 +1025,7 @@ void GCNSchedStage::finalizeGCNRegion() {
 void GCNSchedStage::checkScheduling() {
   // Check the results of scheduling.
   PressureAfter = DAG.getRealRegPressure(RegionIdx);
+
   LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
   LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
 
@@ -1519,9 +1577,6 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
     MachineInstr *MI = Entry.first;
     MachineInstr *OldMI = Entry.second;
 
-    // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
-    DAG.BBLiveInMap.erase(OldMI);
-
     // Remove OldMI and update LIS
     Register Reg = MI->getOperand(0).getReg();
     LIS->RemoveMachineInstrFromMaps(*OldMI);
@@ -1539,6 +1594,8 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
   DAG.Regions = NewRegions;
   DAG.RescheduleRegions = NewRescheduleRegions;
 
+  DAG.BBLiveInMap = DAG.getBBLiveInMap();
+
   if (GCNTrackers) {
     DAG.BBLiveOutMap = DAG.getBBLiveOutMap();
     auto I = DAG.Regions.begin(), E = DAG.Regions.end();
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 243bb7f0c094d..b666da267d117 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -70,6 +70,12 @@ class GCNSchedStrategy : public GenericScheduler {
   // Pointer to the current SchedStageID.
   SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;
 
+  // GCN RP Tracker for top-down scheduling
+  mutable GCNDownwardRPTracker TheTracker;
+
+  // GCN RP Tracker for bottom-up scheduling
+  mutable GCNUpwardRPTracker TheUpwardTracker;
+
 public:
   // schedule() have seen register pressure over the critical limits and had to
   // track register pressure for actual scheduling heuristics.
@@ -102,6 +108,8 @@ class GCNSchedStrategy : public GenericScheduler {
 
   SUnit *pickNode(bool &IsTopNode) override;
 
+  void schedNode(SUnit *SU, bool IsTopNode) override;
+
   void initialize(ScheduleDAGMI *DAG) override;
 
   unsigned getTargetOccupancy() { return TargetOccupancy; }
@@ -116,13 +124,19 @@ class GCNSchedStrategy : public GenericScheduler {
   bool hasNextStage() const;
 
   GCNSchedStageID getNextStage() const;
+
+  GCNDownwardRPTracker *getTracker() { return &TheTracker; }
+
+  GCNUpwardRPTracker *getUpwardTracker() { return &TheUpwardTracker; }
+
 };
 
 /// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
 /// maximum number of waves per simd).
 class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
 public:
-  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
+  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
+                               bool IsLegacyScheduler = false);
 };
 
 /// The goal of this scheduling strategy is to maximize ILP for a single wave
@@ -317,6 +331,9 @@ class GCNSchedStage {
   bool isRegionWithExcessRP() const {
     return DAG.RegionsWithExcessRP[RegionIdx];
   }
+  
+  // The region number this stage is currently working on
+  unsigned getRegionIdx() { return RegionIdx; }
 
   // Returns true if the new schedule may result in more spilling.
   bool mayCauseSpilling(unsigned WavesAfter);

>From 3b1b96fbf1c93c6521c5bab582cfc863b5d822cf Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 22 May 2024 12:06:32 -0700
Subject: [PATCH 8/9] Formatting

Change-Id: I136a12e9a50d0c987ebaa9a6871b38ab17ffae33
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 26 ++++++++++-----------
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.h   |  3 +--
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 0b6235190856b..2881330c0c742 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -66,9 +66,8 @@ static cl::opt<bool> GCNTrackers(
 const unsigned ScheduleMetrics::ScaleFactor = 100;
 
 GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
-    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
-      TheTracker(*C->LIS), TheUpwardTracker(*C->LIS),
-      HasHighPressure(false) {}
+    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr), TheTracker(*C->LIS),
+      TheUpwardTracker(*C->LIS), HasHighPressure(false) {}
 
 void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
   GenericScheduler::initialize(DAG);
@@ -743,16 +742,17 @@ void GCNScheduleDAGMILive::runSchedStages() {
       if (GCNTrackers) {
         GCNDownwardRPTracker *TheTracker = S.getTracker();
         GCNUpwardRPTracker *TheUpwardTracker = S.getUpwardTracker();
-        GCNRPTracker::LiveRegSet *RegionLiveIns = &LiveIns[Stage->getRegionIdx()];
-        GCNRPTracker::LiveRegSet *RegionLiveOuts = &LiveOuts[Stage->getRegionIdx()];
-
-        reinterpret_cast<GCNRPTracker *>(TheTracker)->reset(
-            Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
-            *RegionLiveIns);
-        reinterpret_cast<GCNRPTracker *>(TheUpwardTracker)->reset(
-            Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
-            *RegionLiveOuts);
-
+        GCNRPTracker::LiveRegSet *RegionLiveIns =
+            &LiveIns[Stage->getRegionIdx()];
+        GCNRPTracker::LiveRegSet *RegionLiveOuts =
+            &LiveOuts[Stage->getRegionIdx()];
+
+        reinterpret_cast<GCNRPTracker *>(TheTracker)
+            ->reset(Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+                    *RegionLiveIns);
+        reinterpret_cast<GCNRPTracker *>(TheUpwardTracker)
+            ->reset(Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+                    *RegionLiveOuts);
       }
 
       ScheduleDAGMILive::schedule();
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index b666da267d117..251b8058c29ba 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -128,7 +128,6 @@ class GCNSchedStrategy : public GenericScheduler {
   GCNDownwardRPTracker *getTracker() { return &TheTracker; }
 
   GCNUpwardRPTracker *getUpwardTracker() { return &TheUpwardTracker; }
-
 };
 
 /// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
@@ -331,7 +330,7 @@ class GCNSchedStage {
   bool isRegionWithExcessRP() const {
     return DAG.RegionsWithExcessRP[RegionIdx];
   }
-  
+
   // The region number this stage is currently working on
   unsigned getRegionIdx() { return RegionIdx; }
 

>From 7b6eb7e52f0c121488ba5bb20de4977c2cedf1b3 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 27 May 2024 10:43:43 -0700
Subject: [PATCH 9/9] Actually use the iterative trackers

Change-Id: I198925f5ed91b0a49ac265e19fdbe2208139f09a
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 2881330c0c742..b5e1c8e0f08aa 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -158,7 +158,7 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
     if (AtTop) {
       GCNDownwardRPTracker TempTopTracker(TheTracker);
       auto MI = SU->getInstr();
-      TempTopTracker.advance(MI, true, DAG->getLIS());
+      TempTopTracker.advance(MI, false, DAG->getLIS());
 
       NewSGPRPressure = TempTopTracker.getPressure().getSGPRNum();
       NewVGPRPressure = TempTopTracker.getPressure().getVGPRNum(false);
@@ -167,7 +167,7 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
     else {
       GCNUpwardRPTracker TempBotTracker(TheUpwardTracker);
       auto MI = SU->getInstr();
-      TempBotTracker.recede(*MI, true);
+      TempBotTracker.recede(*MI, false);
 
       NewSGPRPressure = TempBotTracker.getPressure().getSGPRNum();
       NewVGPRPressure = TempBotTracker.getPressure().getVGPRNum(false);
@@ -396,8 +396,8 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
 void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
   if (GCNTrackers) {
     MachineInstr *MI = SU->getInstr();
-    IsTopNode ? TheTracker.advance(MI, true, DAG->getLIS())
-              : TheUpwardTracker.recede(*MI, true);
+    IsTopNode ? (void)TheTracker.advance(MI, false, DAG->getLIS())
+              : TheUpwardTracker.recede(*MI, false);
   }
 
   return GenericScheduler::schedNode(SU, IsTopNode);



More information about the llvm-commits mailing list