[llvm] [AMDGPU] Optionally Use GCNRPTrackers during scheduling (PR #93090)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 13 12:23:04 PDT 2024


https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/93090

>From 47ac3d5e4c30c62c63067a54d6e6ad95ead1a558 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 21 May 2024 12:55:07 -0700
Subject: [PATCH 01/14] [AMDGPU] NFC: Add BBLiveOutMap & LiveOut Cache

Change-Id: I63cfd44e635cc4bee0e6780ca43b692c46e940b7
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 58 ++++++++++++++++++---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.h   | 42 ++++++++++++++-
 2 files changed, 91 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 217279211531b..f1f28ed30c5e7 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -58,6 +58,11 @@ static cl::opt<bool>
                         "Wave Limited (amdgpu-limit-wave-threshold)."),
                cl::init(false));
 
+static cl::opt<bool> GCNTrackers(
+    "amdgpu-use-amdgpu-trackers", cl::Hidden,
+    cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
+    cl::init(false));
+
 const unsigned ScheduleMetrics::ScaleFactor = 100;
 
 GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
@@ -571,7 +576,8 @@ GCNScheduleDAGMILive::GCNScheduleDAGMILive(
     MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
     : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
       MFI(*MF.getInfo<SIMachineFunctionInfo>()),
-      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {
+      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
+      RegionLiveOuts(this, /*IsLiveOut=*/true) {
 
   LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
   if (RelaxedOcc) {
@@ -613,6 +619,14 @@ GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
   return RPTracker.moveMaxPressure();
 }
 
+static MachineInstr *getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
+                                        MachineBasicBlock::iterator RegionEnd) {
+  auto REnd = RegionEnd == RegionBegin->getParent()->end()
+                  ? std::prev(RegionEnd)
+                  : RegionEnd;
+  return &*skipDebugInstructionsBackward(REnd, RegionBegin);
+}
+
 void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                 const MachineBasicBlock *MBB) {
   GCNDownwardRPTracker RPTracker(*LIS);
@@ -687,20 +701,45 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
 }
 
 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
-GCNScheduleDAGMILive::getBBLiveInMap() const {
+GCNScheduleDAGMILive::getRegionLiveInMap() const {
   assert(!Regions.empty());
-  std::vector<MachineInstr *> BBStarters;
-  BBStarters.reserve(Regions.size());
+  std::vector<MachineInstr *> RegionFirstMIs;
+  RegionFirstMIs.reserve(Regions.size());
   auto I = Regions.rbegin(), E = Regions.rend();
   auto *BB = I->first->getParent();
   do {
     auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
-    BBStarters.push_back(MI);
+    RegionFirstMIs.push_back(MI);
     do {
       ++I;
     } while (I != E && I->first->getParent() == BB);
   } while (I != E);
-  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
+  return getLiveRegMap(RegionFirstMIs, /*After=*/false, *LIS);
+}
+
+DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
+GCNScheduleDAGMILive::getRegionLiveOutMap() const {
+  assert(!Regions.empty());
+  std::vector<MachineInstr *> RegionLastMIs;
+  RegionLastMIs.reserve(Regions.size());
+  for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
+    RegionLastMIs.push_back(getLastMIForRegion(RegionBegin, RegionEnd));
+
+  return getLiveRegMap(RegionLastMIs, /*After=*/true, *LIS);
+}
+
+void RegionPressureMap::buildLiveRegMap() {
+  IdxToInstruction.clear();
+
+  BBLiveRegMap =
+      IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
+  for (unsigned I = 0; I < DAG->Regions.size(); I++) {
+    MachineInstr *RegionKey =
+        IsLiveOut
+            ? getLastMIForRegion(DAG->Regions[I].first, DAG->Regions[I].second)
+            : &*DAG->Regions[I].first;
+    IdxToInstruction[I] = RegionKey;
+  }
 }
 
 void GCNScheduleDAGMILive::finalizeSchedule() {
@@ -726,8 +765,11 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
 void GCNScheduleDAGMILive::runSchedStages() {
   LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
 
-  if (!Regions.empty())
-    BBLiveInMap = getBBLiveInMap();
+  if (!Regions.empty()) {
+    BBLiveInMap = getRegionLiveInMap();
+    if (GCNTrackers)
+      RegionLiveOuts.buildLiveRegMap();
+  }
 
   GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
   while (S.advanceStage()) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index f0aea2bc4ab86..c402fb1ef373c 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -163,6 +163,32 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
   return OS;
 }
 
+class GCNScheduleDAGMILive;
+class RegionPressureMap {
+  GCNScheduleDAGMILive *DAG;
+  // The live in/out registers as indexed by the first or last MI in the region
+  // before scheduling.
+  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveRegMap;
+  // The mapping of RegionIdx to key instruction
+  DenseMap<unsigned, MachineInstr *> IdxToInstruction;
+  // Whether we are calculating LiveOuts or LiveIns
+  bool IsLiveOut;
+
+public:
+  RegionPressureMap() {}
+  RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
+      : DAG(GCNDAG), IsLiveOut(LiveOut) {}
+  // Build the Instr->LiveReg and RegionIdx->Instr maps
+  void buildLiveRegMap();
+
+  // Retrieve the LiveReg for a given RegionIdx
+  GCNRPTracker::LiveRegSet &getLiveRegsForRegionIdx(unsigned RegionIdx) {
+    assert(IdxToInstruction.find(RegionIdx) != IdxToInstruction.end());
+    MachineInstr *Key = IdxToInstruction[RegionIdx];
+    return BBLiveRegMap[Key];
+  }
+};
+
 class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
   friend class GCNSchedStage;
   friend class OccInitialScheduleStage;
@@ -170,6 +196,7 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
   friend class ClusteredLowOccStage;
   friend class PreRARematStage;
   friend class ILPInitialScheduleStage;
+  friend class RegionPressureMap;
 
   const GCNSubtarget &ST;
 
@@ -211,9 +238,22 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
   // Temporary basic block live-in cache.
   DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;
 
+  // The map of the initial first region instruction to region live in registers
   DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;
 
-  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
+  // Calculate the map of the initial first region instruction to region live in
+  // registers
+  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getRegionLiveInMap() const;
+
+  // Calculate the map of the initial last region instruction to region live out
+  // registers
+  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
+  getRegionLiveOutMap() const;
+
+  // The live out registers per region. These are internally stored as a map of
+  // the initial last region instruction to region live out registers, but can
+  // be retrieved with the regionIdx by calls to getLiveRegsForRegionIdx.
+  RegionPressureMap RegionLiveOuts;
 
   // Return current region pressure.
   GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;

>From fdc457d964c88e0e41078f939d1fc6b67a62af33 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 21 May 2024 13:34:59 -0700
Subject: [PATCH 02/14] [AMDGPU] NFC: Provide RPTracker interface for external
 iterators

Change-Id: I79b54722e6e858961486248d94766c3f3c161160
---
 llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 284 ++++++++++++++++++++--
 llvm/lib/Target/AMDGPU/GCNRegPressure.h   |  95 ++++++--
 2 files changed, 330 insertions(+), 49 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index c83af729f501f..c6bffc6440136 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -288,6 +288,72 @@ collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
   }
 }
 
+static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
+                               Register RegUnit) {
+  auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+    return Other.RegUnit == RegUnit;
+  });
+  if (I == RegUnits.end())
+    return LaneBitmask::getNone();
+  return I->LaneMask;
+}
+
+static LaneBitmask
+getLanesWithProperty(const LiveIntervals &LIS, const MachineRegisterInfo &MRI,
+                     bool TrackLaneMasks, Register RegUnit, SlotIndex Pos,
+                     LaneBitmask SafeDefault,
+                     bool (*Property)(const LiveRange &LR, SlotIndex Pos)) {
+  if (RegUnit.isVirtual()) {
+    const LiveInterval &LI = LIS.getInterval(RegUnit);
+    LaneBitmask Result;
+    if (TrackLaneMasks && LI.hasSubRanges()) {
+      for (const LiveInterval::SubRange &SR : LI.subranges()) {
+        if (Property(SR, Pos))
+          Result |= SR.LaneMask;
+      }
+    } else if (Property(LI, Pos)) {
+      Result = TrackLaneMasks ? MRI.getMaxLaneMaskForVReg(RegUnit)
+                              : LaneBitmask::getAll();
+    }
+
+    return Result;
+  } else {
+    const LiveRange *LR = LIS.getCachedRegUnit(RegUnit);
+    // Be prepared for missing liveranges: We usually do not compute liveranges
+    // for physical registers on targets with many registers (GPUs).
+    if (LR == nullptr)
+      return SafeDefault;
+    return Property(*LR, Pos) ? LaneBitmask::getAll() : LaneBitmask::getNone();
+  }
+}
+
+/// Helper to find a vreg use between two indices: [PriorUseIdx, NextUseIdx),
+/// or (PriorUseIdx, NextUseIdx] when \p Upward is set. The query starts with a
+/// lane bitmask which gets lanes/bits removed for every use we find.
+static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
+                                  SlotIndex PriorUseIdx, SlotIndex NextUseIdx,
+                                  const MachineRegisterInfo &MRI,
+                                  const LiveIntervals *LIS,
+                                  bool Upward = false) {
+  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+  for (const MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
+    if (MO.isUndef())
+      continue;
+    const MachineInstr *MI = MO.getParent();
+    SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
+    bool InRange = Upward ? (InstSlot > PriorUseIdx && InstSlot <= NextUseIdx)
+                          : (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx);
+    if (InRange) {
+      unsigned SubRegIdx = MO.getSubReg();
+      LaneBitmask UseMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
+      LastUseMask &= ~UseMask;
+      if (LastUseMask.none())
+        return LaneBitmask::getNone();
+    }
+  }
+  return LastUseMask;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 // GCNRPTracker
 
@@ -343,17 +409,47 @@ void GCNRPTracker::reset(const MachineInstr &MI,
   MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
 }
 
-////////////////////////////////////////////////////////////////////////////////
-// GCNUpwardRPTracker
-
-void GCNUpwardRPTracker::reset(const MachineRegisterInfo &MRI_,
-                               const LiveRegSet &LiveRegs_) {
+void GCNRPTracker::reset(const MachineRegisterInfo &MRI_,
+                         const LiveRegSet &LiveRegs_) {
   MRI = &MRI_;
   LiveRegs = LiveRegs_;
   LastTrackedMI = nullptr;
   MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_);
 }
 
+void GCNRPTracker::bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs) {
+  for (const RegisterMaskPair &P : DeadDefs) {
+    Register Reg = P.RegUnit;
+    if (!Reg.isVirtual())
+      continue;
+    LaneBitmask LiveMask = LiveRegs[Reg];
+    LaneBitmask BumpedMask = LiveMask | P.LaneMask;
+    CurPressure.inc(Reg, LiveMask, BumpedMask, *MRI);
+  }
+  MaxPressure = max(MaxPressure, CurPressure);
+  for (const RegisterMaskPair &P : DeadDefs) {
+    Register Reg = P.RegUnit;
+    if (!Reg.isVirtual())
+      continue;
+    LaneBitmask LiveMask = LiveRegs[Reg];
+    LaneBitmask BumpedMask = LiveMask | P.LaneMask;
+    CurPressure.inc(Reg, BumpedMask, LiveMask, *MRI);
+  }
+}
+
+LaneBitmask GCNRPTracker::getLastUsedLanes(Register RegUnit,
+                                           SlotIndex Pos) const {
+  return getLanesWithProperty(
+      LIS, *MRI, true, RegUnit, Pos.getBaseIndex(), LaneBitmask::getNone(),
+      [](const LiveRange &LR, SlotIndex Pos) {
+        const LiveRange::Segment *S = LR.getSegmentContaining(Pos);
+        return S != nullptr && S->end == Pos.getRegSlot();
+      });
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// GCNUpwardRPTracker
+
 void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
   assert(MRI && "call reset first");
 
@@ -414,6 +510,63 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
   assert(CurPressure == getRegPressure(*MRI, LiveRegs));
 }
 
+void GCNUpwardRPTracker::bumpUpwardPressure(const MachineInstr *MI) {
+  assert(!MI->isDebugOrPseudoInstr() && "Expect a nondebug instruction.");
+
+  SlotIndex SlotIdx = LIS.getInstructionIndex(*MI).getRegSlot();
+
+  // Account for register pressure similar to RegPressureTracker::recede().
+  RegisterOperands RegOpers;
+  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+  RegOpers.collect(*MI, *TRI, *MRI, true, /*IgnoreDead=*/true);
+  assert(RegOpers.DeadDefs.empty());
+  RegOpers.adjustLaneLiveness(LIS, *MRI, SlotIdx);
+  RegOpers.detectDeadDefs(*MI, LIS);
+
+  // Boost max pressure for all dead defs together. bumpDeadDefs() raises
+  // CurPressure temporarily, records MaxPressure, then restores CurPressure.
+  bumpDeadDefs(RegOpers.DeadDefs);
+
+  // Kill liveness at live defs.
+  for (const RegisterMaskPair &P : RegOpers.Defs) {
+    Register Reg = P.RegUnit;
+    if (!Reg.isVirtual())
+      continue;
+    LaneBitmask LiveAfter = LiveRegs[Reg];
+    LaneBitmask UseLanes = getRegLanes(RegOpers.Uses, Reg);
+    LaneBitmask DefLanes = P.LaneMask;
+    LaneBitmask LiveBefore = (LiveAfter & ~DefLanes) | UseLanes;
+
+    // There may be parts of the register that were dead before the
+    // instruction, but became live afterwards. Similarly, some parts
+    // may have been killed in this instruction.
+    CurPressure.inc(Reg, LiveAfter, LiveAfter & LiveBefore, *MRI);
+    CurPressure.inc(Reg, LiveAfter, ~LiveAfter & LiveBefore, *MRI);
+    MaxPressure = max(MaxPressure, CurPressure);
+  }
+  // Generate liveness for uses.
+  for (const RegisterMaskPair &P : RegOpers.Uses) {
+    Register Reg = P.RegUnit;
+    if (!Reg.isVirtual())
+      continue;
+    // If this register was also in a def operand, we've handled it
+    // with defs.
+    if (getRegLanes(RegOpers.Defs, Reg).any())
+      continue;
+    LaneBitmask LiveAfter = LiveRegs[Reg];
+    SlotIndex CurrIdx =
+        LastTrackedMI ? LIS.getInstructionIndex(*LastTrackedMI).getRegSlot()
+                      : LIS.getMBBEndIdx(MI->getParent());
+    ;
+    LaneBitmask LastUseMask =
+        findUseBetween(Reg, P.LaneMask, SlotIdx, CurrIdx, *MRI, &LIS, true);
+    LastUseMask &= ~LiveAfter;
+    LaneBitmask LiveBefore = (LiveAfter | LastUseMask);
+    CurPressure.inc(Reg, LiveAfter, LiveBefore, *MRI);
+  }
+  MaxPressure = max(MaxPressure, CurPressure);
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // GCNDownwardRPTracker
 
@@ -430,28 +583,44 @@ bool GCNDownwardRPTracker::reset(const MachineInstr &MI,
   return true;
 }
 
-bool GCNDownwardRPTracker::advanceBeforeNext() {
+bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
+                                             bool UseInternalIterator,
+                                             LiveIntervals *TheLIS) {
   assert(MRI && "call reset first");
-  if (!LastTrackedMI)
-    return NextMI == MBBEnd;
-
-  assert(NextMI == MBBEnd || !NextMI->isDebugInstr());
+  SlotIndex SI;
+  LiveIntervals *CurrLIS;
+  MachineInstr *CurrMI;
+  if (UseInternalIterator) {
+    if (!LastTrackedMI)
+      return NextMI == MBBEnd;
+
+    assert(NextMI == MBBEnd || !NextMI->isDebugInstr());
+    CurrLIS = const_cast<LiveIntervals *>(&LIS);
+    CurrMI = const_cast<MachineInstr *>(LastTrackedMI);
+
+    SI = NextMI == MBBEnd
+             ? CurrLIS->getInstructionIndex(*LastTrackedMI).getDeadSlot()
+             : CurrLIS->getInstructionIndex(*NextMI).getBaseIndex();
+  } else { // !UseInternalIterator
+    CurrLIS = TheLIS;
+    SI = CurrLIS->getInstructionIndex(*MI).getBaseIndex();
+    CurrMI = MI;
+  }
 
-  SlotIndex SI = NextMI == MBBEnd
-                     ? LIS.getInstructionIndex(*LastTrackedMI).getDeadSlot()
-                     : LIS.getInstructionIndex(*NextMI).getBaseIndex();
   assert(SI.isValid());
 
   // Remove dead registers or mask bits.
   SmallSet<Register, 8> SeenRegs;
-  for (auto &MO : LastTrackedMI->operands()) {
+  for (auto &MO : CurrMI->operands()) {
     if (!MO.isReg() || !MO.getReg().isVirtual())
       continue;
     if (MO.isUse() && !MO.readsReg())
       continue;
+    if (!UseInternalIterator && MO.isDef())
+      continue;
     if (!SeenRegs.insert(MO.getReg()).second)
       continue;
-    const LiveInterval &LI = LIS.getInterval(MO.getReg());
+    const LiveInterval &LI = CurrLIS->getInterval(MO.getReg());
     if (LI.hasSubRanges()) {
       auto It = LiveRegs.end();
       for (const auto &S : LI.subranges()) {
@@ -481,15 +650,22 @@ bool GCNDownwardRPTracker::advanceBeforeNext() {
 
   LastTrackedMI = nullptr;
 
-  return NextMI == MBBEnd;
+  return UseInternalIterator && (NextMI == MBBEnd);
 }
 
-void GCNDownwardRPTracker::advanceToNext() {
-  LastTrackedMI = &*NextMI++;
-  NextMI = skipDebugInstructionsForward(NextMI, MBBEnd);
+void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
+                                         bool UseInternalIterator) {
+  if (UseInternalIterator) {
+    LastTrackedMI = &*NextMI++;
+    NextMI = skipDebugInstructionsForward(NextMI, MBBEnd);
+  } else {
+    LastTrackedMI = MI;
+  }
+
+  MachineInstr *CurrMI = const_cast<MachineInstr *>(LastTrackedMI);
 
   // Add new registers or mask bits.
-  for (const auto &MO : LastTrackedMI->all_defs()) {
+  for (const auto &MO : CurrMI->all_defs()) {
     Register Reg = MO.getReg();
     if (!Reg.isVirtual())
       continue;
@@ -502,11 +678,12 @@ void GCNDownwardRPTracker::advanceToNext() {
   MaxPressure = max(MaxPressure, CurPressure);
 }
 
-bool GCNDownwardRPTracker::advance() {
-  if (NextMI == MBBEnd)
+bool GCNDownwardRPTracker::advance(MachineInstr *MI, bool UseInternalIterator,
+                                   LiveIntervals *TheLIS) {
+  if (UseInternalIterator && NextMI == MBBEnd)
     return false;
-  advanceBeforeNext();
-  advanceToNext();
+  advanceBeforeNext(MI, UseInternalIterator, TheLIS);
+  advanceToNext(MI, UseInternalIterator);
   return true;
 }
 
@@ -548,6 +725,65 @@ Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
   });
 }
 
+void GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI) {
+  assert(!MI->isDebugOrPseudoInstr() && "Expect a nondebug instruction.");
+
+  SlotIndex SlotIdx;
+  SlotIdx = LIS.getInstructionIndex(*MI).getRegSlot();
+
+  // Account for register pressure similar to RegPressureTracker::advance().
+  RegisterOperands RegOpers;
+  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+  RegOpers.collect(*MI, *TRI, *MRI, true, /*IgnoreDead=*/false);
+  RegOpers.adjustLaneLiveness(LIS, *MRI, SlotIdx);
+
+  for (const RegisterMaskPair &Use : RegOpers.Uses) {
+    Register Reg = Use.RegUnit;
+    if (!Reg.isVirtual())
+      continue;
+    LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx);
+    if (LastUseMask.none())
+      continue;
+    // The LastUseMask is queried from the liveness information of instruction
+    // which may be further down the schedule. Some lanes may actually not be
+    // last uses for the current position.
+    // FIXME: allow the caller to pass in the list of vreg uses that remain
+    // to be bottom-scheduled to avoid searching uses at each query.
+    SlotIndex CurrIdx;
+    const MachineBasicBlock *MBB = MI->getParent();
+    MachineBasicBlock::const_iterator IdxPos = skipDebugInstructionsForward(
+        LastTrackedMI ? LastTrackedMI : MBB->begin(), MBB->end());
+    if (IdxPos == MBB->end()) {
+      CurrIdx = LIS.getMBBEndIdx(MBB);
+    } else {
+      CurrIdx = LIS.getInstructionIndex(*IdxPos).getRegSlot();
+    }
+
+    LastUseMask =
+        findUseBetween(Reg, LastUseMask, CurrIdx, SlotIdx, *MRI, &LIS);
+    if (LastUseMask.none())
+      continue;
+
+    LaneBitmask LiveMask = LiveRegs[Reg];
+    LaneBitmask NewMask = LiveMask & ~LastUseMask;
+    CurPressure.inc(Reg, LiveMask, NewMask, *MRI);
+  }
+
+  // Generate liveness for defs.
+  for (const RegisterMaskPair &Def : RegOpers.Defs) {
+    Register Reg = Def.RegUnit;
+    if (!Reg.isVirtual())
+      continue;
+    LaneBitmask LiveMask = LiveRegs[Reg];
+    LaneBitmask NewMask = LiveMask | Def.LaneMask;
+    CurPressure.inc(Reg, LiveMask, NewMask, *MRI);
+  }
+  MaxPressure = max(MaxPressure, CurPressure);
+
+  // Boost pressure for all dead defs together.
+  bumpDeadDefs(RegOpers.DeadDefs);
+}
+
 bool GCNUpwardRPTracker::isValid() const {
   const auto &SI = LIS.getInstructionIndex(*LastTrackedMI).getBaseIndex();
   const auto LISLR = llvm::getLiveRegs(SI, LIS, *MRI);
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 54dc1972d2761..a79e412ce3344 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -19,6 +19,7 @@
 
 #include "GCNSubtarget.h"
 #include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/RegisterPressure.h"
 #include <algorithm>
 
 namespace llvm {
@@ -149,6 +150,9 @@ inline GCNRegPressure operator-(const GCNRegPressure &P1,
   return Diff;
 }
 
+///////////////////////////////////////////////////////////////////////////////
+// GCNRPTracker
+
 class GCNRPTracker {
 public:
   using LiveRegSet = DenseMap<unsigned, LaneBitmask>;
@@ -165,7 +169,14 @@ class GCNRPTracker {
   void reset(const MachineInstr &MI, const LiveRegSet *LiveRegsCopy,
              bool After);
 
+  void bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs);
+
+  LaneBitmask getLastUsedLanes(Register RegUnit, SlotIndex Pos) const;
+
 public:
+  // reset tracker and set live register set to the specified value.
+  void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
+
   // live regs for the current state
   const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; }
   const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
@@ -182,34 +193,40 @@ class GCNRPTracker {
 GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
                                      const MachineRegisterInfo &MRI);
 
+////////////////////////////////////////////////////////////////////////////////
+// GCNUpwardRPTracker
+
 class GCNUpwardRPTracker : public GCNRPTracker {
 public:
   GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {}
 
-  // reset tracker and set live register set to the specified value.
-  void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
+  using GCNRPTracker::reset;
 
-  // reset tracker at the specified slot index.
+  /// reset tracker at the specified slot index \p SI.
   void reset(const MachineRegisterInfo &MRI, SlotIndex SI) {
-    reset(MRI, llvm::getLiveRegs(SI, LIS, MRI));
+    GCNRPTracker::reset(MRI, llvm::getLiveRegs(SI, LIS, MRI));
   }
 
-  // reset tracker to the end of the MBB.
+  /// reset tracker to the end of the \p MBB.
   void reset(const MachineBasicBlock &MBB) {
     reset(MBB.getParent()->getRegInfo(),
           LIS.getSlotIndexes()->getMBBEndIdx(&MBB));
   }
 
-  // reset tracker to the point just after MI (in program order).
+  /// reset tracker to the point just after \p MI (in program order).
   void reset(const MachineInstr &MI) {
     reset(MI.getMF()->getRegInfo(), LIS.getInstructionIndex(MI).getDeadSlot());
   }
 
-  // move to the state just before the MI (in program order).
+  /// Move to the state of RP just before the \p MI (in program order). The
+  /// instruction to recede over is always supplied by the caller, so the
+  /// program order may be managed externally.
   void recede(const MachineInstr &MI);
 
-  // checks whether the tracker's state after receding MI corresponds
-  // to reported by LIS.
+  void bumpUpwardPressure(const MachineInstr *MI);
+
+  /// \returns whether the tracker's state after receding MI corresponds
+  /// to the state reported by LIS.
   bool isValid() const;
 
   const GCNRegPressure &getMaxPressure() const { return MaxPressure; }
@@ -223,6 +240,9 @@ class GCNUpwardRPTracker : public GCNRPTracker {
   }
 };
 
+////////////////////////////////////////////////////////////////////////////////
+// GCNDownwardRPTracker
+
 class GCNDownwardRPTracker : public GCNRPTracker {
   // Last position of reset or advanceBeforeNext
   MachineBasicBlock::const_iterator NextMI;
@@ -232,37 +252,62 @@ class GCNDownwardRPTracker : public GCNRPTracker {
 public:
   GCNDownwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {}
 
+  using GCNRPTracker::reset;
+
   MachineBasicBlock::const_iterator getNext() const { return NextMI; }
 
-  // Return MaxPressure and clear it.
+  /// \returns MaxPressure and clears it.
   GCNRegPressure moveMaxPressure() {
     auto Res = MaxPressure;
     MaxPressure.clear();
     return Res;
   }
 
-  // Reset tracker to the point before the MI
-  // filling live regs upon this point using LIS.
-  // Returns false if block is empty except debug values.
+  /// Reset tracker to the point before the \p MI
+  /// filling \p LiveRegs upon this point using LIS.
+  /// \returns false if block is empty except debug values.
   bool reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr);
 
-  // Move to the state right before the next MI or after the end of MBB.
-  // Returns false if reached end of the block.
-  bool advanceBeforeNext();
-
-  // Move to the state at the MI, advanceBeforeNext has to be called first.
-  void advanceToNext();
-
-  // Move to the state at the next MI. Returns false if reached end of block.
-  bool advance();
-
-  // Advance instructions until before End.
+  /// Move to the state right before the next MI or after the end of MBB.
+  /// \returns true if the internal iterator has reached the end of the block.
+  /// If \p UseInternalIterator is true, then internal iterators are used and
+  /// set to process in program order. If \p UseInternalIterator is false, then
+  /// it is assumed that the tracker is using an externally managed iterator,
+  /// and advance* calls will not update the state of the iterator. In such
+  /// cases, the tracker will move to the state right before the provided \p MI
+  /// and use the provided \p TheLIS for RP calculations.
+  bool advanceBeforeNext(MachineInstr *MI = nullptr,
+                         bool UseInternalIterator = true,
+                         LiveIntervals *TheLIS = nullptr);
+
+  /// Move to the state at the MI, advanceBeforeNext has to be called first.
+  /// If \p UseInternalIterator is true, then internal iterators are used and
+  /// set to process in program order. If \p UseInternalIterator is false, then
+  /// it is assumed that the tracker is using an externally managed iterator,
+  /// and advance* calls will not update the state of the iterator. In such
+  /// cases, the tracker will move to the state at the provided \p MI .
+  void advanceToNext(MachineInstr *MI = nullptr,
+                     bool UseInternalIterator = true);
+
+  /// Move to the state at the next MI. \returns false if reached end of
+  /// block. If \p UseInternalIterator is true, then internal iterators are used
+  /// and set to process in program order. If \p UseInternalIterator is false,
+  /// then it is assumed that the tracker is using an externally managed
+  /// iterator, and advance* calls will not update the state of the iterator. In
+  /// such cases, the tracker will move to the state at the provided \p MI
+  /// and use the provided \p TheLIS for RP calculations.
+  bool advance(MachineInstr *MI = nullptr, bool UseInternalIterator = true,
+               LiveIntervals *TheLIS = nullptr);
+
+  /// Advance instructions until before \p End.
   bool advance(MachineBasicBlock::const_iterator End);
 
-  // Reset to Begin and advance to End.
+  /// Reset to \p Begin and advance to \p End.
   bool advance(MachineBasicBlock::const_iterator Begin,
                MachineBasicBlock::const_iterator End,
                const LiveRegSet *LiveRegsCopy = nullptr);
+
+  void bumpDownwardPressure(const MachineInstr *MI);
 };
 
 LaneBitmask getLiveLaneMask(unsigned Reg,

>From eb3bf2f6cfc801df54288efad7f187afa6894a35 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 21 May 2024 18:04:25 -0700
Subject: [PATCH 03/14] [AMDGPU] Optionally Use AMDGPU RPTrackers during
 scheduling

Change-Id: I6ae56149c1eb49ea85362267174cc6274c416330
---
 .../Target/AMDGPU/GCNIterativeScheduler.cpp   |  2 +-
 llvm/lib/Target/AMDGPU/GCNRegPressure.h       |  1 -
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp   | 90 ++++++++++++++++---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.h     | 19 +++-
 4 files changed, 96 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 061b0515031b1..79656f5b2b9f4 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -480,7 +480,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
   LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
                        "target occupancy = "
                     << TgtOcc << '\n');
-  GCNMaxOccupancySchedStrategy LStrgy(Context);
+  GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler*/ true);
   unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
 
   for (int I = 0; I < NumPasses; ++I) {
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index a79e412ce3344..f78e4d7da0a1d 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -176,7 +176,6 @@ class GCNRPTracker {
 public:
   // reset tracker and set live register set to the specified value.
   void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
-
   // live regs for the current state
   const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; }
   const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index f1f28ed30c5e7..4533b61baa72f 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -67,6 +67,7 @@ const unsigned ScheduleMetrics::ScaleFactor = 100;
 
 GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
     : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
+      TheTracker(*C->LIS), TheUpwardTracker(*C->LIS),
       HasHighPressure(false) {}
 
 void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
@@ -156,14 +157,37 @@ static bool canUsePressureDiffs(const SUnit &SU) {
 static void getRegisterPressures(bool AtTop,
                                  const RegPressureTracker &RPTracker, SUnit *SU,
                                  std::vector<unsigned> &Pressure,
-                                 std::vector<unsigned> &MaxPressure) {
+                                 std::vector<unsigned> &MaxPressure,
+                                 GCNDownwardRPTracker &TheTracker,
+                                 GCNUpwardRPTracker &TheUpwardTracker,
+                                 ScheduleDAGMI *DAG) {
   // getDownwardPressure() and getUpwardPressure() make temporary changes to
   // the tracker, so we need to pass those function a non-const copy.
   RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
-  if (AtTop)
-    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
-  else
-    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
+  if (!GCNTrackers) {
+    if (AtTop)
+      TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
+    else
+      TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
+  } else {
+    if (AtTop) {
+      GCNDownwardRPTracker TempTopTracker(TheTracker);
+      auto MI = SU->getInstr();
+      TempTopTracker.advance(MI, true, DAG->getLIS());
+
+      Pressure[AMDGPU::RegisterPressureSets::SReg_32] = TempTopTracker.getPressure().getSGPRNum();
+      Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = TempTopTracker.getPressure().getVGPRNum(false);
+    }
+
+    else {
+      GCNUpwardRPTracker TempBotTracker(TheUpwardTracker);
+      auto MI = SU->getInstr();
+      TempBotTracker.recede(*MI, true);
+
+      Pressure[AMDGPU::RegisterPressureSets::SReg_32] = TempBotTracker.getPressure().getSGPRNum();
+      Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = TempBotTracker.getPressure().getVGPRNum(false);
+    }
+  }
 }
 
 void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
@@ -192,8 +216,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
   //
   // In EXPENSIVE_CHECKS, we always query RPTracker to verify the results of
   // PressureDiffs.
-  if (AtTop || !canUsePressureDiffs(*SU)) {
-    getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure);
+  if (AtTop || !canUsePressureDiffs(*SU) || GCNTrackers) {
+    getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure, TheTracker, TheUpwardTracker, DAG);
   } else {
     // Reserve 4 slots.
     Pressure.resize(4, 0);
@@ -211,7 +235,11 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
 
 #ifdef EXPENSIVE_CHECKS
     std::vector<unsigned> CheckPressure, CheckMaxPressure;
+<<<<<<< HEAD
     getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure);
+=======
+    getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,TheTracker,TheUpwardTracker, DAG);
+>>>>>>> 3fc6929b4a78... [AMDGPU] Optionally Use AMDGPU RPTrackers during scheduling
     if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
             CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
         Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
@@ -299,8 +327,16 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
   unsigned SGPRPressure = 0;
   unsigned VGPRPressure = 0;
   if (DAG->isTrackingPressure()) {
-    SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
-    VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+    SGPRPressure =
+        GCNTrackers
+            ? (Zone.isTop() ? TheTracker.getPressure().getSGPRNum()
+                            : TheUpwardTracker.getPressure().getSGPRNum())
+            : Pressure[AMDGPU::RegisterPressureSets::SReg_32];
+    VGPRPressure =
+        GCNTrackers
+            ? (Zone.isTop() ? TheTracker.getPressure().getVGPRNum(false)
+                            : TheUpwardTracker.getPressure().getVGPRNum(false))
+            : Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
   }
   ReadyQueue &Q = Zone.Available;
   for (SUnit *SU : Q) {
@@ -449,6 +485,16 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
   return SU;
 }
 
+void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+  if (GCNTrackers) {
+    MachineInstr *MI = SU->getInstr();
+    IsTopNode ? (void)TheTracker.advance(MI, true, DAG->getLIS())
+              : TheUpwardTracker.recede(*MI, true);
+  }
+
+  return GenericScheduler::schedNode(SU, IsTopNode);
+}
+
 GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
   assert(CurrentStage && CurrentStage != SchedStages.end());
   return *CurrentStage;
@@ -475,12 +521,13 @@ GCNSchedStageID GCNSchedStrategy::getNextStage() const {
 }
 
 GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
-    const MachineSchedContext *C)
+    const MachineSchedContext *C, bool IsLegacyScheduler)
     : GCNSchedStrategy(C) {
   SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
   SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
   SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
   SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
+  GCNTrackers = GCNTrackers & !IsLegacyScheduler;
 }
 
 GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
@@ -787,6 +834,20 @@ void GCNScheduleDAGMILive::runSchedStages() {
         continue;
       }
 
+      if (GCNTrackers) {
+        GCNDownwardRPTracker *TheTracker = S.getTracker();
+        GCNUpwardRPTracker *TheUpwardTracker = S.getUpwardTracker();
+        GCNRPTracker::LiveRegSet *RegionLiveIns = &LiveIns[Stage->getRegionIdx()];
+
+        reinterpret_cast<GCNRPTracker *>(TheTracker)->reset(
+            Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+            *RegionLiveIns);
+        reinterpret_cast<GCNRPTracker *>(TheUpwardTracker)->reset(
+            Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+            RegionLiveOuts.getLiveRegsForRegionIdx(Stage->getRegionIdx()));
+
+      }
+
       ScheduleDAGMILive::schedule();
       Stage->finalizeGCNRegion();
     }
@@ -1057,6 +1118,7 @@ void GCNSchedStage::finalizeGCNRegion() {
 void GCNSchedStage::checkScheduling() {
   // Check the results of scheduling.
   PressureAfter = DAG.getRealRegPressure(RegionIdx);
+
   LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
   LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
 
@@ -1608,9 +1670,6 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
     MachineInstr *MI = Entry.first;
     MachineInstr *OldMI = Entry.second;
 
-    // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
-    DAG.BBLiveInMap.erase(OldMI);
-
     // Remove OldMI and update LIS
     Register Reg = MI->getOperand(0).getReg();
     LIS->RemoveMachineInstrFromMaps(*OldMI);
@@ -1628,6 +1687,11 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
   DAG.Regions = NewRegions;
   DAG.RescheduleRegions = NewRescheduleRegions;
 
+  DAG.BBLiveInMap = DAG.getBBLiveInMap();
+
+  if (GCNTrackers)
+    DAG.RegionLiveOuts.buildLiveRegMap();
+
   SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
   MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
 
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index c402fb1ef373c..8088339fbd26c 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -70,6 +70,12 @@ class GCNSchedStrategy : public GenericScheduler {
   // Pointer to the current SchedStageID.
   SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;
 
+  // GCN RP Tracker for top-down scheduling
+  mutable GCNDownwardRPTracker TheTracker;
+
+  // GCN RP Tracker for botttom-up scheduling
+  mutable GCNUpwardRPTracker TheUpwardTracker;
+
 public:
   // schedule() have seen register pressure over the critical limits and had to
   // track register pressure for actual scheduling heuristics.
@@ -102,6 +108,8 @@ class GCNSchedStrategy : public GenericScheduler {
 
   SUnit *pickNode(bool &IsTopNode) override;
 
+  void schedNode(SUnit *SU, bool IsTopNode) override;
+
   void initialize(ScheduleDAGMI *DAG) override;
 
   unsigned getTargetOccupancy() { return TargetOccupancy; }
@@ -116,13 +124,19 @@ class GCNSchedStrategy : public GenericScheduler {
   bool hasNextStage() const;
 
   GCNSchedStageID getNextStage() const;
+
+  GCNDownwardRPTracker *getTracker() { return &TheTracker; }
+
+  GCNUpwardRPTracker *getUpwardTracker() { return &TheUpwardTracker; }
+
 };
 
 /// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
 /// maximum number of waves per simd).
 class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
 public:
-  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
+  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
+                               bool IsLegacyScheduler = false);
 };
 
 /// The goal of this scheduling strategy is to maximize ILP for a single wave
@@ -350,6 +364,9 @@ class GCNSchedStage {
   bool isRegionWithExcessRP() const {
     return DAG.RegionsWithExcessRP[RegionIdx];
   }
+  
+  // The region number this stage is currently working on
+  unsigned getRegionIdx() { return RegionIdx; }
 
   // Returns true if the new schedule may result in more spilling.
   bool mayCauseSpilling(unsigned WavesAfter);

>From 02e918d6d93203b1e4d07ec8da00f7d9b55ccf95 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 14 Jun 2024 14:46:28 -0700
Subject: [PATCH 04/14] Formatting

Change-Id: I1cb0a88e94f4156da6118fcd3724556939351c6d
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 46 +++++++++++----------
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.h   |  3 +-
 2 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 4533b61baa72f..23eb4afd16635 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -66,9 +66,8 @@ static cl::opt<bool> GCNTrackers(
 const unsigned ScheduleMetrics::ScaleFactor = 100;
 
 GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
-    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
-      TheTracker(*C->LIS), TheUpwardTracker(*C->LIS),
-      HasHighPressure(false) {}
+    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr), TheTracker(*C->LIS),
+      TheUpwardTracker(*C->LIS), HasHighPressure(false) {}
 
 void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
   GenericScheduler::initialize(DAG);
@@ -175,8 +174,10 @@ static void getRegisterPressures(bool AtTop,
       auto MI = SU->getInstr();
       TempTopTracker.advance(MI, true, DAG->getLIS());
 
-      Pressure[AMDGPU::RegisterPressureSets::SReg_32] = TempTopTracker.getPressure().getSGPRNum();
-      Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = TempTopTracker.getPressure().getVGPRNum(false);
+      Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
+          TempTopTracker.getPressure().getSGPRNum();
+      Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
+          TempTopTracker.getPressure().getVGPRNum(false);
     }
 
     else {
@@ -184,8 +185,10 @@ static void getRegisterPressures(bool AtTop,
       auto MI = SU->getInstr();
       TempBotTracker.recede(*MI, true);
 
-      Pressure[AMDGPU::RegisterPressureSets::SReg_32] = TempBotTracker.getPressure().getSGPRNum();
-      Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = TempBotTracker.getPressure().getVGPRNum(false);
+      Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
+          TempBotTracker.getPressure().getSGPRNum();
+      Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
+          TempBotTracker.getPressure().getVGPRNum(false);
     }
   }
 }
@@ -217,7 +220,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
   // In EXPENSIVE_CHECKS, we always query RPTracker to verify the results of
   // PressureDiffs.
   if (AtTop || !canUsePressureDiffs(*SU) || GCNTrackers) {
-    getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure, TheTracker, TheUpwardTracker, DAG);
+    getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure,
+                         TheTracker, TheUpwardTracker, DAG);
   } else {
     // Reserve 4 slots.
     Pressure.resize(4, 0);
@@ -235,11 +239,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
 
 #ifdef EXPENSIVE_CHECKS
     std::vector<unsigned> CheckPressure, CheckMaxPressure;
-<<<<<<< HEAD
-    getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure);
-=======
-    getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,TheTracker,TheUpwardTracker, DAG);
->>>>>>> 3fc6929b4a78... [AMDGPU] Optionally Use AMDGPU RPTrackers during scheduling
+    getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,
+                         TheTracker, TheUpwardTracker, DAG);
     if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
             CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
         Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
@@ -837,15 +838,16 @@ void GCNScheduleDAGMILive::runSchedStages() {
       if (GCNTrackers) {
         GCNDownwardRPTracker *TheTracker = S.getTracker();
         GCNUpwardRPTracker *TheUpwardTracker = S.getUpwardTracker();
-        GCNRPTracker::LiveRegSet *RegionLiveIns = &LiveIns[Stage->getRegionIdx()];
-
-        reinterpret_cast<GCNRPTracker *>(TheTracker)->reset(
-            Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
-            *RegionLiveIns);
-        reinterpret_cast<GCNRPTracker *>(TheUpwardTracker)->reset(
-            Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
-            RegionLiveOuts.getLiveRegsForRegionIdx(Stage->getRegionIdx()));
-
+        GCNRPTracker::LiveRegSet *RegionLiveIns =
+            &LiveIns[Stage->getRegionIdx()];
+
+        reinterpret_cast<GCNRPTracker *>(TheTracker)
+            ->reset(Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+                    *RegionLiveIns);
+        reinterpret_cast<GCNRPTracker *>(TheUpwardTracker)
+            ->reset(
+                Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+                RegionLiveOuts.getLiveRegsForRegionIdx(Stage->getRegionIdx()));
       }
 
       ScheduleDAGMILive::schedule();
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 8088339fbd26c..e8c89b2f1baf2 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -128,7 +128,6 @@ class GCNSchedStrategy : public GenericScheduler {
   GCNDownwardRPTracker *getTracker() { return &TheTracker; }
 
   GCNUpwardRPTracker *getUpwardTracker() { return &TheUpwardTracker; }
-
 };
 
 /// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
@@ -364,7 +363,7 @@ class GCNSchedStage {
   bool isRegionWithExcessRP() const {
     return DAG.RegionsWithExcessRP[RegionIdx];
   }
-  
+
   // The region number this stage is currently working on
   unsigned getRegionIdx() { return RegionIdx; }
 

>From 8c94313e827273dec2dc49816fde9a2344d3fc20 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 27 May 2024 10:43:43 -0700
Subject: [PATCH 05/14] Actually use the iterative trackers

Change-Id: I198925f5ed91b0a49ac265e19fdbe2208139f09a
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 23eb4afd16635..ead06c2dd6395 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -172,7 +172,7 @@ static void getRegisterPressures(bool AtTop,
     if (AtTop) {
       GCNDownwardRPTracker TempTopTracker(TheTracker);
       auto MI = SU->getInstr();
-      TempTopTracker.advance(MI, true, DAG->getLIS());
+      TempTopTracker.advance(MI, false, DAG->getLIS());
 
       Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
           TempTopTracker.getPressure().getSGPRNum();
@@ -183,7 +183,7 @@ static void getRegisterPressures(bool AtTop,
     else {
       GCNUpwardRPTracker TempBotTracker(TheUpwardTracker);
       auto MI = SU->getInstr();
-      TempBotTracker.recede(*MI, true);
+      TempBotTracker.recede(*MI, false);
 
       Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
           TempBotTracker.getPressure().getSGPRNum();
@@ -489,8 +489,8 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
 void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
   if (GCNTrackers) {
     MachineInstr *MI = SU->getInstr();
-    IsTopNode ? (void)TheTracker.advance(MI, true, DAG->getLIS())
-              : TheUpwardTracker.recede(*MI, true);
+    IsTopNode ? (void)TheTracker.advance(MI, false, DAG->getLIS())
+              : TheUpwardTracker.recede(*MI, false);
   }
 
   return GenericScheduler::schedNode(SU, IsTopNode);

>From 08560336e957daa7558740764d0a9df68cbca51d Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 28 May 2024 13:24:09 -0700
Subject: [PATCH 06/14] Review Comments

Change-Id: Ifa69110bf0a239ea14d25c0bad03215d1b018656
---
 .../Target/AMDGPU/GCNIterativeScheduler.cpp   |  2 +-
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp   | 51 +++++++++----------
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.h     |  8 +--
 3 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 79656f5b2b9f4..1929ee6b89f4e 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -480,7 +480,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
   LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
                        "target occupancy = "
                     << TgtOcc << '\n');
-  GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler*/ true);
+  GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler=*/ true);
   unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
 
   for (int I = 0; I < NumPasses; ++I) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index ead06c2dd6395..0d64968297605 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -66,8 +66,8 @@ static cl::opt<bool> GCNTrackers(
 const unsigned ScheduleMetrics::ScaleFactor = 100;
 
 GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
-    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr), TheTracker(*C->LIS),
-      TheUpwardTracker(*C->LIS), HasHighPressure(false) {}
+    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr), DownwardTracker(*C->LIS),
+      UpwardTracker(*C->LIS), HasHighPressure(false) {}
 
 void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
   GenericScheduler::initialize(DAG);
@@ -157,8 +157,8 @@ static void getRegisterPressures(bool AtTop,
                                  const RegPressureTracker &RPTracker, SUnit *SU,
                                  std::vector<unsigned> &Pressure,
                                  std::vector<unsigned> &MaxPressure,
-                                 GCNDownwardRPTracker &TheTracker,
-                                 GCNUpwardRPTracker &TheUpwardTracker,
+                                 GCNDownwardRPTracker &DownwardTracker,
+                                 GCNUpwardRPTracker &UpwardTracker,
                                  ScheduleDAGMI *DAG) {
   // getDownwardPressure() and getUpwardPressure() make temporary changes to
   // the tracker, so we need to pass those function a non-const copy.
@@ -170,7 +170,7 @@ static void getRegisterPressures(bool AtTop,
       TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
   } else {
     if (AtTop) {
-      GCNDownwardRPTracker TempTopTracker(TheTracker);
+      GCNDownwardRPTracker TempTopTracker(DownwardTracker);
       auto MI = SU->getInstr();
       TempTopTracker.advance(MI, false, DAG->getLIS());
 
@@ -181,7 +181,7 @@ static void getRegisterPressures(bool AtTop,
     }
 
     else {
-      GCNUpwardRPTracker TempBotTracker(TheUpwardTracker);
+      GCNUpwardRPTracker TempBotTracker(UpwardTracker);
       auto MI = SU->getInstr();
       TempBotTracker.recede(*MI, false);
 
@@ -221,7 +221,7 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
   // PressureDiffs.
   if (AtTop || !canUsePressureDiffs(*SU) || GCNTrackers) {
     getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure,
-                         TheTracker, TheUpwardTracker, DAG);
+                         DownwardTracker, UpwardTracker, DAG);
   } else {
     // Reserve 4 slots.
     Pressure.resize(4, 0);
@@ -240,7 +240,7 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
 #ifdef EXPENSIVE_CHECKS
     std::vector<unsigned> CheckPressure, CheckMaxPressure;
     getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,
-                         TheTracker, TheUpwardTracker, DAG);
+                         TheTracker, UpwardTracker, DAG);
     if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
             CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
         Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
@@ -330,13 +330,13 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
   if (DAG->isTrackingPressure()) {
     SGPRPressure =
         GCNTrackers
-            ? (Zone.isTop() ? TheTracker.getPressure().getSGPRNum()
-                            : TheUpwardTracker.getPressure().getSGPRNum())
+            ? (Zone.isTop() ? DownwardTracker.getPressure().getSGPRNum()
+                            : UpwardTracker.getPressure().getSGPRNum())
             : Pressure[AMDGPU::RegisterPressureSets::SReg_32];
     VGPRPressure =
         GCNTrackers
-            ? (Zone.isTop() ? TheTracker.getPressure().getVGPRNum(false)
-                            : TheUpwardTracker.getPressure().getVGPRNum(false))
+            ? (Zone.isTop() ? DownwardTracker.getPressure().getVGPRNum(false)
+                            : UpwardTracker.getPressure().getVGPRNum(false))
             : Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
   }
   ReadyQueue &Q = Zone.Available;
@@ -489,8 +489,8 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
 void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
   if (GCNTrackers) {
     MachineInstr *MI = SU->getInstr();
-    IsTopNode ? (void)TheTracker.advance(MI, false, DAG->getLIS())
-              : TheUpwardTracker.recede(*MI, false);
+    IsTopNode ? (void)DownwardTracker.advance(MI, false, DAG->getLIS())
+              : UpwardTracker.recede(*MI, false);
   }
 
   return GenericScheduler::schedNode(SU, IsTopNode);
@@ -836,18 +836,17 @@ void GCNScheduleDAGMILive::runSchedStages() {
       }
 
       if (GCNTrackers) {
-        GCNDownwardRPTracker *TheTracker = S.getTracker();
-        GCNUpwardRPTracker *TheUpwardTracker = S.getUpwardTracker();
-        GCNRPTracker::LiveRegSet *RegionLiveIns =
-            &LiveIns[Stage->getRegionIdx()];
-
-        reinterpret_cast<GCNRPTracker *>(TheTracker)
-            ->reset(Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
-                    *RegionLiveIns);
-        reinterpret_cast<GCNRPTracker *>(TheUpwardTracker)
-            ->reset(
-                Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
-                RegionLiveOuts.getLiveRegsForRegionIdx(Stage->getRegionIdx()));
+        GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
+        GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
+        GCNRPTracker::LiveRegSet *RegionLiveIns = &LiveIns[Stage->getRegionIdx()];
+
+        reinterpret_cast<GCNRPTracker *>(DownwardTracker)->reset(
+            Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+            *RegionLiveIns);
+        reinterpret_cast<GCNRPTracker *>(UpwardTracker)->reset(
+            Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
+            RegionLiveOuts.getLiveRegsForRegionIdx(Stage->getRegionIdx()));
+
       }
 
       ScheduleDAGMILive::schedule();
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index e8c89b2f1baf2..91b4c0c63d2bb 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -71,10 +71,10 @@ class GCNSchedStrategy : public GenericScheduler {
   SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;
 
   // GCN RP Tracker for top-down scheduling
-  mutable GCNDownwardRPTracker TheTracker;
+  mutable GCNDownwardRPTracker DownwardTracker;
 
   // GCN RP Tracker for botttom-up scheduling
-  mutable GCNUpwardRPTracker TheUpwardTracker;
+  mutable GCNUpwardRPTracker UpwardTracker;
 
 public:
   // schedule() have seen register pressure over the critical limits and had to
@@ -125,9 +125,9 @@ class GCNSchedStrategy : public GenericScheduler {
 
   GCNSchedStageID getNextStage() const;
 
-  GCNDownwardRPTracker *getTracker() { return &TheTracker; }
+  GCNDownwardRPTracker *getDownwardTracker() { return &DownwardTracker; }
 
-  GCNUpwardRPTracker *getUpwardTracker() { return &TheUpwardTracker; }
+  GCNUpwardRPTracker *getUpwardTracker() { return &UpwardTracker; }
 };
 
 /// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.

>From 1d2c412ecda697efdd27d5f2fde4cbe982d5123f Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 28 May 2024 13:29:41 -0700
Subject: [PATCH 07/14] Use DAG.MRI

Change-Id: I9f0275a0cede9e77dfd29262124f2a856f436c8c
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 0d64968297605..e1bbb7ccd1312 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -840,13 +840,11 @@ void GCNScheduleDAGMILive::runSchedStages() {
         GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
         GCNRPTracker::LiveRegSet *RegionLiveIns = &LiveIns[Stage->getRegionIdx()];
 
-        reinterpret_cast<GCNRPTracker *>(DownwardTracker)->reset(
-            Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
-            *RegionLiveIns);
-        reinterpret_cast<GCNRPTracker *>(UpwardTracker)->reset(
-            Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
-            RegionLiveOuts.getLiveRegsForRegionIdx(Stage->getRegionIdx()));
-
+        reinterpret_cast<GCNRPTracker *>(DownwardTracker)
+            ->reset(MRI, *RegionLiveIns);
+        reinterpret_cast<GCNRPTracker *>(UpwardTracker)
+            ->reset(MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
+                             Stage->getRegionIdx()));
       }
 
       ScheduleDAGMILive::schedule();

>From 9be299795e80d44c096dc3663fd8d01d47abe555 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 28 May 2024 13:52:29 -0700
Subject: [PATCH 08/14] Formatting

Change-Id: I74c19a2cf20d2325178933f81e0e8716d7c62f17
---
 llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp |  2 +-
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp      | 15 ++++++++-------
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 1929ee6b89f4e..085eb8e37e3cd 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -480,7 +480,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
   LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
                        "target occupancy = "
                     << TgtOcc << '\n');
-  GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler=*/ true);
+  GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler=*/true);
   unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
 
   for (int I = 0; I < NumPasses; ++I) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index e1bbb7ccd1312..cc03b69a61470 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -66,8 +66,9 @@ static cl::opt<bool> GCNTrackers(
 const unsigned ScheduleMetrics::ScaleFactor = 100;
 
 GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
-    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr), DownwardTracker(*C->LIS),
-      UpwardTracker(*C->LIS), HasHighPressure(false) {}
+    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
+      DownwardTracker(*C->LIS), UpwardTracker(*C->LIS), HasHighPressure(false) {
+}
 
 void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
   GenericScheduler::initialize(DAG);
@@ -329,10 +330,9 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
   unsigned VGPRPressure = 0;
   if (DAG->isTrackingPressure()) {
     SGPRPressure =
-        GCNTrackers
-            ? (Zone.isTop() ? DownwardTracker.getPressure().getSGPRNum()
-                            : UpwardTracker.getPressure().getSGPRNum())
-            : Pressure[AMDGPU::RegisterPressureSets::SReg_32];
+        GCNTrackers ? (Zone.isTop() ? DownwardTracker.getPressure().getSGPRNum()
+                                    : UpwardTracker.getPressure().getSGPRNum())
+                    : Pressure[AMDGPU::RegisterPressureSets::SReg_32];
     VGPRPressure =
         GCNTrackers
             ? (Zone.isTop() ? DownwardTracker.getPressure().getVGPRNum(false)
@@ -838,7 +838,8 @@ void GCNScheduleDAGMILive::runSchedStages() {
       if (GCNTrackers) {
         GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
         GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
-        GCNRPTracker::LiveRegSet *RegionLiveIns = &LiveIns[Stage->getRegionIdx()];
+        GCNRPTracker::LiveRegSet *RegionLiveIns =
+            &LiveIns[Stage->getRegionIdx()];
 
         reinterpret_cast<GCNRPTracker *>(DownwardTracker)
             ->reset(MRI, *RegionLiveIns);

>From f9b5af5f20796652e6f2f88186e64d59698ec299 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 14 Jun 2024 15:03:02 -0700
Subject: [PATCH 09/14] Review comments

Change-Id: I09f9ca74c07b516daed0e93a85937df8b9aa922b
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index cc03b69a61470..b9b701ba538fb 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -329,15 +329,16 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
   unsigned SGPRPressure = 0;
   unsigned VGPRPressure = 0;
   if (DAG->isTrackingPressure()) {
-    SGPRPressure =
-        GCNTrackers ? (Zone.isTop() ? DownwardTracker.getPressure().getSGPRNum()
-                                    : UpwardTracker.getPressure().getSGPRNum())
-                    : Pressure[AMDGPU::RegisterPressureSets::SReg_32];
-    VGPRPressure =
-        GCNTrackers
-            ? (Zone.isTop() ? DownwardTracker.getPressure().getVGPRNum(false)
-                            : UpwardTracker.getPressure().getVGPRNum(false))
-            : Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+    if (!GCNTrackers) {
+      SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
+      VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
+    } else {
+      GCNRPTracker *T = &UpwardTracker;
+      if (Zone.isTop())
+        T = &DownwardTracker;
+      SGPRPressure = T->getPressure().getSGPRNum();
+      VGPRPressure = T->getPressure().getVGPRNum(false);
+    }
   }
   ReadyQueue &Q = Zone.Available;
   for (SUnit *SU : Q) {

>From c4d8ebb57604387bc3019c4650b70e0a00683491 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 14 Jun 2024 16:14:57 -0700
Subject: [PATCH 10/14] Allocate Pressure vector

Change-Id: I5effce973fa2d945076e89b4453a844f0fc85fc9
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index b9b701ba538fb..48d0d6e8d6667 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -170,6 +170,7 @@ static void getRegisterPressures(bool AtTop,
     else
       TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
   } else {
+    Pressure.resize(4, 0);
     if (AtTop) {
       GCNDownwardRPTracker TempTopTracker(DownwardTracker);
       auto MI = SU->getInstr();

>From fcce7ffe7f438a386f2e2153f397b0bcd1ebf5b6 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 18 Jun 2024 11:39:48 -0700
Subject: [PATCH 11/14] Remove flag from upward RPTracker

Change-Id: I6217c03f56d34f584e5b23cf7c4462842bc7173b
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 48d0d6e8d6667..27003cdd67d59 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -185,7 +185,7 @@ static void getRegisterPressures(bool AtTop,
     else {
       GCNUpwardRPTracker TempBotTracker(UpwardTracker);
       auto MI = SU->getInstr();
-      TempBotTracker.recede(*MI, false);
+      TempBotTracker.recede(*MI);
 
       Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
           TempBotTracker.getPressure().getSGPRNum();
@@ -492,7 +492,7 @@ void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
   if (GCNTrackers) {
     MachineInstr *MI = SU->getInstr();
     IsTopNode ? (void)DownwardTracker.advance(MI, false, DAG->getLIS())
-              : UpwardTracker.recede(*MI, false);
+              : UpwardTracker.recede(*MI);
   }
 
   return GenericScheduler::schedNode(SU, IsTopNode);

>From 0e70bac9e055553da6940dcad5b658f1927ff36c Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 19 Jun 2024 11:45:32 -0700
Subject: [PATCH 12/14] Review comments

Change-Id: Ibeaba6cab034636472b20c36adfadabbbc2c19ef
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 53 ++++++++++-----------
 1 file changed, 25 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 27003cdd67d59..8976ae111037f 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -165,33 +165,30 @@ static void getRegisterPressures(bool AtTop,
   // the tracker, so we need to pass those function a non-const copy.
   RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
   if (!GCNTrackers) {
-    if (AtTop)
-      TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
-    else
-      TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
-  } else {
-    Pressure.resize(4, 0);
-    if (AtTop) {
-      GCNDownwardRPTracker TempTopTracker(DownwardTracker);
-      auto MI = SU->getInstr();
-      TempTopTracker.advance(MI, false, DAG->getLIS());
-
-      Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
-          TempTopTracker.getPressure().getSGPRNum();
-      Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
-          TempTopTracker.getPressure().getVGPRNum(false);
-    }
+    AtTop
+        ? TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure)
+        : TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
 
-    else {
-      GCNUpwardRPTracker TempBotTracker(UpwardTracker);
-      auto MI = SU->getInstr();
-      TempBotTracker.recede(*MI);
+    return;
+  }
 
-      Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
-          TempBotTracker.getPressure().getSGPRNum();
-      Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
-          TempBotTracker.getPressure().getVGPRNum(false);
-    }
+  // GCNTrackers
+  Pressure.resize(4, 0);
+  MachineInstr *MI = SU->getInstr();
+  if (AtTop) {
+    GCNDownwardRPTracker TempDownwardTracker(DownwardTracker);
+    TempDownwardTracker.advance(MI, false, DAG->getLIS());
+    Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
+        TempDownwardTracker.getPressure().getSGPRNum();
+    Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
+        TempDownwardTracker.getPressure().getVGPRNum(false);
+  } else {
+    GCNUpwardRPTracker TempUpwardTracker(UpwardTracker);
+    TempUpwardTracker.recede(*MI);
+    Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
+        TempUpwardTracker.getPressure().getSGPRNum();
+    Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
+        TempUpwardTracker.getPressure().getVGPRNum(false);
   }
 }
 
@@ -334,9 +331,9 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
       SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
       VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
     } else {
-      GCNRPTracker *T = &UpwardTracker;
-      if (Zone.isTop())
-        T = &DownwardTracker;
+      GCNRPTracker *T = Zone.isTop()
+                            ? static_cast<GCNRPTracker *>(&DownwardTracker)
+                            : static_cast<GCNRPTracker *>(&UpwardTracker);
       SGPRPressure = T->getPressure().getSGPRNum();
       VGPRPressure = T->getPressure().getVGPRNum(false);
     }

>From 415e531e98a99b052edf9f788a674fa35ab8cbd9 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 20 Jun 2024 08:49:26 -0700
Subject: [PATCH 13/14] Don't modify existing PreRARematStage LiveIn handling

Change-Id: I96c99f12c59ef0eea86f7fbf134913ecc47dd6f2
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 8976ae111037f..8c2b7ffb3f202 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1669,6 +1669,9 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
     MachineInstr *MI = Entry.first;
     MachineInstr *OldMI = Entry.second;
 
+    // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
+    DAG.BBLiveInMap.erase(OldMI);
+
     // Remove OldMI and update LIS
     Register Reg = MI->getOperand(0).getReg();
     LIS->RemoveMachineInstrFromMaps(*OldMI);
@@ -1686,8 +1689,6 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
   DAG.Regions = NewRegions;
   DAG.RescheduleRegions = NewRescheduleRegions;
 
-  DAG.BBLiveInMap = DAG.getBBLiveInMap();
-
   if (GCNTrackers)
     DAG.RegionLiveOuts.buildLiveRegMap();
 

>From 5ce0ab561d7f0ea1b1a1f450101b9bf7f53724ce Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 12 Aug 2024 13:55:44 -0700
Subject: [PATCH 14/14] Use GCNTracker RP speculation

Change-Id: I3e893ca2ffcf1032fe157b537c9563565215b123
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 8c2b7ffb3f202..8c50f5b35d122 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -177,18 +177,18 @@ static void getRegisterPressures(bool AtTop,
   MachineInstr *MI = SU->getInstr();
   if (AtTop) {
     GCNDownwardRPTracker TempDownwardTracker(DownwardTracker);
-    TempDownwardTracker.advance(MI, false, DAG->getLIS());
+    TempDownwardTracker.bumpDownwardPressure(MI);
     Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
         TempDownwardTracker.getPressure().getSGPRNum();
     Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
-        TempDownwardTracker.getPressure().getVGPRNum(false);
+        TempDownwardTracker.getPressure().getArchVGPRNum();
   } else {
     GCNUpwardRPTracker TempUpwardTracker(UpwardTracker);
-    TempUpwardTracker.recede(*MI);
+    TempUpwardTracker.bumpUpwardPressure(MI);
     Pressure[AMDGPU::RegisterPressureSets::SReg_32] =
         TempUpwardTracker.getPressure().getSGPRNum();
     Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
-        TempUpwardTracker.getPressure().getVGPRNum(false);
+        TempUpwardTracker.getPressure().getArchVGPRNum();
   }
 }
 



More information about the llvm-commits mailing list