[llvm] [AMDGPU][Scheduler] Refactor ArchVGPR rematerialization during scheduling (PR #125885)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 28 16:44:43 PDT 2025


================
@@ -1688,174 +1696,426 @@ bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
   return true;
 }
 
-void PreRARematStage::collectRematerializableInstructions() {
-  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
-  for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
-    Register Reg = Register::index2VirtReg(I);
-    if (!DAG.LIS->hasInterval(Reg))
-      continue;
+namespace {
+/// Models excess register pressure in a region and tracks our progress as we
+/// identify rematerialization opportunities.
+struct ExcessRP {
+  /// Number of excess ArchVGPRs.
+  unsigned ArchVGPRs = 0;
+  /// Number of excess AGPRs.
+  unsigned AGPRs = 0;
+  /// For unified register files, number of excess VGPRs.
+  unsigned VGPRs = 0;
+  /// For unified register files with AGPR usage, number of excess ArchVGPRs to
+  /// save before we are able to save a whole allocation granule.
+  unsigned ArchVGPRsToAlignment = 0;
+  /// Whether the region uses AGPRs.
+  bool HasAGPRs = false;
+
+  /// Constructs the excess RP model; determines the excess pressure w.r.t. a
+  /// maximum number of allowed VGPRs.
+  ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxVGPRs);
+
+  /// Accounts for \p NumRegs saved ArchVGPRs in the model. If \p
+  /// UseArchVGPRForAGPRSpill is true, saved ArchVGPRs are used to save excess
+  /// AGPRs once excess ArchVGPR pressure has been eliminated. Returns whether
+  /// saving these ArchVGPRs helped reduce excess pressure.
+  bool saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill);
+
+  /// Accounts for \p NumRegs saved AGPRs in the model. Returns whether saving
+  /// these AGPRs helped reduce excess pressure.
+  bool saveAGPRs(unsigned NumRegs);
+
+  /// Returns whether there is any excess register pressure.
+  operator bool() const { return ArchVGPRs != 0 || AGPRs != 0 || VGPRs != 0; }
 
-    // TODO: Handle AGPR and SGPR rematerialization
-    if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
-        !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
-      continue;
+#ifndef NDEBUG
+  friend raw_ostream &operator<<(raw_ostream &OS, const ExcessRP &Excess) {
+    OS << Excess.ArchVGPRs << " ArchVGPRs, " << Excess.AGPRs << " AGPRs, and "
+       << Excess.VGPRs << " VGPRs (next ArchVGPR aligment in "
+       << Excess.ArchVGPRsToAlignment << " registers)\n";
+    return OS;
+  }
+#endif
 
-    MachineOperand *Op = DAG.MRI.getOneDef(Reg);
-    MachineInstr *Def = Op->getParent();
-    if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
-      continue;
+private:
+  static inline bool saveRegs(unsigned &LeftToSave, unsigned &NumRegs) {
+    unsigned NumSaved = std::min(LeftToSave, NumRegs);
+    NumRegs -= NumSaved;
+    LeftToSave -= NumSaved;
+    return NumSaved;
+  }
+};
+} // namespace
+
+ExcessRP::ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP,
+                   unsigned MaxVGPRs) {
+  unsigned NumArchVGPRs = RP.getArchVGPRNum();
+  unsigned NumAGPRs = RP.getAGPRNum();
+  HasAGPRs = NumAGPRs;
+
+  if (!ST.hasGFX90AInsts()) {
+    // Non-unified RF. Account for excess pressure for ArchVGPRs and AGPRs
+    // independently.
+    if (NumArchVGPRs > MaxVGPRs)
+      ArchVGPRs = NumArchVGPRs - MaxVGPRs;
+    if (NumAGPRs > MaxVGPRs)
+      AGPRs = NumAGPRs - MaxVGPRs;
+    return;
+  }
 
-    MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
-    if (Def->getParent() == UseI->getParent())
-      continue;
+  // Independently of whether overall VGPR pressure is under the limit, we still
+  // have to check whether ArchVGPR pressure or AGPR pressure alone exceeds the
+  // number of addressable registers in each category.
+  const unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
+  if (NumArchVGPRs > MaxArchVGPRs) {
+    ArchVGPRs = NumArchVGPRs - MaxArchVGPRs;
+    NumArchVGPRs = MaxArchVGPRs;
+  }
+  if (NumAGPRs > MaxArchVGPRs) {
+    AGPRs = NumAGPRs - MaxArchVGPRs;
+    NumAGPRs = MaxArchVGPRs;
+  }
 
-    bool HasRematDependency = false;
-    // Check if this instruction uses any registers that are planned to be
-    // rematerialized
-    for (auto &RematEntry : RematerializableInsts) {
-      if (find_if(RematEntry.second,
-                  [&Def](std::pair<MachineInstr *, MachineInstr *> &Remat) {
-                    for (MachineOperand &MO : Def->operands()) {
-                      if (!MO.isReg())
-                        continue;
-                      if (MO.getReg() == Remat.first->getOperand(0).getReg())
-                        return true;
-                    }
-                    return false;
-                  }) != RematEntry.second.end()) {
-        HasRematDependency = true;
-        break;
-      }
+  // Check overall VGPR usage against the limit; any excess above addressable
+  // register limits has already been accounted for.
+  unsigned NumVGPRs = GCNRegPressure::getUnifiedVGPRNum(NumArchVGPRs, NumAGPRs);
+  if (NumVGPRs > MaxVGPRs) {
+    VGPRs = NumVGPRs - MaxVGPRs;
+    ArchVGPRsToAlignment = NumArchVGPRs - alignDown(NumArchVGPRs, 4);
+    if (!ArchVGPRsToAlignment)
+      ArchVGPRsToAlignment = 4;
+  }
+}
+
+bool ExcessRP::saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill) {
+  bool Progress = saveRegs(ArchVGPRs, NumRegs);
+
+  if (HasAGPRs) {
+    // ArchVGPRs can only be allocated as a multiple of a granule.
+    const unsigned Granule = 4;
+    unsigned NumSavedRegs = 0;
+
+    // Count the number of whole ArchVGPR allocation granules we can save.
+    if (unsigned NumGranules = NumRegs / Granule; NumGranules) {
+      NumSavedRegs = NumGranules * Granule;
+      NumRegs -= NumSavedRegs;
     }
-    // Do not rematerialize an instruction if it uses an instruction that we
-    // have designated for rematerialization.
-    // FIXME: Allow for rematerialization chains: this requires 1. updating
-    // remat points to account for uses that are rematerialized, and 2. either
-    // rematerializing the candidates in careful ordering, or deferring the MBB
-    // RP walk until the entire chain has been rematerialized.
-    if (HasRematDependency)
-      continue;
 
-    // Similarly, check if the UseI is planned to be remat.
-    for (auto &RematEntry : RematerializableInsts) {
-      if (find_if(RematEntry.second,
-                  [&UseI](std::pair<MachineInstr *, MachineInstr *> &Remat) {
-                    return Remat.first == UseI;
-                  }) != RematEntry.second.end()) {
-        HasRematDependency = true;
-        break;
-      }
+    // We may be able to save one more whole ArchVGPR allocation granule.
+    if (NumRegs >= ArchVGPRsToAlignment) {
+      NumSavedRegs += Granule;
+      ArchVGPRsToAlignment = Granule - (NumRegs - ArchVGPRsToAlignment);
----------------
jrbyrnes wrote:

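I think `NumRegs - ArchVGPRsToAlignment` can be greater than `Granule`, in which case `Granule - (NumRegs - ArchVGPRsToAlignment)` would wrap around, since all of the operands are unsigned.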

https://github.com/llvm/llvm-project/pull/125885


More information about the llvm-commits mailing list