[llvm] [AMDGPU][Scheduler] Refactor ArchVGPR rematerialization during scheduling (PR #125885)
Lucas Ramirez via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 17 04:38:13 PST 2025
================
@@ -1673,174 +1675,249 @@ bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
   return true;
 }
-void PreRARematStage::collectRematerializableInstructions() {
+bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
   const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
-  for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
-    Register Reg = Register::index2VirtReg(I);
-    if (!DAG.LIS->hasInterval(Reg))
-      continue;
-
-    // TODO: Handle AGPR and SGPR rematerialization
-    if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
-        !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
-      continue;
-
-    MachineOperand *Op = DAG.MRI.getOneDef(Reg);
-    MachineInstr *Def = Op->getParent();
-    if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
-      continue;
-
-    MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
-    if (Def->getParent() == UseI->getParent())
-      continue;
-    bool HasRematDependency = false;
-    // Check if this instruction uses any registers that are planned to be
-    // rematerialized
-    for (auto &RematEntry : RematerializableInsts) {
-      if (find_if(RematEntry.second,
-                  [&Def](std::pair<MachineInstr *, MachineInstr *> &Remat) {
-                    for (MachineOperand &MO : Def->operands()) {
-                      if (!MO.isReg())
-                        continue;
-                      if (MO.getReg() == Remat.first->getOperand(0).getReg())
-                        return true;
-                    }
-                    return false;
-                  }) != RematEntry.second.end()) {
-        HasRematDependency = true;
-        break;
+  REMAT_DEBUG(dbgs() << "Collecting rematerializable instructions in "
+                     << MF.getFunction().getName() << '\n');
+
+  // Maps optimizable regions (i.e., regions at minimum and VGPR-limited
+  // occupancy, or regions with VGPR spilling) to their excess RP.
+  DenseMap<unsigned, unsigned> OptRegions;
+
+  // Note that the maximum number of VGPRs to use to eliminate spill may be
+  // lower than the maximum number to increase occupancy when the function has
+  // the "amdgpu-num-vgpr" attribute.
+  const std::pair<unsigned, unsigned> OccBounds =
+      ST.getOccupancyWithWorkGroupSizes(MF);
+  // FIXME: we should be able to just call ST.getMaxNumArchVGPRs() but that
+  // would use the occupancy bounds as determined by
+  // MF.getFunction().getWavesPerEU(), which look incorrect in some cases.
+  const unsigned MaxVGPRsNoSpill =
+      ST.getBaseMaxNumVGPRs(MF.getFunction(),
+                            {ST.getMinNumArchVGPRs(OccBounds.second),
+                             ST.getMaxNumArchVGPRs(OccBounds.first)},
+                            false);
+  const unsigned MaxVGPRsIncOcc = ST.getMaxNumArchVGPRs(DAG.MinOccupancy + 1);
+  IncreaseOccupancy = OccBounds.second > DAG.MinOccupancy;
+
+  // Collect optimizable regions. If there is spilling in any region we will
+  // just try to reduce it. Otherwise we will try to increase occupancy by one.
+  for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
+    GCNRegPressure &RP = DAG.Pressure[I];
+    unsigned NumVGPRs = RP.getArchVGPRNum();
+    unsigned ExcessRP = 0;
+    if (NumVGPRs > MaxVGPRsNoSpill) {
+      if (IncreaseOccupancy) {
+        // We won't try to increase occupancy.
+        IncreaseOccupancy = false;
+        OptRegions.clear();
       }
-    }
-    // Do not rematerialize an instruction if it uses an instruction that we
-    // have designated for rematerialization.
-    // FIXME: Allow for rematerialization chains: this requires 1. updating
-    // remat points to account for uses that are rematerialized, and 2. either
-    // rematerializing the candidates in careful ordering, or deferring the MBB
-    // RP walk until the entire chain has been rematerialized.
-    if (HasRematDependency)
-      continue;
-
-    // Similarly, check if the UseI is planned to be remat.
-    for (auto &RematEntry : RematerializableInsts) {
-      if (find_if(RematEntry.second,
-                  [&UseI](std::pair<MachineInstr *, MachineInstr *> &Remat) {
-                    return Remat.first == UseI;
-                  }) != RematEntry.second.end()) {
-        HasRematDependency = true;
-        break;
-      }
-    }
-
-    if (HasRematDependency)
-      break;
-
-    // We are only collecting defs that are defined in another block and are
-    // live-through or used inside regions at MinOccupancy. This means that the
-    // register must be in the live-in set for the region.
-    bool AddedToRematList = false;
-    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
-      auto It = DAG.LiveIns[I].find(Reg);
-      if (It != DAG.LiveIns[I].end() && !It->second.none()) {
-        if (DAG.RegionsWithMinOcc[I]) {
-          SlotIndex DefIdx = DAG.LIS->getInstructionIndex(*Def);
-          SlotIndex UseIdx =
-              DAG.LIS->getInstructionIndex(*UseI).getRegSlot(true);
-          if (allUsesAvailableAt(Def, DefIdx, UseIdx)) {
-            RematerializableInsts[I][Def] = UseI;
-            AddedToRematList = true;
-          }
-        }
-
-        // Collect regions with rematerializable reg as live-in to avoid
-        // searching later when updating RP.
-        RematDefToLiveInRegions[Def].push_back(I);
+      // Region has VGPR spilling, we will try to reduce spilling as much as
+      // possible.
+      ExcessRP = NumVGPRs - MaxVGPRsNoSpill;
+      REMAT_DEBUG(dbgs() << "Region " << I << " is spilling VGPRs, save "
+                         << ExcessRP << " VGPR(s) to eliminate spilling\n");
+    } else if (IncreaseOccupancy) {
+      if (ST.getOccupancyWithNumSGPRs(RP.getSGPRNum()) == DAG.MinOccupancy) {
+        // Occupancy is SGPR-limited in the region, no point in trying to
+        // increase it through VGPR usage.
+        IncreaseOccupancy = false;
+        OptRegions.clear();
+      } else if (NumVGPRs > MaxVGPRsIncOcc) {
+        // Occupancy is VGPR-limited.
+        ExcessRP = NumVGPRs - MaxVGPRsIncOcc;
----------------
lucas-rami wrote:
Thanks for the catch; this should be handled correctly by `PreRARematStage::hasExcessVGPRs` as well now.
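
As a side note for readers of the hunk above, here is a minimal standalone sketch of the region classification it performs. This is not the PR's code: it omits the SGPR-limited early exit, and the two budgets and the per-region ArchVGPR counts are hypothetical stand-ins for `MaxVGPRsNoSpill`, `MaxVGPRsIncOcc`, and `DAG.Pressure[I]`.

```cpp
// Standalone illustration only; names and numbers are made up.
#include <cstdio>
#include <map>
#include <vector>

int main() {
  const unsigned MaxVGPRsNoSpill = 256; // hypothetical no-spill budget
  const unsigned MaxVGPRsIncOcc = 224;  // hypothetical budget for occupancy+1
  std::vector<unsigned> RegionVGPRs = {200, 240, 260}; // hypothetical RP

  std::map<unsigned, unsigned> OptRegions; // region index -> excess RP
  bool IncreaseOccupancy = true;

  for (unsigned I = 0, E = RegionVGPRs.size(); I != E; ++I) {
    unsigned NumVGPRs = RegionVGPRs[I];
    unsigned ExcessRP = 0;
    if (NumVGPRs > MaxVGPRsNoSpill) {
      // Spilling detected: stop targeting higher occupancy and only try to
      // save enough registers to eliminate the spill.
      if (IncreaseOccupancy) {
        IncreaseOccupancy = false;
        OptRegions.clear();
      }
      ExcessRP = NumVGPRs - MaxVGPRsNoSpill;
    } else if (IncreaseOccupancy && NumVGPRs > MaxVGPRsIncOcc) {
      // No spilling, but the region is VGPR-limited for the next occupancy
      // level: record how many registers must be saved to reach it.
      ExcessRP = NumVGPRs - MaxVGPRsIncOcc;
    }
    if (ExcessRP)
      OptRegions[I] = ExcessRP;
  }

  for (const auto &[Region, Excess] : OptRegions)
    std::printf("region %u: save %u ArchVGPR(s)\n", Region, Excess);
  return 0;
}
```

The point the sketch tries to capture is that a single spilling region makes the stage abandon the occupancy target and restart collection with only spill-reducing candidates.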
https://github.com/llvm/llvm-project/pull/125885
More information about the llvm-commits mailing list