[llvm] [AMDGPU][Scheduler] Scoring system for rematerialization candidates (PR #153092)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 15 14:18:27 PDT 2025


================
@@ -1090,33 +1101,223 @@ bool ClusteredLowOccStage::initGCNSchedStage() {
 #define REMAT_PREFIX "[PreRARemat] "
 #define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// Prints RP targets to the debug stream, one "  [index] target" line per
+/// region.
+///
+/// \param PrintAll when true, every entry of RPTargets is printed. Otherwise
+/// only the regions whose bit is set in TargetRegions are printed, preceded by
+/// a header line, or a single notice is printed when no bit is set.
+void PreRARematStage::printTargetRegions(bool PrintAll) const {
+  // Shared formatting for a single region's line.
+  auto PrintRegion = [this](unsigned RegionIdx) {
+    dbgs() << REMAT_PREFIX << "  [" << RegionIdx << "] "
+           << RPTargets[RegionIdx] << '\n';
+  };
+
+  if (PrintAll) {
+    const unsigned NumTargets = RPTargets.size();
+    for (unsigned RegionIdx = 0; RegionIdx != NumTargets; ++RegionIdx)
+      PrintRegion(RegionIdx);
+  } else if (TargetRegions.none()) {
+    dbgs() << REMAT_PREFIX << "No target regions\n";
+  } else {
+    dbgs() << REMAT_PREFIX << "Target regions:\n";
+    for (unsigned RegionIdx : TargetRegions.set_bits())
+      PrintRegion(RegionIdx);
+  }
+}
+
+/// Prints this rematerializable register to the debug stream: its defining
+/// instruction and region, its using instruction and region, then the regions
+/// of Live split in two lists according to isUnusedLiveThrough().
+void PreRARematStage::RematReg::print() const {
+  dbgs() << REMAT_PREFIX << "  [" << DefRegion << "] " << *DefMI;
+  dbgs() << REMAT_PREFIX << "    -> used in [" << UseRegion << "] " << *UseMI;
+
+  // Lists, on one line after Header, every live region whose
+  // isUnusedLiveThrough() answer equals WantUnusedLiveThrough.
+  auto PrintRegionList = [this](const char *Header,
+                                bool WantUnusedLiveThrough) {
+    dbgs() << REMAT_PREFIX << Header;
+    for (unsigned RegionIdx : Live.set_bits()) {
+      const bool UnusedLiveThrough = isUnusedLiveThrough(RegionIdx);
+      if (UnusedLiveThrough == WantUnusedLiveThrough)
+        dbgs() << " [" << RegionIdx << "]";
+    }
+    dbgs() << '\n';
+  };
+
+  PrintRegionList("    Guaranteed RP reduction in:", true);
+  PrintRegionList("    Possible RP reduction in:", false);
+}
+
+/// Prints the score as a "(MaxFreq, FreqDiff, RegionImpact)" triple to the
+/// debug stream, without a trailing newline.
+///
+/// The components are unpacked from Score starting at the least significant
+/// bits: RegionImpact is the low RegionImpactWidth bits, FreqDiff the next
+/// FreqDiffWidth bits, and MaxFreq is whatever remains above them.
+void PreRARematStage::ScoredRemat::print() const {
+  // Build the masks in ScoreTy rather than int: a plain `1 << Width` is
+  // evaluated as an int and breaks once a field is 31 bits wide or more while
+  // ScoreTy is wider than int.
+  const ScoreTy RegionImpactMask = (ScoreTy(1) << RegionImpactWidth) - 1;
+  const ScoreTy FreqDiffMask = (ScoreTy(1) << FreqDiffWidth) - 1;
+
+  ScoreTy ShiftScore = Score;
+  ScoreTy RegionImpact = ShiftScore & RegionImpactMask;
+  ShiftScore >>= RegionImpactWidth;
+  ScoreTy FreqDiff = ShiftScore & FreqDiffMask;
+  ShiftScore >>= FreqDiffWidth;
+  ScoreTy MaxFreq = ShiftScore;
+  dbgs() << '(' << MaxFreq << ", " << FreqDiff << ", " << RegionImpact << ')';
+}
+#endif
+
 bool PreRARematStage::initGCNSchedStage() {
   // FIXME: This pass will invalidate cached BBLiveInMap and MBBLiveIns for
   // regions inbetween the defs and region we sinked the def to. Will need to be
   // fixed if there is another pass after this pass.
   assert(!S.hasNextStage());
 
-  if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() == 1)
+  if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() <= 1)
     return false;
 
+  // Maps all MIs (except lone terminators, which are not part of any region) to
+  // their parent region. Non-lone terminators are considered part of the region
+  // they delimitate.
+  DenseMap<MachineInstr *, unsigned> MIRegion(MF.getInstructionCount());
+
   // Before performing any IR modification record the parent region of each MI
   // and the parent MBB of each region.
   const unsigned NumRegions = DAG.Regions.size();
-  RegionBB.reserve(NumRegions);
   for (unsigned I = 0; I < NumRegions; ++I) {
     RegionBoundaries Region = DAG.Regions[I];
     for (auto MI = Region.first; MI != Region.second; ++MI)
       MIRegion.insert({&*MI, I});
-    RegionBB.push_back(Region.first->getParent());
+    MachineBasicBlock *ParentMBB = Region.first->getParent();
+    if (Region.second != ParentMBB->end())
+      MIRegion.insert({&*Region.second, I});
+    RegionBB.push_back(ParentMBB);
   }
 
-  if (!canIncreaseOccupancyOrReduceSpill())
+  // Set an objective for the stage based on current RP in each region.
+  REMAT_DEBUG({
+    dbgs() << "Analyzing ";
+    MF.getFunction().printAsOperand(dbgs(), false);
+    dbgs() << ": ";
+  });
+  if (!setObjective()) {
+    LLVM_DEBUG(dbgs() << "no objective to achieve, occupancy is maximal at "
+                      << MFI.getMaxWavesPerEU() << '\n');
+    return false;
+  }
+  LLVM_DEBUG({
+    if (TargetOcc) {
+      dbgs() << "increase occupancy from " << *TargetOcc - 1 << '\n';
+    } else {
+      dbgs() << "reduce spilling (minimum target occupancy is "
+             << MFI.getMinWavesPerEU() << ")\n";
+    }
+    printTargetRegions(/*PrintAll=*/TargetRegions.none());
+  });
+
+  if (!collectRematRegs(MIRegion)) {
+    REMAT_DEBUG(dbgs() << "No rematerializable registers\n");
+    return false;
+  }
+  REMAT_DEBUG({
+    dbgs() << "Rematerializable registers:\n";
+    for (const RematReg &Remat : RematRegs)
+      Remat.print();
+  });
+
+  const ScoredRemat::FreqInfo FreqInfo(MF, DAG);
+  REMAT_DEBUG({
+    dbgs() << "Region frequencies\n";
+    for (auto [I, Freq] : enumerate(FreqInfo.Regions)) {
+      dbgs() << REMAT_PREFIX << "  [" << I << "] ";
+      if (Freq)
+        dbgs() << Freq;
+      else
+        dbgs() << "unknown ";
+      dbgs() << " | " << *DAG.Regions[I].first;
+    }
+  });
+
+  SmallVector<ScoredRemat> ScoredRemats;
+  for (const RematReg &Remat : RematRegs)
+    ScoredRemats.emplace_back(&Remat, FreqInfo, DAG);
+
+// Rematerialize registers in successive rounds until all RP targets are
+// satisfied or until we run out of rematerialization candidates.
+#ifndef NDEBUG
+  unsigned RoundNum = 0;
+#endif
+  BitVector RecomputeRP(NumRegions);
+  do {
+    // (Re-)Score and (re-)sort all remats in increasing score order.
+    for (ScoredRemat &Remat : ScoredRemats)
+      Remat.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc);
+    sort(ScoredRemats);
+
+    REMAT_DEBUG({
+      dbgs() << "==== ROUND " << RoundNum << " ====\n";
+      for (const ScoredRemat &SRemat : ScoredRemats) {
+        dbgs() << REMAT_PREFIX;
+        SRemat.print();
+        dbgs() << " | " << *SRemat.Remat->DefMI;
+      }
+      printTargetRegions();
+    });
+
+    RecomputeRP.reset();
+    int RematIdx = ScoredRemats.size() - 1;
+
+    // Rematerialize registers in decreasing score order until we estimate
+    // that all RP targets are satisfied or until rematerialization candidates
+    // are no longer useful to decrease RP.
+    for (; RematIdx >= 0 && TargetRegions.any(); --RematIdx) {
+      const RematReg &Remat = *ScoredRemats[RematIdx].Remat;
+      // Stop on null score. Since scores monotonically decrease as we
+      // rematerialize, we know there is nothing useful left to do in such
+      // cases.
+      if (ScoredRemats[RematIdx].hasNullScore()) {
+        REMAT_DEBUG(dbgs() << "*** Stop on null score | " << *Remat.DefMI);
+        RematIdx = -1;
+        break;
+      }
+
+      // When previous rematerializations in this round have already satisfied
+      // RP targets in all regions this rematerialization can impact, we have a
+      // good indication that our scores have diverged significantly from
+      // reality, in which case we interrupt this round and re-score. This also
+      // ensures that every rematerialization we perform is possibly impactful
+      // in at least one target region.
+      if (!Remat.maybeBeneficial(TargetRegions, RPTargets)) {
+        REMAT_DEBUG(dbgs() << "*** Stop round on stale score | "
+                           << *Remat.DefMI);
+        break;
+      }
+
+      REMAT_DEBUG(dbgs() << "*** REMAT [" << Remat.DefRegion << " -> "
+                         << Remat.UseRegion << "] | " << *Remat.DefMI);
+      // Every rematerialization we do here is likely to move the instruction
+      // into a higher frequency region, increasing the total sum latency of the
+      // instruction itself. This is acceptable if we are eliminating a spill in
+      // the process, but when the goal is increasing occupancy we get nothing
+      // out of rematerialization if occupancy is not increased in the end; in
+      // such cases we want to roll back the rematerialization.
+      RollbackInfo *Rollback =
+          TargetOcc ? &Rollbacks.emplace_back(&Remat) : nullptr;
+      rematerialize(Remat, RecomputeRP, Rollback);
+      unsetSatisifedRPTargets(Remat.Live);
+    }
+
+#ifndef NDEBUG
+    ++RoundNum;
+#endif
+    REMAT_DEBUG({
+      if (!TargetRegions.any())
+        dbgs() << "*** Stop round on all targets achieved\n";
+      else if (RematIdx == -1)
+        dbgs() << "*** Stop round on exhausted remat opportunities\n";
+    });
+
+    // Peel off registers we already rematerialized from the vector's tail.
+    ScoredRemats.truncate(RematIdx + 1);
+  } while ((updateAndVerifyRPTargets(RecomputeRP) || TargetRegions.any()) &&
----------------
jrbyrnes wrote:

Is it possible to hit an infinite loop here? For example, if we have a `ScoredRemat` that is no longer beneficial but `TargetRegions` is still non-empty?

https://github.com/llvm/llvm-project/pull/153092


More information about the llvm-commits mailing list