[llvm-branch-commits] [llvm] [AMDGPU][Scheduler] Prepare remat stage for rematerializer integration (NFC) (PR #189489)

Lucas Ramirez via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Apr 2 03:53:45 PDT 2026


https://github.com/lucas-rami updated https://github.com/llvm/llvm-project/pull/189489

>From 77bc575f65d8d29f67774ccad80483c8f4ce27d7 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Fri, 27 Mar 2026 12:50:16 +0000
Subject: [PATCH 1/3] [AMDGPU][Scheduler] Prepare remat. stage for
 rematerializer integration (NFC)

This NFC prepares the scheduler's rematerialization stage for
integration with the target-independent rematerializer. It brings
various small design changes and optimizations to the stage's internal
state to make the not-exactly-NFC rematerializer integration as small as
possible.

The main changes are, in no particular order:

- Sort and pick useful rematerialization candidates by their index in
  the vector of candidates instead of directly sorting objects within
  the candidate vector. This reduces the amount of data movement and
  simplifies the candidate selection logic.
- Move some data members from `PreRARematStage::RematReg` to
  `PreRARematStage::ScoredRemat`. This makes the former a simplified
  version of the rematerializer's own internal register representation
  (`Rematerializer::Reg`), which can be cleanly deleted during
  integration.
- Remove an inferable argument from `modifyRegionSchedule`. This allows
  the stage to stop tracking the parent block of each region.
- Use a boolean (`RevertAllRegions`) to track the scheduling revert
  decision post-rematerialization instead of clearing `RescheduleRegions`.
  This avoids re-computing the latter during rollback.
- Estimate usefulness of rematerialization from `GCNRegPressure` instead
  of from `Register` (requires adding a new method variant in
  `GCNRPTarget`).
---
 llvm/lib/Target/AMDGPU/GCNRegPressure.cpp   |  11 +
 llvm/lib/Target/AMDGPU/GCNRegPressure.h     |   4 +
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 295 ++++++++++----------
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.h   | 109 ++++----
 4 files changed, 227 insertions(+), 192 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 54e67d9d2a808..0a217e931f6c1 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -424,6 +424,17 @@ bool GCNRPTarget::isSaveBeneficial(Register Reg) const {
   return (UnifiedRF && Excess.VGPR) || Excess.ArchVGPR;
 }
 
+bool GCNRPTarget::isSaveBeneficial(const GCNRegPressure &SaveRP) const {
+  RegExcess Excess(MF, RP, *this);
+  if (SaveRP.getSGPRNum() && Excess.SGPR)
+    return true;
+  if (SaveRP.getArchVGPRNum() && Excess.ArchVGPR)
+    return true;
+  if (SaveRP.getAGPRNum() && Excess.AGPR)
+    return true;
+  return UnifiedRF && SaveRP.getVGPRNum(true) && Excess.VGPR;
+}
+
 unsigned GCNRPTarget::getNumRegsBenefit(const GCNRegPressure &SaveRP) const {
   RegExcess Excess(MF, RP, *this);
   const unsigned NumVGPRAboveAddrLimit =
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 52064dca1c3af..704bf01d47b9d 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -250,6 +250,10 @@ class GCNRPTarget {
   /// towards achieving the RP target.
   bool isSaveBeneficial(Register Reg) const;
 
+  /// Returns whether the pressure reduction that \p SaveRP represents will be
+  /// beneficial towards achieving the RP target.
+  bool isSaveBeneficial(const GCNRegPressure &SaveRP) const;
+
   /// Saves virtual register \p Reg with lanemask \p Mask.
   void saveReg(Register Reg, LaneBitmask Mask, const MachineRegisterInfo &MRI) {
     RP.inc(Reg, Mask, LaneBitmask::getNone(), MRI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index ad24bad1fd5d7..f6e598ce69752 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -35,11 +35,9 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
-#include "llvm/CodeGen/MachineCycleAnalysis.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/MC/LaneBitmask.h"
-#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/MC/MCSchedule.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -1469,7 +1467,6 @@ bool PreRARematStage::initGCNSchedStage() {
     MachineBasicBlock *ParentMBB = Region.first->getParent();
     if (Region.second != ParentMBB->end())
       MIRegion.insert({&*Region.second, I});
-    RegionBB.push_back(ParentMBB);
   }
 
 #ifndef NDEBUG
@@ -1482,14 +1479,15 @@ bool PreRARematStage::initGCNSchedStage() {
     for (unsigned I : TargetRegions.set_bits())
       dbgs() << REMAT_PREFIX << "  [" << I << "] " << RPTargets[I] << '\n';
   };
-  auto PrintRematReg = [&](const RematReg &Remat) -> Printable {
-    return Printable([&, Remat](raw_ostream &OS) {
+  auto PrintCandidate = [&](const ScoredRemat &Cand) -> Printable {
+    return Printable([&, Cand](raw_ostream &OS) {
       // Concatenate all region numbers in which the register is unused and
       // live-through.
+      const RematReg &Remat = *Cand.Remat;
       bool HasLiveThroughRegion = false;
       OS << '[' << Remat.DefRegion << " -";
       for (unsigned I = 0; I < NumRegions; ++I) {
-        if (Remat.isUnusedLiveThrough(I)) {
+        if (!Cand.UnpredictableRPSave[I]) {
           if (HasLiveThroughRegion) {
             OS << ',';
           } else {
@@ -1529,15 +1527,26 @@ bool PreRARematStage::initGCNSchedStage() {
     PrintTargetRegions();
   });
 
+  // Collect all rematerializable registers in the function, then create a
+  // corresponding scored rematerialization candidate for each one.
   if (!collectRematRegs(MIRegion)) {
     REMAT_DEBUG(dbgs() << "No rematerializable registers\n");
     return false;
   }
   const ScoredRemat::FreqInfo FreqInfo(MF, DAG);
+  SmallVector<ScoredRemat, 8> Candidates;
+  Candidates.reserve(RematRegs.size());
+  SmallVector<unsigned> CandidateOrder, NewCandidateOrder;
+  for (RematReg &Remat : RematRegs) {
+    ScoredRemat &Candidate = Candidates.emplace_back(&Remat, FreqInfo, DAG);
+    if (Candidate.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc))
+      CandidateOrder.push_back(Candidates.size() - 1);
+  }
+
   REMAT_DEBUG({
     dbgs() << "Rematerializable registers:\n";
-    for (const RematReg &Remat : RematRegs)
-      dbgs() << REMAT_PREFIX << "  " << PrintRematReg(Remat) << '\n';
+    for (const ScoredRemat &Cand : Candidates)
+      dbgs() << REMAT_PREFIX << "  " << PrintCandidate(Cand) << '\n';
     dbgs() << REMAT_PREFIX << "Region frequencies\n";
     for (auto [I, Freq] : enumerate(FreqInfo.Regions)) {
       dbgs() << REMAT_PREFIX << "  [" << I << "] ";
@@ -1549,67 +1558,52 @@ bool PreRARematStage::initGCNSchedStage() {
     }
   });
 
-  SmallVector<ScoredRemat> ScoredRemats;
-  for (RematReg &Remat : RematRegs)
-    ScoredRemats.emplace_back(&Remat, FreqInfo, DAG);
-
-// Rematerialize registers in successive rounds until all RP targets are
-// satisifed or until we run out of rematerialization candidates.
-#ifndef NDEBUG
-  unsigned RoundNum = 0;
-#endif
-  BitVector RecomputeRP(NumRegions);
-  do {
-    assert(!ScoredRemats.empty() && "no more remat candidates");
+  // Rematerialize registers in successive rounds until all RP targets are
+  // satisfied or until we run out of rematerialization candidates.
+  BitVector RecomputeRP(DAG.Regions.size());
+  for (;;) {
+    RecomputeRP.reset();
 
-    // (Re-)Score and (re-)sort all remats in increasing score order.
-    for (ScoredRemat &Remat : ScoredRemats)
-      Remat.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc);
-    sort(ScoredRemats);
+    // Sort candidates in increasing score order.
+    sort(CandidateOrder, [&](unsigned LHSIndex, unsigned RHSIndex) {
+      return Candidates[LHSIndex] < Candidates[RHSIndex];
+    });
 
     REMAT_DEBUG({
-      dbgs() << "==== ROUND " << RoundNum++ << " ====\n"
+      dbgs() << "==== NEW REMAT ROUND ====\n"
              << REMAT_PREFIX
              << "Candidates with non-null score, in rematerialization order:\n";
-      for (const ScoredRemat &RematDecision : reverse(ScoredRemats)) {
-        if (RematDecision.hasNullScore())
-          break;
-        dbgs() << REMAT_PREFIX << "  " << RematDecision.print() << " | "
-               << *RematDecision.Remat->DefMI;
+      for (const ScoredRemat &Cand : reverse(Candidates)) {
+        dbgs() << REMAT_PREFIX << "  " << Cand.print() << " | "
+               << PrintCandidate(Cand) << '\n';
       }
       PrintTargetRegions();
     });
 
-    RecomputeRP.reset();
-    unsigned RematIdx = ScoredRemats.size();
-
     // Rematerialize registers in decreasing score order until we estimate
     // that all RP targets are satisfied or until rematerialization candidates
     // are no longer useful to decrease RP.
-    for (; RematIdx && TargetRegions.any(); --RematIdx) {
-      const ScoredRemat &Candidate = ScoredRemats[RematIdx - 1];
-      // Stop rematerializing on encountering a null score. Since scores
-      // monotonically decrease as we rematerialize, we know there is nothing
-      // useful left to do in such cases, even if we were to re-score.
-      if (Candidate.hasNullScore()) {
-        RematIdx = 0;
-        break;
-      }
-
-      RematReg &Remat = *Candidate.Remat;
+    while (!CandidateOrder.empty()) {
+      const ScoredRemat &Cand = Candidates[CandidateOrder.back()];
       // When previous rematerializations in this round have already satisfied
       // RP targets in all regions this rematerialization can impact, we have a
       // good indication that our scores have diverged significantly from
       // reality, in which case we interrupt this round and re-score. This also
       // ensures that every rematerialization we perform is possibly impactful
       // in at least one target region.
-      if (!Remat.maybeBeneficial(TargetRegions, RPTargets))
+      if (!Cand.maybeBeneficial(TargetRegions, RPTargets)) {
+        REMAT_DEBUG(dbgs() << "Interrupt round on stale score for "
+                           << Cand.print() << " | " << *Cand.Remat->DefMI);
         break;
+      }
+      CandidateOrder.pop_back();
+      RematReg &Remat = *Cand.Remat;
 
-      REMAT_DEBUG(dbgs() << "** REMAT " << PrintRematReg(Remat) << '\n';);
-      MachineInstr *RematMI =
-          Candidate.rematerialize(RecomputeRP, RPTargets, DAG);
-      RescheduleRegions |= Remat.Live;
+      // Remove the register from all regions where it is a live-in or live-out
+      // and rematerialize it.
+      REMAT_DEBUG(dbgs() << "** REMAT " << PrintCandidate(Cand) << '\n');
+      removeFromLiveMaps(Remat.getReg(), Cand.LiveIn, Cand.LiveOut);
+      MachineInstr *RematMI = Cand.rematerialize(DAG);
 
       // Every rematerialization we do here is likely to move the instruction
       // into a higher frequency region, increasing the total sum latency of the
@@ -1618,7 +1612,8 @@ bool PreRARematStage::initGCNSchedStage() {
       // out of rematerialization if occupancy is not increased in the end; in
       // such cases we want to roll back the rematerialization.
       if (TargetOcc) {
-        RollbackInfo &Rollback = Rollbacks.emplace_back(&Remat);
+        RollbackInfo &Rollback =
+            Rollbacks.emplace_back(&Remat, Cand.LiveIn, Cand.LiveOut);
         Rollback.RematMI = RematMI;
         // Make the original MI a debug value so that it does not influence
         // scheduling and replace all read registers with a sentinel register to
@@ -1637,24 +1632,37 @@ bool PreRARematStage::initGCNSchedStage() {
         DAG.deleteMI(Remat.DefRegion, Remat.DefMI);
       }
 
-      unsetSatisfiedRPTargets(Remat.Live);
-    }
-
-    REMAT_DEBUG({
+      // Adjust RP targets. The save is guaranteed in regions in which the
+      // register is live-through and unused but optimistic in all other regions
+      // where the register is live.
+      updateRPTargets(Cand.Live, Cand.RPSave);
+      RecomputeRP |= Cand.UnpredictableRPSave;
+      RescheduleRegions |= Cand.Live;
       if (!TargetRegions.any()) {
-        dbgs() << "** Interrupt round on all targets achieved\n";
-      } else if (RematIdx) {
-        dbgs() << "** Interrupt round on stale score for "
-               << *ScoredRemats[RematIdx - 1].Remat->DefMI;
-      } else {
-        dbgs() << "** Stop on exhausted rematerialization candidates\n";
+        REMAT_DEBUG(dbgs() << "All targets cleared, verifying...\n");
+        break;
       }
-    });
+    }
+
+    if (!updateAndVerifyRPTargets(RecomputeRP) && !TargetRegions.any()) {
+      REMAT_DEBUG(dbgs() << "Objectives achieved!\n");
+      break;
+    }
+
+    // Update the score of remaining candidates.
+    NewCandidateOrder.clear();
+    for (unsigned CandIdx : CandidateOrder) {
+      ScoredRemat &Candidate = Candidates[CandIdx];
+      if (Candidate.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc))
+        NewCandidateOrder.push_back(CandIdx);
+    }
+    if (NewCandidateOrder.empty()) {
+      REMAT_DEBUG(dbgs() << "Stop on exhausted rematerialization candidates\n");
+      break;
+    }
+    CandidateOrder = std::move(NewCandidateOrder);
+  }
 
-    // Peel off registers we already rematerialized from the vector's tail.
-    ScoredRemats.truncate(RematIdx);
-  } while ((updateAndVerifyRPTargets(RecomputeRP) || TargetRegions.any()) &&
-           !ScoredRemats.empty());
   if (RescheduleRegions.none())
     return false;
 
@@ -1817,7 +1825,8 @@ bool ClusteredLowOccStage::initGCNRegion() {
 }
 
 bool PreRARematStage::initGCNRegion() {
-  return RescheduleRegions[RegionIdx] && GCNSchedStage::initGCNRegion();
+  return !RevertAllRegions && RescheduleRegions[RegionIdx] &&
+         GCNSchedStage::initGCNRegion();
 }
 
 void GCNSchedStage::setupNewBlock() {
@@ -1861,7 +1870,7 @@ void PreRARematStage::finalizeGCNRegion() {
     REMAT_DEBUG(dbgs() << "Region " << RegionIdx
                        << " cannot meet occupancy target, interrupting "
                           "re-scheduling in all regions\n");
-    RescheduleRegions.reset();
+    RevertAllRegions = true;
   }
 }
 
@@ -1932,7 +1941,7 @@ void GCNSchedStage::checkScheduling() {
   // Revert if this region's schedule would cause a drop in occupancy or
   // spilling.
   if (shouldRevertScheduling(WavesAfter)) {
-    modifyRegionSchedule(RegionIdx, DAG.BB, Unsched);
+    modifyRegionSchedule(RegionIdx, Unsched);
     std::tie(DAG.RegionBegin, DAG.RegionEnd) = DAG.Regions[RegionIdx];
   } else {
     DAG.Pressure[RegionIdx] = PressureAfter;
@@ -2166,7 +2175,6 @@ bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
 }
 
 void GCNSchedStage::modifyRegionSchedule(unsigned RegionIdx,
-                                         MachineBasicBlock *MBB,
                                          ArrayRef<MachineInstr *> MIOrder) {
   assert(static_cast<size_t>(std::distance(DAG.Regions[RegionIdx].first,
                                            DAG.Regions[RegionIdx].second)) ==
@@ -2180,6 +2188,7 @@ void GCNSchedStage::modifyRegionSchedule(unsigned RegionIdx,
   // Reconstruct MI sequence by moving instructions in desired order before
   // the current region's start.
   MachineBasicBlock::iterator RegionEnd = DAG.Regions[RegionIdx].first;
+  MachineBasicBlock *MBB = MIOrder.front()->getParent();
   for (MachineInstr *MI : MIOrder) {
     // Either move the next MI in order before the end of the region or move the
     // region end past the MI if it is at the correct position.
@@ -2887,38 +2896,23 @@ bool PreRARematStage::collectRematRegs(
 PreRARematStage::RematReg::RematReg(
     MachineInstr *DefMI, MachineInstr *UseMI, GCNScheduleDAGMILive &DAG,
     const DenseMap<MachineInstr *, unsigned> &MIRegion)
-    : DefMI(DefMI), UseMI(UseMI), LiveIn(DAG.Regions.size()),
-      LiveOut(DAG.Regions.size()), Live(DAG.Regions.size()),
-      DefRegion(MIRegion.at(DefMI)), UseRegion(MIRegion.at(UseMI)) {
-
-  // Mark regions in which the rematerializable register is live.
-  Register Reg = getReg();
-  for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
-    auto LiveInIt = DAG.LiveIns[I].find(Reg);
-    if (LiveInIt != DAG.LiveIns[I].end())
-      LiveIn.set(I);
-    const auto &LiveOuts = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I);
-    if (auto LiveOutIt = LiveOuts.find(Reg); LiveOutIt != LiveOuts.end())
-      LiveOut.set(I);
-  }
-  Live |= LiveIn;
-  Live |= LiveOut;
-  Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(DefRegion).at(Reg);
+    : DefMI(DefMI), UseMI(UseMI), DefRegion(MIRegion.at(DefMI)),
+      UseRegion(MIRegion.at(UseMI)),
+      Mask(DAG.RegionLiveOuts.getLiveRegsForRegionIdx(DefRegion).at(getReg())) {
 }
 
-bool PreRARematStage::RematReg::maybeBeneficial(
+bool PreRARematStage::ScoredRemat::maybeBeneficial(
     const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets) const {
-  Register Reg = getReg();
   for (unsigned I : TargetRegions.set_bits()) {
-    if (Live[I] && RPTargets[I].isSaveBeneficial(Reg))
+    if (Live[I] && RPTargets[I].isSaveBeneficial(RPSave))
       return true;
   }
   return false;
 }
 
-void PreRARematStage::RematReg::insertMI(unsigned RegionIdx,
-                                         MachineInstr *RematMI,
-                                         GCNScheduleDAGMILive &DAG) const {
+void PreRARematStage::ScoredRemat::insertMI(unsigned RegionIdx,
+                                          MachineInstr *RematMI,
+                                          GCNScheduleDAGMILive &DAG) const {
   RegionBoundaries &Bounds = DAG.Regions[RegionIdx];
   if (Bounds.first == std::next(MachineBasicBlock::iterator(RematMI)))
     Bounds.first = RematMI;
@@ -2958,12 +2952,29 @@ PreRARematStage::ScoredRemat::FreqInfo::FreqInfo(
 }
 
 PreRARematStage::ScoredRemat::ScoredRemat(RematReg *Remat, const FreqInfo &Freq,
-                                          const GCNScheduleDAGMILive &DAG)
-    : Remat(Remat), FreqDiff(getFreqDiff(Freq)) {
-  RPSave.inc(Remat->getReg(), LaneBitmask::getNone(), Remat->Mask, DAG.MRI);
-}
+                                          GCNScheduleDAGMILive &DAG)
+    : Remat(Remat), LiveIn(DAG.Regions.size()), LiveOut(DAG.Regions.size()),
+      Live(DAG.Regions.size()), UnpredictableRPSave(DAG.Regions.size()) {
+  Register DefReg = Remat->getReg();
+
+  // Mark regions in which the rematerializable register is live.
+  for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
+    auto LiveInIt = DAG.LiveIns[I].find(DefReg);
+    if (LiveInIt != DAG.LiveIns[I].end())
+      LiveIn.set(I);
+    const auto &LiveOuts = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I);
+    if (auto LiveOutIt = LiveOuts.find(DefReg); LiveOutIt != LiveOuts.end())
+      LiveOut.set(I);
+
+    // The region's RP is only guaranteed to decrease if the register is both
+    // unused and live-through in it; otherwise the save is unpredictable.
+    if (!LiveIn[I] || !LiveOut[I] || I == Remat->UseRegion)
+      UnpredictableRPSave.set(I);
+  }
+  Live |= LiveIn;
+  Live |= LiveOut;
+  RPSave.inc(DefReg, LaneBitmask::getNone(), Remat->Mask, DAG.MRI);
 
-int64_t PreRARematStage::ScoredRemat::getFreqDiff(const FreqInfo &Freq) const {
   // Get frequencies of defining and using regions. A rematerialization from the
   // least frequent region to the most frequent region will yield the greatest
   // latency penalty and therefore should get minimum score. Reciprocally, a
@@ -2975,17 +2986,17 @@ int64_t PreRARematStage::ScoredRemat::getFreqDiff(const FreqInfo &Freq) const {
   int64_t UseOrMax = Freq.Regions[Remat->UseRegion];
   if (!UseOrMax)
     UseOrMax = Freq.MaxFreq;
-  return DefOrMin - UseOrMax;
+  FreqDiff = DefOrMin - UseOrMax;
 }
 
-void PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,
-                                          ArrayRef<GCNRPTarget> RPTargets,
-                                          const FreqInfo &FreqInfo,
-                                          bool ReduceSpill) {
+bool PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,
+                                        ArrayRef<GCNRPTarget> RPTargets,
+                                        const FreqInfo &FreqInfo,
+                                        bool ReduceSpill) {
   MaxFreq = 0;
   RegionImpact = 0;
   for (unsigned I : TargetRegions.set_bits()) {
-    if (!Remat->Live[I])
+    if (!Live[I])
       continue;
 
     // The rematerialization must contribute positively in at least one
@@ -2996,14 +3007,12 @@ void PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,
     if (!NumRegsBenefit)
       continue;
 
-    bool UnusedLT = Remat->isUnusedLiveThrough(I);
-
     // Regions in which RP is guaranteed to decrease have more weight.
-    RegionImpact += (UnusedLT ? 2 : 1) * NumRegsBenefit;
+    RegionImpact += (UnpredictableRPSave[I] ? 1 : 2) * NumRegsBenefit;
 
     if (ReduceSpill) {
       uint64_t Freq = FreqInfo.Regions[I];
-      if (!UnusedLT) {
+      if (UnpredictableRPSave[I]) {
         // Apply a frequency penalty in regions in which we are not sure that RP
         // will decrease.
         Freq /= 2;
@@ -3011,11 +3020,11 @@ void PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,
       MaxFreq = std::max(MaxFreq, Freq);
     }
   }
+  return !hasNullScore();
 }
 
-MachineInstr *PreRARematStage::ScoredRemat::rematerialize(
-    BitVector &RecomputeRP, SmallVectorImpl<GCNRPTarget> &RPTargets,
-    GCNScheduleDAGMILive &DAG) const {
+MachineInstr *
+PreRARematStage::ScoredRemat::rematerialize(GCNScheduleDAGMILive &DAG) const {
   const SIInstrInfo *TII = DAG.MF.getSubtarget<GCNSubtarget>().getInstrInfo();
   MachineInstr &DefMI = *Remat->DefMI;
   Register Reg = DefMI.getOperand(0).getReg();
@@ -3026,7 +3035,7 @@ MachineInstr *PreRARematStage::ScoredRemat::rematerialize(
   TII->reMaterialize(*InsertPos->getParent(), InsertPos, NewReg, 0, DefMI);
   MachineInstr *RematMI = &*std::prev(InsertPos);
   Remat->UseMI->substituteRegister(Reg, NewReg, 0, *DAG.TRI);
-  Remat->insertMI(Remat->UseRegion, RematMI, DAG);
+  insertMI(Remat->UseRegion, RematMI, DAG);
 
 #ifdef EXPENSIVE_CHECKS
   // All uses are known to be available / live at the remat point. Thus,
@@ -3056,19 +3065,6 @@ MachineInstr *PreRARematStage::ScoredRemat::rematerialize(
     }
   }
 #endif
-
-  // Remove the register from all regions where it is a live-in or live-out
-  // and adjust RP targets. The save is guaranteed in regions in which the
-  // register is live-through and unused but optimistic in all other regions
-  // where the register is live.
-  for (unsigned I : Remat->Live.set_bits()) {
-    RPTargets[I].saveRP(RPSave);
-    DAG.LiveIns[I].erase(Reg);
-    DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).erase(Reg);
-    if (!Remat->isUnusedLiveThrough(I))
-      RecomputeRP.set(I);
-  }
-
   return RematMI;
 }
 
@@ -3078,8 +3074,10 @@ void PreRARematStage::commitRematerializations() const {
     DAG.deleteMI(Rollback.Remat->DefRegion, Rollback.Remat->DefMI);
 }
 
-void PreRARematStage::unsetSatisfiedRPTargets(const BitVector &Regions) {
+void PreRARematStage::updateRPTargets(const BitVector &Regions,
+                                      const GCNRegPressure &RPSave) {
   for (unsigned I : Regions.set_bits()) {
+    RPTargets[I].saveRP(RPSave);
     if (TargetRegions[I] && RPTargets[I].satisfied()) {
       REMAT_DEBUG(dbgs() << "  [" << I << "] Target reached!\n");
       TargetRegions.reset(I);
@@ -3123,6 +3121,28 @@ bool PreRARematStage::isReMaterializable(const MachineInstr &MI) {
   return true;
 }
 
+void PreRARematStage::removeFromLiveMaps(Register Reg, const BitVector &LiveIn,
+                                         const BitVector &LiveOut) {
+  assert(LiveIn.size() == DAG.Regions.size() && "region num mismatch");
+  assert(LiveOut.size() == DAG.Regions.size() && "region num mismatch");
+  for (unsigned I : LiveIn.set_bits())
+    DAG.LiveIns[I].erase(Reg);
+  for (unsigned I : LiveOut.set_bits())
+    DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).erase(Reg);
+}
+
+void PreRARematStage::addToLiveMaps(Register Reg, LaneBitmask Mask,
+                                    const BitVector &LiveIn,
+                                    const BitVector &LiveOut) {
+  assert(LiveIn.size() == DAG.Regions.size() && "region num mismatch");
+  assert(LiveOut.size() == DAG.Regions.size() && "region num mismatch");
+  std::pair<Register, LaneBitmask> LiveReg(Reg, Mask);
+  for (unsigned I : LiveIn.set_bits())
+    DAG.LiveIns[I].insert(LiveReg);
+  for (unsigned I : LiveOut.set_bits())
+    DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).insert(LiveReg);
+}
+
 void PreRARematStage::finalizeGCNSchedStage() {
   // We consider that reducing spilling is always beneficial so we never
   // rollback rematerializations or revert scheduling in such cases.
@@ -3147,9 +3167,10 @@ void PreRARematStage::finalizeGCNSchedStage() {
   // are treated as non-debug instructions by the revert logic.
   if (ShouldRollbackRemats) {
     for (const RollbackInfo &Rollback : Rollbacks) {
-      const auto &[Remat, RematMI, RegMap] = Rollback;
-      Remat->DefMI->setDesc(DAG.TII->get(RematMI->getOpcode()));
-      for (const auto &[MOIdx, Reg] : RegMap)
+      const RematReg *Remat = Rollback.Remat;
+      MachineInstr *RematMI = Rollback.RematMI;
+      Rollback.Remat->DefMI->setDesc(DAG.TII->get(RematMI->getOpcode()));
+      for (const auto &[MOIdx, Reg] : Rollback.RegMap)
         Remat->DefMI->getOperand(MOIdx).setReg(Reg);
     }
   }
@@ -3159,7 +3180,7 @@ void PreRARematStage::finalizeGCNSchedStage() {
     REMAT_DEBUG(dbgs() << "Reverting re-scheduling in region " << RegionIdx
                        << '\n');
     DAG.Pressure[RegionIdx] = MaxPressure;
-    modifyRegionSchedule(RegionIdx, RegionBB[RegionIdx], OrigMIOrder);
+    modifyRegionSchedule(RegionIdx, OrigMIOrder);
   }
 
   if (!ShouldRollbackRemats) {
@@ -3174,10 +3195,10 @@ void PreRARematStage::finalizeGCNSchedStage() {
   // Finish rolling back rematerializations, then recompute pressure in all
   // affected regions.
   REMAT_DEBUG(dbgs() << "==== ROLLBACK ====\n");
-  BitVector RecomputeRP(DAG.Regions.size());
   DenseSet<Register> RecomputeLI;
   for (const RollbackInfo &Rollback : Rollbacks) {
-    const auto &[Remat, RematMI, RegMap] = Rollback;
+    const RematReg *Remat = Rollback.Remat;
+    MachineInstr *RematMI = Rollback.RematMI;
 
     // Switch back to using the original register and delete the
     // rematerialization.
@@ -3187,16 +3208,8 @@ void PreRARematStage::finalizeGCNSchedStage() {
     REMAT_DEBUG(dbgs() << '[' << Remat->UseRegion
                        << "] Deleting rematerialization " << *RematMI);
     DAG.deleteMI(Remat->UseRegion, RematMI);
+    addToLiveMaps(OriginalReg, Remat->Mask, Rollback.LiveIn, Rollback.LiveOut);
 
-    // Re-add the defined register as a live-in/live-out in all regions it used
-    // to be one in.
-    std::pair<Register, LaneBitmask> LiveReg(OriginalReg, Remat->Mask);
-    for (unsigned I : Remat->LiveIn.set_bits())
-      DAG.LiveIns[I].insert(LiveReg);
-    for (unsigned I : Remat->LiveOut.set_bits())
-      DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).insert(LiveReg);
-
-    RecomputeRP |= Rollback.Remat->Live;
     // Regenerate intervals for all register operands of rematerialized MIs as
     // slot indices may have changed slightly from before re-scheduling.
     for (MachineOperand &MO : Rollback.Remat->DefMI->operands()) {
@@ -3213,7 +3226,7 @@ void PreRARematStage::finalizeGCNSchedStage() {
   // which reverts and/or rollbacks may have happened.
   MF.verify();
 #endif
-  for (unsigned I : RecomputeRP.set_bits())
+  for (unsigned I : RescheduleRegions.set_bits())
     DAG.Pressure[I] = DAG.getRealRegPressure(I);
 
   GCNSchedStage::finalizeGCNSchedStage();
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index ae86388af5545..262f5dcb2176a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -15,10 +15,8 @@
 
 #include "GCNRegPressure.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/MapVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineScheduler.h"
 
@@ -417,11 +415,10 @@ class GCNSchedStage {
   // Returns true if the new schedule may result in more spilling.
   bool mayCauseSpilling(unsigned WavesAfter);
 
-  /// Sets the schedule of region \p RegionIdx in block \p MBB to \p MIOrder.
-  /// The MIs in \p MIOrder must be exactly the same as the ones currently
-  /// existing inside the region, only in a different order that honors def-use
-  /// chains.
-  void modifyRegionSchedule(unsigned RegionIdx, MachineBasicBlock *MBB,
+  /// Sets the schedule of region \p RegionIdx to \p MIOrder. The MIs in \p
+  /// MIOrder must be exactly the same as the ones currently existing inside the
+  /// region, only in a different order that honors def-use chains.
+  void modifyRegionSchedule(unsigned RegionIdx,
                             ArrayRef<MachineInstr *> MIOrder);
 
   void advanceRegion() { RegionIdx++; }
@@ -554,12 +551,10 @@ class PreRARematStage : public GCNSchedStage {
     MachineInstr *DefMI;
     /// Single user of the rematerializable register.
     MachineInstr *UseMI;
-    /// Regions in which the register is live-in/live-out/live anywhere.
-    BitVector LiveIn, LiveOut, Live;
-    /// The rematerializable register's lane bitmask.
-    LaneBitmask Mask;
     /// Defining and using regions.
     unsigned DefRegion, UseRegion;
+    /// The rematerializable register's lane bitmask.
+    LaneBitmask Mask;
 
     RematReg(MachineInstr *DefMI, MachineInstr *UseMI,
              GCNScheduleDAGMILive &DAG,
@@ -568,24 +563,6 @@ class PreRARematStage : public GCNSchedStage {
     /// Returns the rematerializable register. Do not call after deleting the
     /// original defining instruction.
     Register getReg() const { return DefMI->getOperand(0).getReg(); }
-
-    /// Determines whether this rematerialization may be beneficial in at least
-    /// one target region.
-    bool maybeBeneficial(const BitVector &TargetRegions,
-                         ArrayRef<GCNRPTarget> RPTargets) const;
-
-    /// Determines if the register is both unused and live-through in region \p
-    /// I. This guarantees that rematerializing it will reduce RP in the region.
-    bool isUnusedLiveThrough(unsigned I) const {
-      assert(I < Live.size() && "region index out of range");
-      return LiveIn[I] && LiveOut[I] && I != UseRegion;
-    }
-
-    /// Updates internal structures following a MI rematerialization. Part of
-    /// the stage instead of the DAG because it makes assumptions that are
-    /// specific to the rematerialization process.
-    void insertMI(unsigned RegionIdx, MachineInstr *RematMI,
-                  GCNScheduleDAGMILive &DAG) const;
   };
 
   /// A scored rematerialization candidate. Higher scores indicate more
@@ -594,6 +571,15 @@ class PreRARematStage : public GCNSchedStage {
   struct ScoredRemat {
     /// The rematerializable register under consideration.
     RematReg *Remat;
+    /// Regions in which the register is live-in/live-out/live anywhere.
+    BitVector LiveIn, LiveOut, Live;
+    /// Regions in which the rematerialization is not guaranteed to reduce RP,
+    /// i.e., every region in which the register is not both live-through and
+    /// unused (this includes regions in which the register is not live at all).
+    BitVector UnpredictableRPSave;
+    /// Expected register pressure decrease induced by rematerializing this
+    /// candidate.
+    GCNRegPressure RPSave;
 
     /// Execution frequency information required by scoring heuristics.
     /// Frequencies are scaled down if they are high to avoid overflow/underflow
@@ -613,7 +599,21 @@ class PreRARematStage : public GCNSchedStage {
     /// This only initializes state-independent characteristics of \p Remat, not
     /// the actual score.
     ScoredRemat(RematReg *Remat, const FreqInfo &Freq,
-                const GCNScheduleDAGMILive &DAG);
+                GCNScheduleDAGMILive &DAG);
+
+    /// Rematerializes the candidate at its use and returns the new MI.
+    MachineInstr *rematerialize(GCNScheduleDAGMILive &DAG) const;
+
+    /// Determines whether this rematerialization may be beneficial in at least
+    /// one target region.
+    bool maybeBeneficial(const BitVector &TargetRegions,
+                         ArrayRef<GCNRPTarget> RPTargets) const;
+
+    /// Updates internal structures following a MI rematerialization. Part of
+    /// the stage instead of the DAG because it makes assumptions that are
+    /// specific to the rematerialization process.
+    void insertMI(unsigned RegionIdx, MachineInstr *RematMI,
+                  GCNScheduleDAGMILive &DAG) const;
 
     /// Rematerializes the candidate and returns the new MI. This removes the
     /// rematerialized register from live-in/out lists in the \p DAG and updates
@@ -624,21 +624,20 @@ class PreRARematStage : public GCNSchedStage {
                                 GCNScheduleDAGMILive &DAG) const;
 
     /// Updates the rematerialization's score w.r.t. the current \p RPTargets.
-    /// \p RegionFreq indicates the frequency of each region
-    void update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
+    /// \p Freq indicates the frequency of each region. Returns whether the
+    /// new score is non-null, i.e., whether the candidate remains useful.
+    bool update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
                 const FreqInfo &Freq, bool ReduceSpill);
 
     /// Returns whether the current score is null, indicating the
     /// rematerialization is useless.
     bool hasNullScore() const { return !RegionImpact; }
 
-    /// Compare score components of non-null scores pair-wise. A null score is
-    /// always strictly lesser than another non-null score.
+    /// Compare score components of non-null scores pair-wise. Scores shouldn't
+    /// be null (as defined by \ref hasNullScore).
     bool operator<(const ScoredRemat &O) const {
-      if (hasNullScore())
-        return !O.hasNullScore();
-      if (O.hasNullScore())
-        return false;
+      assert(!hasNullScore() && "this has null score");
+      assert(!O.hasNullScore() && "other has null score");
       if (MaxFreq != O.MaxFreq)
         return MaxFreq < O.MaxFreq;
       if (FreqDiff != O.FreqDiff)
@@ -657,11 +656,7 @@ class PreRARematStage : public GCNSchedStage {
     Printable print() const;
 #endif
 
-  private:
-    /// Expected register pressure decrease induced by rematerializing this
-    /// candidate.
-    GCNRegPressure RPSave;
-
+  private:    
     // The three members below are the scoring components, top to bottom from
     // most important to least important when comparing candidates.
 
@@ -676,12 +671,8 @@ class PreRARematStage : public GCNSchedStage {
     /// Expected number of target regions impacted by the rematerialization,
     /// scaled by the size of the register being rematerialized.
     unsigned RegionImpact;
-
-    int64_t getFreqDiff(const FreqInfo &Freq) const;
   };
 
-  /// Parent MBB to each region, in region order.
-  SmallVector<MachineBasicBlock *> RegionBB;
   /// Register pressure targets for all regions.
   SmallVector<GCNRPTarget> RPTargets;
   /// Regions which are above the stage's RP target.
@@ -703,12 +694,16 @@ class PreRARematStage : public GCNSchedStage {
   struct RollbackInfo {
     /// The rematerializable register under consideration.
     const RematReg *Remat;
+    /// Regions in which the original register was live-in or live-out.
+    BitVector LiveIn, LiveOut;
     /// The rematerialized MI replacing the original defining MI.
     MachineInstr *RematMI;
     /// Maps register machine operand indices to their original register.
     SmallDenseMap<unsigned, Register, 4> RegMap;
 
-    RollbackInfo(const RematReg *Remat) : Remat(Remat) {}
+    RollbackInfo(const RematReg *Remat, const BitVector &LiveIn,
+                 const BitVector &LiveOut)
+        : Remat(Remat), LiveIn(LiveIn), LiveOut(LiveOut) {}
   };
   /// List of rematerializations to rollback if rematerialization does not end
   /// up being beneficial.
@@ -732,6 +727,8 @@ class PreRARematStage : public GCNSchedStage {
   /// After re-scheduling, contains pre-re-scheduling data for all re-scheduled
   /// regions.
   SmallVector<RegionSchedRevert> RegionReverts;
+  /// Whether we should revert all re-scheduled regions.
+  bool RevertAllRegions = false;
 
   /// Returns the occupancy the stage is trying to achieve.
   unsigned getStageTargetOccupancy() const;
@@ -742,8 +739,9 @@ class PreRARematStage : public GCNSchedStage {
   /// TargetRegions. Returns whether there is any target region.
   bool setObjective();
 
-  /// Unsets target regions in \p Regions whose RP target has been reached.
-  void unsetSatisfiedRPTargets(const BitVector &Regions);
+  /// In all regions set in \p Regions, saves pressure \p RPSave and clears the
+  /// region as a target if its RP target has been reached.
+  void updateRPTargets(const BitVector &Regions, const GCNRegPressure &RPSave);
 
   /// Fully recomputes RP from the DAG in \p Regions. Among those regions, sets
   /// again all \ref TargetRegions that were optimistically marked as satisfied
@@ -762,6 +760,16 @@ class PreRARematStage : public GCNSchedStage {
   /// Whether the MI is rematerializable
   bool isReMaterializable(const MachineInstr &MI);
 
+  /// Removes register \p Reg from the live-ins of regions set in \p LiveIn and
+  /// the live-outs of regions set in \p LiveOut.
+  void removeFromLiveMaps(Register Reg, const BitVector &LiveIn,
+                          const BitVector &LiveOut);
+
+  /// Adds register \p Reg with mask \p Mask to the live-ins of regions set in
+  /// \p LiveIn and the live-outs of regions set in \p LiveOut.
+  void addToLiveMaps(Register Reg, LaneBitmask Mask, const BitVector &LiveIn,
+                     const BitVector &LiveOut);
+
   /// If remat alone did not increase occupancy to the target one, rollbacks all
   /// rematerializations and resets live-ins/RP in all regions impacted by the
   /// stage to their pre-stage values.
@@ -781,7 +789,6 @@ class PreRARematStage : public GCNSchedStage {
         RescheduleRegions(DAG.Regions.size()) {
     const unsigned NumRegions = DAG.Regions.size();
     RPTargets.reserve(NumRegions);
-    RegionBB.reserve(NumRegions);
   }
 };
 

>From fcc69c380b3d4d791c533dc6f1d13009621797cb Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Mon, 30 Mar 2026 21:37:26 +0000
Subject: [PATCH 2/3] Format

---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 10 +++++-----
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.h   |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index f6e598ce69752..78b450c8814d9 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -2911,8 +2911,8 @@ bool PreRARematStage::ScoredRemat::maybeBeneficial(
 }
 
 void PreRARematStage::ScoredRemat::insertMI(unsigned RegionIdx,
-                                          MachineInstr *RematMI,
-                                          GCNScheduleDAGMILive &DAG) const {
+                                            MachineInstr *RematMI,
+                                            GCNScheduleDAGMILive &DAG) const {
   RegionBoundaries &Bounds = DAG.Regions[RegionIdx];
   if (Bounds.first == std::next(MachineBasicBlock::iterator(RematMI)))
     Bounds.first = RematMI;
@@ -2990,9 +2990,9 @@ PreRARematStage::ScoredRemat::ScoredRemat(RematReg *Remat, const FreqInfo &Freq,
 }
 
 bool PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,
-                                        ArrayRef<GCNRPTarget> RPTargets,
-                                        const FreqInfo &FreqInfo,
-                                        bool ReduceSpill) {
+                                          ArrayRef<GCNRPTarget> RPTargets,
+                                          const FreqInfo &FreqInfo,
+                                          bool ReduceSpill) {
   MaxFreq = 0;
   RegionImpact = 0;
   for (unsigned I : TargetRegions.set_bits()) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 262f5dcb2176a..b9c4dcdd8b7ac 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -656,7 +656,7 @@ class PreRARematStage : public GCNSchedStage {
     Printable print() const;
 #endif
 
-  private:    
+  private:
     // The three members below are the scoring components, top to bottom from
     // most important to least important when comparing candidates.
 

>From 9af070b42ebd73cb37749414b3ae5b0b6049cc70 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Thu, 2 Apr 2026 10:53:31 +0000
Subject: [PATCH 3/3] Address feedback

---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 32 +++++++++++----------
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.h   |  8 +++---
 2 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 78b450c8814d9..d63325c03cc71 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1534,13 +1534,13 @@ bool PreRARematStage::initGCNSchedStage() {
     return false;
   }
   const ScoredRemat::FreqInfo FreqInfo(MF, DAG);
-  SmallVector<ScoredRemat, 8> Candidates;
-  Candidates.reserve(RematRegs.size());
+  SmallVector<ScoredRemat, 8> Candidates(RematRegs.size());
   SmallVector<unsigned> CandidateOrder, NewCandidateOrder;
-  for (RematReg &Remat : RematRegs) {
-    ScoredRemat &Candidate = Candidates.emplace_back(&Remat, FreqInfo, DAG);
+  for (auto [I, Remat] : enumerate(RematRegs)) {
+    ScoredRemat &Candidate = Candidates[I];
+    Candidate.init(&Remat, FreqInfo, DAG);
     if (Candidate.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc))
-      CandidateOrder.push_back(Candidates.size() - 1);
+      CandidateOrder.push_back(I);
   }
 
   REMAT_DEBUG({
@@ -2951,19 +2951,21 @@ PreRARematStage::ScoredRemat::FreqInfo::FreqInfo(
   }
 }
 
-PreRARematStage::ScoredRemat::ScoredRemat(RematReg *Remat, const FreqInfo &Freq,
-                                          GCNScheduleDAGMILive &DAG)
-    : Remat(Remat), LiveIn(DAG.Regions.size()), LiveOut(DAG.Regions.size()),
-      Live(DAG.Regions.size()), UnpredictableRPSave(DAG.Regions.size()) {
-  Register DefReg = Remat->getReg();
+void PreRARematStage::ScoredRemat::init(RematReg *Remat, const FreqInfo &Freq,
+                                        GCNScheduleDAGMILive &DAG) {
+  this->Remat = Remat;
+  const unsigned NumRegions = DAG.Regions.size();
+  LiveIn.resize(NumRegions);
+  LiveOut.resize(NumRegions);
+  Live.resize(NumRegions);
+  UnpredictableRPSave.resize(NumRegions);
 
   // Mark regions in which the rematerializable register is live.
-  for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
-    auto LiveInIt = DAG.LiveIns[I].find(DefReg);
-    if (LiveInIt != DAG.LiveIns[I].end())
+  Register DefReg = Remat->getReg();
+  for (unsigned I = 0, E = NumRegions; I != E; ++I) {
+    if (DAG.LiveIns[I].contains(DefReg))
       LiveIn.set(I);
-    const auto &LiveOuts = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I);
-    if (auto LiveOutIt = LiveOuts.find(DefReg); LiveOutIt != LiveOuts.end())
+    if (DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).contains(DefReg))
       LiveOut.set(I);
 
     // If the register is both unused and live-through in the region, the
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index b9c4dcdd8b7ac..c6f5eec58b7e1 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -596,10 +596,10 @@ class PreRARematStage : public GCNSchedStage {
       static const uint64_t ScaleFactor = 1024;
     };
 
-    /// This only initializes state-independent characteristics of \p Remat, not
-    /// the actual score.
-    ScoredRemat(RematReg *Remat, const FreqInfo &Freq,
-                GCNScheduleDAGMILive &DAG);
+    /// Initializes the candidate with state-independent characteristics for a
+    /// particular \p Remat. This doesn't update the actual score (call \ref
+    /// update for this).
+    void init(RematReg *Remat, const FreqInfo &Freq, GCNScheduleDAGMILive &DAG);
 
     /// Rematerializes the candidate at its use and returns the new MI.
     MachineInstr *rematerialize(GCNScheduleDAGMILive &DAG) const;



More information about the llvm-branch-commits mailing list