[llvm] [AMDGPU][Scheduler] Scoring system for rematerialization candidates (PR #153092)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 27 16:31:10 PDT 2025


================
@@ -432,65 +428,215 @@ class ClusteredLowOccStage : public GCNSchedStage {
 };
 
 /// Attempts to reduce function spilling or, if there is no spilling, to
-/// increase function occupancy by one with respect to ArchVGPR usage by sinking
-/// rematerializable instructions to their use. When the stage
-/// estimates reducing spilling or increasing occupancy is possible, as few
-/// instructions as possible are rematerialized to reduce potential negative
+/// increase function occupancy by one with respect to register usage by sinking
+/// rematerializable instructions to their use. When the stage estimates that
+/// reducing spilling or increasing occupancy is possible, it tries to
+/// rematerialize as few registers as possible to reduce potential negative
 /// effects on function latency.
+///
+/// The stage only supports rematerializing registers that meet all of the
+/// following constraints.
+/// 1. The register is virtual and has a single defining instruction.
+/// 2. The single defining instruction is either deemed rematerializable by the
+///    target-independent logic, or if not, has no non-constant and
+///    non-ignorable physical register use.
+/// 3. The register has no virtual register use whose live range would be
+///    extended by the rematerialization.
+/// 4. The register has a single non-debug user in a different region from its
+///    defining region.
+/// 5. The register is not used by or using another register that is going to be
+///    rematerialized.
 class PreRARematStage : public GCNSchedStage {
 private:
-  /// Useful information about a rematerializable instruction.
-  struct RematInstruction {
-    /// Single use of the rematerializable instruction's defined register,
-    /// located in a different block.
+  /// A rematerializable register.
+  struct RematReg {
+    /// Single MI defining the rematerializable register.
+    MachineInstr *DefMI;
+    /// Single user of the rematerializable register.
     MachineInstr *UseMI;
-    /// Rematerialized version of \p DefMI, set in
-    /// PreRARematStage::rematerialize. Used for reverting rematerializations.
+    /// Regions in which the register is live-in/live-out/live anywhere.
+    BitVector LiveIn, LiveOut, Live;
+    /// The rematerializable register's lane bitmask.
+    LaneBitmask Mask;
+    /// Defining and using regions.
+    unsigned DefRegion, UseRegion;
+
+    RematReg(MachineInstr *DefMI, MachineInstr *UseMI,
+             GCNScheduleDAGMILive &DAG,
+             const DenseMap<MachineInstr *, unsigned> &MIRegion);
+
+    /// Returns the rematerializable register. Do not call after deleting the
+    /// original defining instruction.
+    Register getReg() const { return DefMI->getOperand(0).getReg(); }
+
+    /// Determines whether this rematerialization may be beneficial in at least
+    /// one target region.
+    bool maybeBeneficial(const BitVector &TargetRegions,
+                         ArrayRef<GCNRPTarget> RPTargets) const;
+
+    /// Determines if the register is both unused and live-through in region \p
+    /// I. This guarantees that rematerializing it will reduce RP in the region.
+    bool isUnusedLiveThrough(unsigned I) const {
+      assert(I < Live.size() && "region index out of range");
+      return LiveIn[I] && LiveOut[I] && I != UseRegion;
+    }
+
+    /// Updates internal structures following a MI rematerialization. Part of
+    /// the stage instead of the DAG because it makes assumptions that are
+    /// specific to the rematerialization process.
+    void insertMI(unsigned RegionIdx, MachineInstr *RematMI,
+                  GCNScheduleDAGMILive &DAG) const;
+  };
+
+  /// A scored rematerialization candidate. Higher scores indicate more
+  /// beneficial rematerializations. A null score indicates that the
+  /// rematerialization does not help reduce RP in target regions.
+  struct ScoredRemat {
+    /// The rematerializable register under consideration.
+    const RematReg *Remat;
+
+    /// Execution frequency information required by scoring heuristics.
+    struct FreqInfo {
+      /// Per-region execution frequencies, normalized to minimum observed
+      /// frequency. 0 when unknown.
+      SmallVector<uint64_t> Regions;
+      /// Maximum observed frequency, normalized to minimum observed frequency.
+      uint64_t MaxFreq = 0;
+
+      FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG);
+    };
+
+    /// This only initializes state-independent characteristics of \p Remat, not
+    /// the actual score.
+    ScoredRemat(const RematReg *Remat, const FreqInfo &Freq,
+                const GCNScheduleDAGMILive &DAG);
+
+    /// Updates the rematerialization's score w.r.t. the current \p RPTargets.
+    /// \p Freq provides the execution frequency of each region.
+    void update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
+                const FreqInfo &Freq, bool ReduceSpill);
+
+    /// Returns whether the current score is null, indicating the
+    /// rematerialization is useless.
+    bool hasNullScore() const { return !MaxFreq && !RegionImpact; }
+
+    /// For each pair of candidates, the most important scoring component with
+    /// non-equal values determines the result of the comparison (higher is
+    /// better).
+    bool operator<(const ScoredRemat &O) const {
+      if (hasNullScore())
+        return true;
+      if (O.hasNullScore())
+        return false;
+      if (MaxFreq != O.MaxFreq)
+        return MaxFreq < O.MaxFreq;
+      if (FreqDiff != O.FreqDiff)
+        return FreqDiff < O.FreqDiff;
+      if (RegionImpact != O.RegionImpact)
+        return RegionImpact < O.RegionImpact;
+      // Break ties using pointer to rematerializable register.
+      return Remat > O.Remat;
----------------
jrbyrnes wrote:

Should it be `Remat < O.Remat`?

https://github.com/llvm/llvm-project/pull/153092


More information about the llvm-commits mailing list