[llvm] [AMDGPU][Scheduler] Scoring system for rematerialization candidates (PR #153092)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 27 16:31:10 PDT 2025
================
@@ -432,65 +428,215 @@ class ClusteredLowOccStage : public GCNSchedStage {
};
/// Attempts to reduce function spilling or, if there is no spilling, to
-/// increase function occupancy by one with respect to ArchVGPR usage by sinking
-/// rematerializable instructions to their use. When the stage
-/// estimates reducing spilling or increasing occupancy is possible, as few
-/// instructions as possible are rematerialized to reduce potential negative
+/// increase function occupancy by one with respect to register usage by sinking
+/// rematerializable instructions to their use. When the stage estimates that
+/// reducing spilling or increasing occupancy is possible, it tries to
+/// rematerialize as few registers as possible to reduce potential negative
/// effects on function latency.
+///
+/// The stage only supports rematerializing registers that meet all of the
+/// following constraints.
+/// 1. The register is virtual and has a single defining instruction.
+/// 2. The single defining instruction is either deemed rematerializable by the
+/// target-independent logic, or if not, has no non-constant and
+/// non-ignorable physical register use.
+/// 3. The register has no virtual register use whose live range would be
+/// extended by the rematerialization.
+/// 4. The register has a single non-debug user in a different region from its
+/// defining region.
+/// 5. The register is not used by or using another register that is going to be
+/// rematerialized.
class PreRARematStage : public GCNSchedStage {
private:
- /// Useful information about a rematerializable instruction.
- struct RematInstruction {
- /// Single use of the rematerializable instruction's defined register,
- /// located in a different block.
+ /// A rematerializable register.
+ struct RematReg {
+ /// Single MI defining the rematerializable register.
+ MachineInstr *DefMI;
+ /// Single user of the rematerializable register.
MachineInstr *UseMI;
- /// Rematerialized version of \p DefMI, set in
- /// PreRARematStage::rematerialize. Used for reverting rematerializations.
+ /// Regions in which the register is live-in/live-out/live anywhere.
+ BitVector LiveIn, LiveOut, Live;
+ /// The rematerializable register's lane bitmask.
+ LaneBitmask Mask;
+ /// Defining and using regions.
+ unsigned DefRegion, UseRegion;
+
+ RematReg(MachineInstr *DefMI, MachineInstr *UseMI,
+ GCNScheduleDAGMILive &DAG,
+ const DenseMap<MachineInstr *, unsigned> &MIRegion);
+
+ /// Returns the rematerializable register. Do not call after deleting the
+ /// original defining instruction.
+ Register getReg() const { return DefMI->getOperand(0).getReg(); }
+
+ /// Determines whether this rematerialization may be beneficial in at least
+ /// one target region.
+ bool maybeBeneficial(const BitVector &TargetRegions,
+ ArrayRef<GCNRPTarget> RPTargets) const;
+
+ /// Determines if the register is both unused and live-through in region \p
+ /// I. This guarantees that rematerializing it will reduce RP in the region.
+ bool isUnusedLiveThrough(unsigned I) const {
+ assert(I < Live.size() && "region index out of range");
+ return LiveIn[I] && LiveOut[I] && I != UseRegion;
+ }
+
+ /// Updates internal structures following a MI rematerialization. Part of
+ /// the stage instead of the DAG because it makes assumptions that are
+ /// specific to the rematerialization process.
+ void insertMI(unsigned RegionIdx, MachineInstr *RematMI,
+ GCNScheduleDAGMILive &DAG) const;
+ };
+
+ /// A scored rematerialization candidate. Higher scores indicate more
+ /// beneficial rematerializations. A null score indicates the rematerialization
+ /// is not helpful to reduce RP in target regions.
+ struct ScoredRemat {
+ /// The rematerializable register under consideration.
+ const RematReg *Remat;
+
+ /// Execution frequency information required by scoring heuristics.
+ struct FreqInfo {
+ /// Per-region execution frequencies, normalized to minimum observed
+ /// frequency. 0 when unknown.
+ SmallVector<uint64_t> Regions;
+ /// Maximum observed frequency, normalized to minimum observed frequency.
+ uint64_t MaxFreq = 0;
+
+ FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG);
+ };
+
+ /// This only initializes state-independent characteristics of \p Remat, not
+ /// the actual score.
+ ScoredRemat(const RematReg *Remat, const FreqInfo &Freq,
+ const GCNScheduleDAGMILive &DAG);
+
+ /// Updates the rematerialization's score w.r.t. the current \p RPTargets.
+ /// \p Freq provides the execution frequency of each region.
+ void update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
+ const FreqInfo &Freq, bool ReduceSpill);
+
+ /// Returns whether the current score is null, indicating the
+ /// rematerialization is useless.
+ bool hasNullScore() const { return !MaxFreq && !RegionImpact; }
+
+ /// For each pair of candidates the most important scoring component with
+ /// non-equal values determines the result of the comparison (higher is
+ /// better).
+ bool operator<(const ScoredRemat &O) const {
+ if (hasNullScore())
+ return true;
+ if (O.hasNullScore())
+ return false;
+ if (MaxFreq != O.MaxFreq)
+ return MaxFreq < O.MaxFreq;
+ if (FreqDiff != O.FreqDiff)
+ return FreqDiff < O.FreqDiff;
+ if (RegionImpact != O.RegionImpact)
+ return RegionImpact < O.RegionImpact;
+ // Break ties using pointer to rematerializable register.
+ return Remat > O.Remat;
----------------
jrbyrnes wrote:
Should it be `Remat < O.Remat`?
https://github.com/llvm/llvm-project/pull/153092
More information about the llvm-commits
mailing list