[llvm] [AMDGPU][Scheduler] Scoring system for rematerialization candidates (PR #153092)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 26 16:13:48 PDT 2025
================
@@ -432,66 +436,208 @@ class ClusteredLowOccStage : public GCNSchedStage {
};
/// Attempts to reduce function spilling or, if there is no spilling, to
-/// increase function occupancy by one with respect to ArchVGPR usage by sinking
-/// trivially rematerializable instructions to their use. When the stage
-/// estimates reducing spilling or increasing occupancy is possible, as few
-/// instructions as possible are rematerialized to reduce potential negative
+/// increase function occupancy by one with respect to register usage by sinking
+/// rematerializable instructions to their use. When the stage estimates that
+/// reducing spilling or increasing occupancy is possible, it tries to
+/// rematerialize as few registers as possible to reduce potential negative
/// effects on function latency.
class PreRARematStage : public GCNSchedStage {
private:
- /// Useful information about a rematerializable instruction.
- struct RematInstruction {
- /// Single use of the rematerializable instruction's defined register,
- /// located in a different block.
+ /// Groups information about a rematerializable register.
+ struct RematReg {
+ /// Single MI defining the rematerializable register.
+ MachineInstr *DefMI;
+ /// Single user of the rematerializable register.
MachineInstr *UseMI;
- /// Rematerialized version of \p DefMI, set in
- /// PreRARematStage::rematerialize. Used for reverting rematerializations.
- MachineInstr *RematMI;
- /// Set of regions in which the rematerializable instruction's defined
- /// register is a live-in.
- SmallDenseSet<unsigned, 4> LiveInRegions;
-
- RematInstruction(MachineInstr *UseMI) : UseMI(UseMI) {}
+ /// Using region.
+ unsigned UseRegion;
+ /// Regions in which the register is live-in/live-out/live anywhere.
+ BitVector LiveIn, LiveOut, Live;
+ /// The rematerializable register's lane bitmask.
+ LaneBitmask Mask;
+ /// Frequency of region defining/using the register. 0 when unknown.
+ unsigned DefFrequency, UseFrequency;
+
+ RematReg(MachineInstr *DefMI, MachineInstr *UseMI,
+ GCNScheduleDAGMILive &DAG,
+ const DenseMap<MachineInstr *, unsigned> &MIRegion,
+ ArrayRef<uint64_t> RegionFreq);
+
+ /// Returns whether the regions at which the register is live intersect
+ /// with the \p Target regions.
+ bool intersectWithTarget(BitVector Target) const {
+ Target &= Live;
+ return Target.any();
+ }
+
+ /// Returns whether it is always beneficial to rematerialize this register.
+ bool isAlwaysBeneficial() const {
+ // When the using region is executed a single time, we know
+ // rematerializing will be beneficial whatever the defining region's
+ // frequency.
+ if (UseFrequency == 1)
+ return true;
+ // When there is uncertainty on the defining or using frequency, we err on
+ // the conservative side and do not consider the rematerialization always
+ // beneficial.
+ if (!DefFrequency || !UseFrequency)
+ return false;
+ return UseFrequency <= DefFrequency;
+ }
+
+ /// Determines whether rematerializing the register is guaranteed to reduce
+ /// pressure in the region.
+ bool isBeneficialRegion(unsigned I) const {
+ assert(I < Live.size() && "region index out of range");
+ return LiveIn[I] && LiveOut[I] && I != UseRegion;
+ }
+
+ /// Determines whether rematerializing the register may, but is not
+ /// guaranteed to, reduce pressure in the region.
+ bool isMaybeBeneficialRegion(unsigned I) const {
+ assert(I < Live.size() && "region index out of range");
+ return Live[I] && !isBeneficialRegion(I);
+ }
+
+ /// Updates internal structures following a MI rematerialization. Part of
+ /// the stage instead of the DAG because it makes assumptions that are
+ /// specific to the rematerialization process.
+ MachineInstr *insertMI(unsigned RegionIdx,
+ MachineBasicBlock::iterator InsertPos,
+ GCNScheduleDAGMILive &DAG) const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void print(const DenseMap<MachineInstr *, unsigned> &MIRegion) const;
+#endif
+ };
+
+ /// A scored rematerializable register. Higher scores indicate more beneficial
+ /// rematerializations. Non-positive scores indicate the rematerialization is
+ /// not helpful to reduce RP in target regions.
+ struct ScoredRemat {
+ /// The rematerializable register under consideration.
+ const RematReg *Remat;
+
+ /// This only initializes state-independent characteristics of \p Remat, not
+ /// the actual score.
+ ScoredRemat(const RematReg *Remat, const GCNSubtarget &ST,
+ const TargetInstrInfo &TII);
+
+ /// Updates the rematerialization's score w.r.t. the current \p RPTargets.
+ /// \p RegionFreq indicates the frequency of each region.
+ void update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
+ ArrayRef<uint64_t> RegionFreq, bool ReduceSpill);
+
+ int getScore() const { return Score; }
+
+ bool operator<(const ScoredRemat &O) const {
+ // Break ties using pointer to rematerializable register. Since
+ // rematerializations are collected in instruction order, registers
+ // appearing earlier have a "higher score" than those appearing later.
+ if (Score == O.Score)
+ return Remat > O.Remat;
+ return Score < O.Score;
+ }
+
+ private:
+ /// Estimated save/restore latency costs for spilling a register to stack.
+ /// FIXME: These numbers are very arbitrary. Need a good rationale for them,
+ /// which I don't know where to get from.
+ static constexpr int SaveCost = 100, RestoreCost = 100;
----------------
jrbyrnes wrote:
May want to query the SchedModel for this.
https://github.com/llvm/llvm-project/pull/153092
More information about the llvm-commits
mailing list