[llvm] [AMDGPU][Scheduler] Scoring system for rematerialization candidates (PR #153092)
Lucas Ramirez via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 20 13:41:06 PDT 2025
================
@@ -1090,33 +1101,223 @@ bool ClusteredLowOccStage::initGCNSchedStage() {
#define REMAT_PREFIX "[PreRARemat] "
#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// Debug helper that dumps entries of RPTargets to dbgs(), one per line and
+/// each prefixed with REMAT_PREFIX and the region index. When \p PrintAll is
+/// true every entry is printed; otherwise only the entries whose bit is set in
+/// TargetRegions are printed, or a "No target regions" notice if no bit is set.
+void PreRARematStage::printTargetRegions(bool PrintAll) const {
+  if (!PrintAll) {
+    // Filtered mode: restrict the dump to regions flagged in TargetRegions.
+    if (TargetRegions.none()) {
+      dbgs() << REMAT_PREFIX << "No target regions\n";
+    } else {
+      dbgs() << REMAT_PREFIX << "Target regions:\n";
+      for (unsigned RegionIdx : TargetRegions.set_bits()) {
+        dbgs() << REMAT_PREFIX << " [" << RegionIdx << "] "
+               << RPTargets[RegionIdx] << '\n';
+      }
+    }
+    return;
+  }
+
+  // Unfiltered mode: walk every entry in index order.
+  for (auto [RegionIdx, RPTarget] : enumerate(RPTargets))
+    dbgs() << REMAT_PREFIX << " [" << RegionIdx << "] " << RPTarget << '\n';
+}
+
+/// Debug helper that prints this register to dbgs(): first the defining
+/// instruction with its region index, then the using instruction with its
+/// region index, then two lists of the regions set in Live, split according to
+/// what isUnusedLiveThrough() returns for each of them.
+void PreRARematStage::RematReg::print() const {
+  dbgs() << REMAT_PREFIX << " [" << DefRegion << "] " << *DefMI;
+  dbgs() << REMAT_PREFIX << " -> used in [" << UseRegion << "] " << *UseMI;
+
+  // Prints Header followed by every region of Live for which
+  // isUnusedLiveThrough() matches WantUnused, then ends the line.
+  auto PrintRegions = [this](const char *Header, bool WantUnused) {
+    dbgs() << REMAT_PREFIX << Header;
+    for (unsigned RegionIdx : Live.set_bits()) {
+      if (isUnusedLiveThrough(RegionIdx) == WantUnused)
+        dbgs() << " [" << RegionIdx << "]";
+    }
+    dbgs() << '\n';
+  };
+
+  PrintRegions(" Guaranteed RP reduction in:", /*WantUnused=*/true);
+  PrintRegions(" Possible RP reduction in:", /*WantUnused=*/false);
+}
+
+/// Debug helper that prints the score to dbgs() as
+/// "(MaxFreq, FreqDiff, RegionImpact)". Score is decoded from its least
+/// significant bits upwards: the low RegionImpactWidth bits, then the next
+/// FreqDiffWidth bits, and finally all remaining upper bits.
+void PreRARematStage::ScoredRemat::print() const {
+  // Build the masks in ScoreTy rather than int: a plain `1 << Width` is
+  // evaluated as int and overflows once Width reaches the width of int, which
+  // would corrupt the mask if ScoreTy is wider than int.
+  const ScoreTy One = 1;
+  const ScoreTy RegionImpactMask = (One << RegionImpactWidth) - 1;
+  const ScoreTy FreqDiffMask = (One << FreqDiffWidth) - 1;
+
+  ScoreTy ShiftScore = Score;
+  ScoreTy RegionImpact = ShiftScore & RegionImpactMask;
+  ShiftScore >>= RegionImpactWidth;
+  ScoreTy FreqDiff = ShiftScore & FreqDiffMask;
+  ShiftScore >>= FreqDiffWidth;
+  // Whatever is left after removing the two low fields is the top component.
+  ScoreTy MaxFreq = ShiftScore;
+  dbgs() << '(' << MaxFreq << ", " << FreqDiff << ", " << RegionImpact << ')';
+}
+#endif
+
bool PreRARematStage::initGCNSchedStage() {
 // FIXME: This pass will invalidate cached BBLiveInMap and MBBLiveIns for
 // regions in between the defs and the region we sunk the def to. Will need to
 // be fixed if there is another pass after this pass.
assert(!S.hasNextStage());
- if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() == 1)
+ if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() <= 1)
return false;
+ // Maps all MIs (except lone terminators, which are not part of any region) to
+ // their parent region. Non-lone terminators are considered part of the region
+ // they delimit.
+ DenseMap<MachineInstr *, unsigned> MIRegion(MF.getInstructionCount());
+
// Before performing any IR modification record the parent region of each MI
// and the parent MBB of each region.
const unsigned NumRegions = DAG.Regions.size();
- RegionBB.reserve(NumRegions);
for (unsigned I = 0; I < NumRegions; ++I) {
RegionBoundaries Region = DAG.Regions[I];
for (auto MI = Region.first; MI != Region.second; ++MI)
MIRegion.insert({&*MI, I});
- RegionBB.push_back(Region.first->getParent());
+ MachineBasicBlock *ParentMBB = Region.first->getParent();
+ if (Region.second != ParentMBB->end())
+ MIRegion.insert({&*Region.second, I});
+ RegionBB.push_back(ParentMBB);
}
- if (!canIncreaseOccupancyOrReduceSpill())
+ // Set an objective for the stage based on current RP in each region.
+ REMAT_DEBUG({
+ dbgs() << "Analyzing ";
+ MF.getFunction().printAsOperand(dbgs(), false);
+ dbgs() << ": ";
+ });
+ if (!setObjective()) {
+ LLVM_DEBUG(dbgs() << "no objective to achieve, occupancy is maximal at "
+ << MFI.getMaxWavesPerEU() << '\n');
+ return false;
+ }
+ LLVM_DEBUG({
+ if (TargetOcc) {
+ dbgs() << "increase occupancy from " << *TargetOcc - 1 << '\n';
+ } else {
+ dbgs() << "reduce spilling (minimum target occupancy is "
+ << MFI.getMinWavesPerEU() << ")\n";
+ }
+ printTargetRegions(/*PrintAll=*/TargetRegions.none());
----------------
lucas-rami wrote:
Indeed, this also allows me to remove the function argument altogether, thanks.
https://github.com/llvm/llvm-project/pull/153092
More information about the llvm-commits mailing list