[llvm] [AMDGPU][Scheduler] Scoring system for rematerialization candidates (PR #153092)
Lucas Ramirez via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 30 07:51:07 PDT 2025
================
@@ -1089,33 +1100,224 @@ bool ClusteredLowOccStage::initGCNSchedStage() {
#define REMAT_PREFIX "[PreRARemat] "
#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void PreRARematStage::printTargetRegions(bool PrintAll) const {
+ if (PrintAll) {
+ for (auto [I, Target] : enumerate(RPTargets))
+ dbgs() << REMAT_PREFIX << " [" << I << "] " << Target << '\n';
+ return;
+ }
+ if (TargetRegions.none()) {
+ dbgs() << REMAT_PREFIX << "No target regions\n";
+ return;
+ }
+ dbgs() << REMAT_PREFIX << "Target regions:\n";
+ for (unsigned I : TargetRegions.set_bits())
+ dbgs() << REMAT_PREFIX << " [" << I << "] " << RPTargets[I] << '\n';
+}
+
+void PreRARematStage::RematReg::print(
+ const DenseMap<MachineInstr *, unsigned> &MIRegion) const {
+ dbgs() << REMAT_PREFIX << " [" << MIRegion.at(DefMI) << "] " << *DefMI;
+ dbgs() << REMAT_PREFIX << " -> used in [" << UseRegion << "] " << *UseMI;
+ const unsigned NumRegions = Live.size();
+ dbgs() << REMAT_PREFIX << " Guaranteed RP reduction in:";
+ for (unsigned I = 0; I < NumRegions; ++I) {
+ if (isBeneficialRegion(I))
+ dbgs() << " [" << I << "]";
+ }
+ dbgs() << '\n';
+ dbgs() << REMAT_PREFIX << " Possible RP reduction in:";
+ for (unsigned I = 0; I < NumRegions; ++I) {
+ if (isMaybeBeneficialRegion(I))
+ dbgs() << " [" << I << "]";
+ }
+ dbgs() << '\n';
+}
+
+#endif
+
bool PreRARematStage::initGCNSchedStage() {
// FIXME: This pass will invalidate cached BBLiveInMap and MBBLiveIns for
// regions inbetween the defs and region we sinked the def to. Will need to be
// fixed if there is another pass after this pass.
assert(!S.hasNextStage());
- if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() == 1)
+ if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() <= 1)
return false;
// Before performing any IR modification record the parent region of each MI
// and the parent MBB of each region.
const unsigned NumRegions = DAG.Regions.size();
- RegionBB.reserve(NumRegions);
for (unsigned I = 0; I < NumRegions; ++I) {
RegionBoundaries Region = DAG.Regions[I];
for (auto MI = Region.first; MI != Region.second; ++MI)
MIRegion.insert({&*MI, I});
- RegionBB.push_back(Region.first->getParent());
+ MachineBasicBlock *ParentMBB = Region.first->getParent();
+ if (Region.second != ParentMBB->end())
+ MIRegion.insert({&*Region.second, I});
+ RegionBB.push_back(ParentMBB);
}
- if (!canIncreaseOccupancyOrReduceSpill())
+ setObjective();
----------------
lucas-rami wrote:
Yes it is possible however I think we still want to do all the "always beneficial" remats in those cases as they reduce live ranges and may reduce latency as well by moving instructions to lower-frequency regions.
https://github.com/llvm/llvm-project/pull/153092
More information about the llvm-commits
mailing list