[llvm] [AMDGPU][Scheduler] Refactor ArchVGPR rematerialization during scheduling (PR #125885)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 19 14:21:17 PST 2025
================
@@ -1673,174 +1675,319 @@ bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
return true;
}
-void PreRARematStage::collectRematerializableInstructions() {
- const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
- for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
- Register Reg = Register::index2VirtReg(I);
- if (!DAG.LIS->hasInterval(Reg))
- continue;
-
- // TODO: Handle AGPR and SGPR rematerialization
- if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
- !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
- continue;
-
- MachineOperand *Op = DAG.MRI.getOneDef(Reg);
- MachineInstr *Def = Op->getParent();
- if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
- continue;
-
- MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
- if (Def->getParent() == UseI->getParent())
- continue;
-
- bool HasRematDependency = false;
- // Check if this instruction uses any registers that are planned to be
- // rematerialized
- for (auto &RematEntry : RematerializableInsts) {
- if (find_if(RematEntry.second,
- [&Def](std::pair<MachineInstr *, MachineInstr *> &Remat) {
- for (MachineOperand &MO : Def->operands()) {
- if (!MO.isReg())
- continue;
- if (MO.getReg() == Remat.first->getOperand(0).getReg())
- return true;
- }
- return false;
- }) != RematEntry.second.end()) {
- HasRematDependency = true;
- break;
- }
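+// Returns true when the VGPR pressure in \p RP exceeds \p MaxVGPRs, setting
+// \p ExcessArchVGPRs to the number of ArchVGPRs to save to eliminate the
+// excess and \p AGPRLimited to whether AGPR pressure alone may prevent us
+// from eliminating spilling.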
+bool PreRARematStage::hasExcessVGPRs(const GCNRegPressure &RP,
+ unsigned MaxVGPRs,
+ unsigned &ExcessArchVGPRs,
+ bool &AGPRLimited) {
+ unsigned NumAGPRs = RP.getAGPRNum();
+ if (!ST.hasGFX90AInsts() || !NumAGPRs) {
+    // Non-unified RF, or unified RF with no AGPR usage. We can only reduce
+    // ArchVGPR excess pressure at this point, but still want to identify
+    // when there is AGPR excess pressure.
+ bool HasSpill = false;
+ unsigned NumArchVGPRs = RP.getArchVGPRNum();
+ if (NumArchVGPRs > MaxVGPRs) {
+ ExcessArchVGPRs = NumArchVGPRs - MaxVGPRs;
+ HasSpill = true;
}
- // Do not rematerialize an instruction if it uses an instruction that we
- // have designated for rematerialization.
- // FIXME: Allow for rematerialization chains: this requires 1. updating
- // remat points to account for uses that are rematerialized, and 2. either
- // rematerializing the candidates in careful ordering, or deferring the MBB
- // RP walk until the entire chain has been rematerialized.
- if (HasRematDependency)
- continue;
-
- // Similarly, check if the UseI is planned to be remat.
- for (auto &RematEntry : RematerializableInsts) {
- if (find_if(RematEntry.second,
- [&UseI](std::pair<MachineInstr *, MachineInstr *> &Remat) {
- return Remat.first == UseI;
- }) != RematEntry.second.end()) {
- HasRematDependency = true;
- break;
- }
+ if (NumAGPRs > MaxVGPRs) {
+ AGPRLimited = true;
+ HasSpill = true;
+ }
+ return HasSpill;
+ }
+ if (RP.getVGPRNum(true) > MaxVGPRs) {
+ // Unified RF. We can only remat ArchVGPRs; AGPR pressure alone may prevent
+ // us from eliminating spilling.
+ unsigned NumArchVGPRs = RP.getArchVGPRNum();
+ if (NumAGPRs >= MaxVGPRs) {
+ AGPRLimited = true;
+ ExcessArchVGPRs = NumArchVGPRs;
+ } else {
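+      // The unified RF rounds the ArchVGPR block up to a multiple of 4, so
+      // only alignDown(MaxVGPRs - NumAGPRs, 4) ArchVGPRs fit alongside the
+      // AGPRs. Illustrative example: with MaxVGPRs = 512 and NumAGPRs = 10,
+      // at most alignDown(502, 4) = 500 ArchVGPRs avoid spilling.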
+ ExcessArchVGPRs = NumArchVGPRs - alignDown(MaxVGPRs - NumAGPRs, 4);
}
+ return true;
+ }
+ return false;
+}
- if (HasRematDependency)
- break;
+bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
- // We are only collecting defs that are defined in another block and are
- // live-through or used inside regions at MinOccupancy. This means that the
- // register must be in the live-in set for the region.
- bool AddedToRematList = false;
- for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
- auto It = DAG.LiveIns[I].find(Reg);
- if (It != DAG.LiveIns[I].end() && !It->second.none()) {
- if (DAG.RegionsWithMinOcc[I]) {
- SlotIndex DefIdx = DAG.LIS->getInstructionIndex(*Def);
- SlotIndex UseIdx =
- DAG.LIS->getInstructionIndex(*UseI).getRegSlot(true);
- if (allUsesAvailableAt(Def, DefIdx, UseIdx)) {
- RematerializableInsts[I][Def] = UseI;
- AddedToRematList = true;
- }
+ REMAT_DEBUG(dbgs() << "Collecting rematerializable instructions in "
+ << MF.getFunction().getName() << '\n');
+
+  // Maps optimizable regions (i.e., regions at minimum occupancy which are
+  // VGPR-limited, or regions with VGPR spilling) to their excess RP.
+ DenseMap<unsigned, unsigned> OptRegions;
+ const Function &F = MF.getFunction();
+ const bool UnifiedRF = ST.hasGFX90AInsts();
+
+  // Adjust the workgroup-size-induced occupancy bounds with the
+  // "amdgpu-waves-per-eu" attribute. This should be offloaded to a subtarget
+  // method, but at this point it is unclear how other parts of the codebase
+  // interpret this attribute, and the default behavior produces unexpected
+  // bounds. Here we want to allow users to ask for target occupancies lower
+  // than the default lower bound.
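+  // Illustrative example: with default bounds [4, 10] and
+  // "amdgpu-waves-per-eu"="2,8", the adjusted bounds become [2, 8].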
+ std::pair<unsigned, unsigned> OccBounds =
+ ST.getOccupancyWithWorkGroupSizes(MF);
+ std::pair<unsigned, unsigned> WavesPerEU =
+ AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", {0, 0}, true);
+ if (WavesPerEU.first <= WavesPerEU.second) {
+ if (WavesPerEU.first && WavesPerEU.first <= OccBounds.second)
+ OccBounds.first = WavesPerEU.first;
+ if (WavesPerEU.second)
+ OccBounds.second = std::min(OccBounds.second, WavesPerEU.second);
+ }
+
+  // We call the "base" max-number functions directly because the regular ones
+  // apply the subtarget's logic for combining "amdgpu-waves-per-eu" with the
+  // function's workgroup-size-induced occupancy bounds, producing unexpected
+  // results.
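+  // The "NoSpill" limits below derive from the adjusted bounds, while
+  // MaxSGPRsMinOcc and MaxVGPRsIncOcc are the limits at the current minimum
+  // occupancy and one wave above it (the occupancy we try to reach).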
+ const unsigned MaxSGPRsNoSpill = ST.getBaseMaxNumSGPRs(
+ F, OccBounds, ST.getMaxNumPreloadedSGPRs(), ST.getReservedNumSGPRs(F));
+ const unsigned MaxVGPRsNoSpill =
+ ST.getBaseMaxNumVGPRs(F, {ST.getMinNumVGPRs(OccBounds.second),
+ ST.getMaxNumVGPRs(OccBounds.first)});
+ const unsigned MaxSGPRsMinOcc = ST.getMaxNumSGPRs(DAG.MinOccupancy, false);
+ const unsigned MaxVGPRsIncOcc = ST.getMaxNumVGPRs(DAG.MinOccupancy + 1);
+ IncreaseOccupancy = OccBounds.second > DAG.MinOccupancy;
+
+ auto ClearOptRegionsIf = [&](bool Cond) -> bool {
+ if (Cond) {
+ // We won't try to increase occupancy.
+ IncreaseOccupancy = false;
+ OptRegions.clear();
+ }
+ return Cond;
+ };
+
+  // Collect optimizable regions. If there is spilling in any region, we only
+  // try to reduce ArchVGPR spilling. Otherwise, we try to increase occupancy
+  // by one for the whole function.
+ for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
+ GCNRegPressure &RP = DAG.Pressure[I];
+ unsigned ExcessRP = 0;
+ unsigned NumSGPRs = RP.getSGPRNum();
+
+    // Check whether SGPR pressure prevents us from eliminating spilling.
+ if (NumSGPRs > MaxSGPRsNoSpill)
+ ClearOptRegionsIf(IncreaseOccupancy);
+
+ bool OccAGPRLimited = false;
+ if (hasExcessVGPRs(RP, MaxVGPRsNoSpill, ExcessRP, OccAGPRLimited)) {
+ ClearOptRegionsIf(IncreaseOccupancy);
+ REMAT_DEBUG({
+ if (ExcessRP) {
+ StringRef RegClass = UnifiedRF ? "VGPRs" : "ArchVGPRs";
+ dbgs() << "Region " << I << " is spilling " << RegClass << ", save "
+ << ExcessRP << " to eliminate " << RegClass << "-spilling\n";
}
+ });
+ } else if (IncreaseOccupancy) {
+ // Check whether SGPR pressure prevents us from increasing occupancy.
+ if (ClearOptRegionsIf(NumSGPRs > MaxSGPRsMinOcc))
+ continue;
- // Collect regions with rematerializable reg as live-in to avoid
- // searching later when updating RP.
- RematDefToLiveInRegions[Def].push_back(I);
+ if (hasExcessVGPRs(RP, MaxVGPRsIncOcc, ExcessRP, OccAGPRLimited)) {
+ // Check whether AGPR pressure prevents us from increasing occupancy.
+ if (ClearOptRegionsIf(OccAGPRLimited))
+ continue;
----------------
jrbyrnes wrote:
I think we can just return false if our DAG.MinOcc > OccBounds.first
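i.e., something like (sketch, using the field names from this patch):

    if (DAG.MinOccupancy > OccBounds.first)
      return false;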
https://github.com/llvm/llvm-project/pull/125885