[llvm] [AMDGPU][Scheduler] Refactor ArchVGPR rematerialization during scheduling (PR #125885)
Lucas Ramirez via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 17 04:34:25 PST 2025
================
@@ -1673,174 +1675,249 @@ bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
return true;
}
-void PreRARematStage::collectRematerializableInstructions() {
+bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
- for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
- Register Reg = Register::index2VirtReg(I);
- if (!DAG.LIS->hasInterval(Reg))
- continue;
-
- // TODO: Handle AGPR and SGPR rematerialization
- if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
- !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
- continue;
-
- MachineOperand *Op = DAG.MRI.getOneDef(Reg);
- MachineInstr *Def = Op->getParent();
- if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
- continue;
-
- MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
- if (Def->getParent() == UseI->getParent())
- continue;
- bool HasRematDependency = false;
- // Check if this instruction uses any registers that are planned to be
- // rematerialized
- for (auto &RematEntry : RematerializableInsts) {
- if (find_if(RematEntry.second,
- [&Def](std::pair<MachineInstr *, MachineInstr *> &Remat) {
- for (MachineOperand &MO : Def->operands()) {
- if (!MO.isReg())
- continue;
- if (MO.getReg() == Remat.first->getOperand(0).getReg())
- return true;
- }
- return false;
- }) != RematEntry.second.end()) {
- HasRematDependency = true;
- break;
+ REMAT_DEBUG(dbgs() << "Collecting rematerializable instructions in "
+ << MF.getFunction().getName() << '\n');
+
+ // Maps optimizable regions (i.e., regions at minimum and VGPR-limited
+ // occupancy, or regions with VGPR spilling) to their excess RP.
+ DenseMap<unsigned, unsigned> OptRegions;
+
+ // Note that the maximum number of VGPRs to use to eliminate spill may be
+ // lower than the maximum number to increase occupancy when the function has
+ // the "amdgpu-num-vgpr" attribute.
+ const std::pair<unsigned, unsigned> OccBounds =
+ ST.getOccupancyWithWorkGroupSizes(MF);
+ // FIXME: we should be able to just call ST.getMaxNumArchVGPRs() but that
+ // would use the occupancy bounds as determined by
+ // MF.getFunction().getWavesPerEU(), which look incorrect in some cases.
+ const unsigned MaxVGPRsNoSpill =
+ ST.getBaseMaxNumVGPRs(MF.getFunction(),
+ {ST.getMinNumArchVGPRs(OccBounds.second),
+ ST.getMaxNumArchVGPRs(OccBounds.first)},
+ false);
----------------
lucas-rami wrote:
It didn't before, but it does now. Since the default handling of this attribute yields unexpected results, I handle it directly in the stage instead of relying on the subtarget (which, IMO, should be fixed). Basically, I want to allow the user to request lower occupancies than the bounds induced by workgroup sizes.
https://github.com/llvm/llvm-project/pull/125885
More information about the llvm-commits mailing list