[llvm] [AMDGPU][Scheduler] Refactor ArchVGPR rematerialization during scheduling (PR #125885)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon May 5 04:38:51 PDT 2025
================
@@ -1688,174 +1696,421 @@ bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
return true;
}
-void PreRARematStage::collectRematerializableInstructions() {
- const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
- for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
- Register Reg = Register::index2VirtReg(I);
- if (!DAG.LIS->hasInterval(Reg))
- continue;
+namespace {
+/// Models excess register pressure in a region and tracks our progress as we
+/// identify rematerialization opportunities.
+struct ExcessRP {
+ /// Number of excess ArchVGPRs.
+ unsigned ArchVGPRs = 0;
+ /// Number of excess AGPRs.
+ unsigned AGPRs = 0;
+ /// For unified register files, number of excess VGPRs.
+ unsigned VGPRs = 0;
+ /// For unified register files with AGPR usage, number of excess ArchVGPRs to
+ /// save before we are able to save a whole allocation granule.
+ unsigned ArchVGPRsToAlignment = 0;
+ /// Whether the region uses AGPRs.
+ bool HasAGPRs = false;
+ /// Whether the subtarget has a unified RF.
+ bool UnifiedRF;
+
+ /// Constructs the excess RP model; determines the excess pressure w.r.t. a
+ /// maximum number of allowed VGPRs.
+ ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxVGPRs);
+
+ /// Accounts for \p NumRegs saved ArchVGPRs in the model. If \p
+ /// UseArchVGPRForAGPRSpill is true, saved ArchVGPRs are used to save excess
+ /// AGPRs once excess ArchVGPR pressure has been eliminated. Returns whether
+ /// saving these ArchVGPRs helped reduce excess pressure.
+ bool saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill);
+
+ /// Accounts for \p NumRegs saved AGPRs in the model. Returns whether saving
+ /// these AGPRs helped reduce excess pressure.
+ bool saveAGPRs(unsigned NumRegs);
+
+ /// Returns whether there is any excess register pressure.
+ operator bool() const { return ArchVGPRs != 0 || AGPRs != 0 || VGPRs != 0; }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ friend raw_ostream &operator<<(raw_ostream &OS, const ExcessRP &Excess) {
+ OS << Excess.ArchVGPRs << " ArchVGPRs, " << Excess.AGPRs << " AGPRs, and "
+ << Excess.VGPRs << " VGPRs (next ArchVGPR alignment in "
+ << Excess.ArchVGPRsToAlignment << " registers)\n";
+ return OS;
+ }
+#endif
- // TODO: Handle AGPR and SGPR rematerialization
- if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
- !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
- continue;
+private:
+ static inline bool saveRegs(unsigned &LeftToSave, unsigned &NumRegs) {
+ unsigned NumSaved = std::min(LeftToSave, NumRegs);
+ NumRegs -= NumSaved;
+ LeftToSave -= NumSaved;
+ return NumSaved;
+ }
+};
+} // namespace
+
+ExcessRP::ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP,
+ unsigned MaxVGPRs)
+ : UnifiedRF(ST.hasGFX90AInsts()) {
+ unsigned NumArchVGPRs = RP.getArchVGPRNum();
+ unsigned NumAGPRs = RP.getAGPRNum();
+ HasAGPRs = NumAGPRs;
+
+ if (!UnifiedRF) {
+ // Non-unified RF. Account for excess pressure for ArchVGPRs and AGPRs
+ // independently.
+ if (NumArchVGPRs > MaxVGPRs)
+ ArchVGPRs = NumArchVGPRs - MaxVGPRs;
+ if (NumAGPRs > MaxVGPRs)
+ AGPRs = NumAGPRs - MaxVGPRs;
+ return;
+ }
- MachineOperand *Op = DAG.MRI.getOneDef(Reg);
- MachineInstr *Def = Op->getParent();
- if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
- continue;
+ // Independently of whether overall VGPR pressure is under the limit, we still
+ // have to check whether ArchVGPR pressure or AGPR pressure alone exceeds the
+ // number of addressable registers in each category.
+ const unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
+ if (NumArchVGPRs > MaxArchVGPRs) {
+ ArchVGPRs = NumArchVGPRs - MaxArchVGPRs;
+ NumArchVGPRs = MaxArchVGPRs;
+ }
+ if (NumAGPRs > MaxArchVGPRs) {
+ AGPRs = NumAGPRs - MaxArchVGPRs;
+ NumAGPRs = MaxArchVGPRs;
+ }
- MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
- if (Def->getParent() == UseI->getParent())
- continue;
+ // Check overall VGPR usage against the limit; any excess above addressable
+ // register limits has already been accounted for.
+ const unsigned Granule = AMDGPU::IsaInfo::getArchVGPRAllocGranule();
+ unsigned NumVGPRs = GCNRegPressure::getUnifiedVGPRNum(NumArchVGPRs, NumAGPRs);
+ if (NumVGPRs > MaxVGPRs) {
+ VGPRs = NumVGPRs - MaxVGPRs;
+ ArchVGPRsToAlignment = NumArchVGPRs - alignDown(NumArchVGPRs, Granule);
+ if (!ArchVGPRsToAlignment)
+ ArchVGPRsToAlignment = Granule;
+ }
+}
- bool HasRematDependency = false;
- // Check if this instruction uses any registers that are planned to be
- // rematerialized
- for (auto &RematEntry : RematerializableInsts) {
- if (find_if(RematEntry.second,
- [&Def](std::pair<MachineInstr *, MachineInstr *> &Remat) {
- for (MachineOperand &MO : Def->operands()) {
- if (!MO.isReg())
- continue;
- if (MO.getReg() == Remat.first->getOperand(0).getReg())
- return true;
- }
- return false;
- }) != RematEntry.second.end()) {
- HasRematDependency = true;
- break;
- }
+bool ExcessRP::saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill) {
+ bool Progress = saveRegs(ArchVGPRs, NumRegs);
+ if (!NumRegs)
+ return Progress;
+
+ if (!UnifiedRF) {
+ if (UseArchVGPRForAGPRSpill)
+ Progress |= saveRegs(AGPRs, NumRegs);
+ } else if (HasAGPRs && (VGPRs || (UseArchVGPRForAGPRSpill && AGPRs))) {
+ // There is progress as long as there are VGPRs left to save, even if the
+ // save induced by this particular call does not cross an ArchVGPR alignment
+ // barrier.
+ Progress = true;
+
+ // ArchVGPRs can only be allocated as a multiple of a granule in unified RF.
+ unsigned NumSavedRegs = 0;
+
+ // Count the number of whole ArchVGPR allocation granules we can save.
+ const unsigned Granule = AMDGPU::IsaInfo::getArchVGPRAllocGranule();
+ if (unsigned NumGranules = NumRegs / Granule; NumGranules) {
+ NumSavedRegs = NumGranules * Granule;
+ NumRegs -= NumSavedRegs;
}
- // Do not rematerialize an instruction if it uses an instruction that we
- // have designated for rematerialization.
- // FIXME: Allow for rematerialization chains: this requires 1. updating
- // remat points to account for uses that are rematerialized, and 2. either
- // rematerializing the candidates in careful ordering, or deferring the MBB
- // RP walk until the entire chain has been rematerialized.
- if (HasRematDependency)
- continue;
- // Similarly, check if the UseI is planned to be remat.
- for (auto &RematEntry : RematerializableInsts) {
- if (find_if(RematEntry.second,
- [&UseI](std::pair<MachineInstr *, MachineInstr *> &Remat) {
- return Remat.first == UseI;
- }) != RematEntry.second.end()) {
- HasRematDependency = true;
- break;
- }
+ // We may be able to save one more whole ArchVGPR allocation granule.
+ if (NumRegs >= ArchVGPRsToAlignment) {
+ NumSavedRegs += Granule;
+ ArchVGPRsToAlignment = Granule - (NumRegs - ArchVGPRsToAlignment);
+ } else {
+ ArchVGPRsToAlignment -= NumRegs;
}
- if (HasRematDependency)
- break;
+ // Prioritize saving generic VGPRs, then AGPRs if we allow AGPR-to-ArchVGPR
+ // spilling and have some free ArchVGPR slots.
+ saveRegs(VGPRs, NumSavedRegs);
+ if (UseArchVGPRForAGPRSpill)
+ saveRegs(AGPRs, NumSavedRegs);
+ } else {
+ // No AGPR usage in the region, i.e., no allocation granule to worry about.
+ Progress |= saveRegs(VGPRs, NumRegs);
+ }
- // We are only collecting defs that are defined in another block and are
- // live-through or used inside regions at MinOccupancy. This means that the
- // register must be in the live-in set for the region.
- bool AddedToRematList = false;
- for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
- auto It = DAG.LiveIns[I].find(Reg);
- if (It != DAG.LiveIns[I].end() && !It->second.none()) {
- if (DAG.RegionsWithMinOcc[I]) {
- SlotIndex DefIdx = DAG.LIS->getInstructionIndex(*Def);
- SlotIndex UseIdx =
- DAG.LIS->getInstructionIndex(*UseI).getRegSlot(true);
- if (allUsesAvailableAt(Def, DefIdx, UseIdx)) {
- RematerializableInsts[I][Def] = UseI;
- AddedToRematList = true;
- }
- }
+ return Progress;
+}
+
+bool ExcessRP::saveAGPRs(unsigned NumRegs) {
+ return saveRegs(AGPRs, NumRegs) || saveRegs(VGPRs, NumRegs);
+}
- // Collect regions with rematerializable reg as live-in to avoid
- // searching later when updating RP.
- RematDefToLiveInRegions[Def].push_back(I);
+bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
+
+ REMAT_DEBUG(dbgs() << "Collecting rematerializable instructions in "
+ << MF.getFunction().getName() << '\n');
+
+ // Maps optimizable regions (i.e., regions at minimum and VGPR-limited
+ // occupancy, or regions with VGPR spilling) to a model of their excess RP.
+ DenseMap<unsigned, ExcessRP> OptRegions;
+ const Function &F = MF.getFunction();
+
+ std::pair<unsigned, unsigned> WavesPerEU = ST.getWavesPerEU(F);
+ const unsigned MaxSGPRsNoSpill = ST.getBaseMaxNumSGPRs(
+ F, WavesPerEU, ST.getMaxNumPreloadedSGPRs(), ST.getReservedNumSGPRs(F));
+ const unsigned MaxVGPRsNoSpill =
+ ST.getBaseMaxNumVGPRs(F, {ST.getMinNumVGPRs(WavesPerEU.second),
+ ST.getMaxNumVGPRs(WavesPerEU.first)});
+ const unsigned MaxSGPRsIncOcc =
+ ST.getMaxNumSGPRs(DAG.MinOccupancy + 1, false);
+ const unsigned MaxVGPRsIncOcc = ST.getMaxNumVGPRs(DAG.MinOccupancy + 1);
+ IncreaseOccupancy = WavesPerEU.second > DAG.MinOccupancy;
+
+ auto ClearOptRegionsIf = [&](bool Cond) -> bool {
+ if (Cond) {
+ // We won't try to increase occupancy.
+ IncreaseOccupancy = false;
+ OptRegions.clear();
+ }
+ return Cond;
+ };
+
+ // Collect optimizable regions. If there is spilling in any region we will
+ // just try to reduce ArchVGPR spilling. Otherwise we will try to increase
+ // occupancy by one in the whole function.
+ for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
+ GCNRegPressure &RP = DAG.Pressure[I];
+
+ // Check whether SGPR pressure prevents us from eliminating spilling.
+ unsigned NumSGPRs = RP.getSGPRNum();
+ if (NumSGPRs > MaxSGPRsNoSpill)
+ ClearOptRegionsIf(IncreaseOccupancy);
+
+ ExcessRP Excess(ST, RP, MaxVGPRsNoSpill);
+ if (Excess) {
+ ClearOptRegionsIf(IncreaseOccupancy);
+ } else if (IncreaseOccupancy) {
+ // Check whether SGPR pressure prevents us from increasing occupancy.
+ if (ClearOptRegionsIf(NumSGPRs > MaxSGPRsIncOcc)) {
+ if (DAG.MinOccupancy >= WavesPerEU.first)
+ return false;
+ continue;
+ }
+ if ((Excess = ExcessRP(ST, RP, MaxVGPRsIncOcc))) {
+ // We can only rematerialize ArchVGPRs at this point.
+ unsigned NumArchVGPRsToRemat = Excess.ArchVGPRs + Excess.VGPRs;
+ bool NotEnoughArchVGPRs = NumArchVGPRsToRemat > RP.getArchVGPRNum();
+ if (ClearOptRegionsIf(Excess.AGPRs || NotEnoughArchVGPRs)) {
+ if (DAG.MinOccupancy >= WavesPerEU.first)
+ return false;
+ continue;
+ }
}
}
- if (!AddedToRematList)
- RematDefToLiveInRegions.erase(Def);
+ if (Excess)
+ OptRegions.insert({I, Excess});
}
-}
+ if (OptRegions.empty())
+ return false;
-bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
- const TargetInstrInfo *TII) {
- // Temporary copies of cached variables we will be modifying and replacing if
- // sinking succeeds.
- SmallVector<
- std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
- NewRegions;
- DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
- DenseMap<unsigned, GCNRegPressure> NewPressure;
- BitVector NewRescheduleRegions;
- LiveIntervals *LIS = DAG.LIS;
+#ifndef NDEBUG
+ if (IncreaseOccupancy)
+ REMAT_DEBUG(dbgs() << "Occupancy minimal in regions:\n");
+ else
+ REMAT_DEBUG(dbgs() << "Spilling in regions:\n");
+ for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
+ if (auto OptIt = OptRegions.find(I); OptIt != OptRegions.end())
+ REMAT_DEBUG(dbgs() << " " << I << ": " << OptIt->getSecond() << '\n');
+ }
+#endif
+
+ // When we are reducing spilling, the target is the minimum number of
+ // waves/EU required by the subtarget.
+ TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1 : WavesPerEU.first;
+
+ // Accounts for a reduction in RP in an optimizable region. Returns whether we
+ // estimate that we have identified enough rematerialization opportunities to
+ // achieve our goal, and sets Progress to true when this particular reduction
+ // in pressure was helpful toward that goal.
+ auto ReduceRPInRegion = [&](auto OptIt, LaneBitmask Mask,
+ bool &Progress) -> bool {
+ ExcessRP &Excess = OptIt->getSecond();
+ // We allow saved ArchVGPRs to be considered as free spill slots for AGPRs
+ // only when we are just trying to eliminate spilling to memory. At this
+ // point we err on the conservative side and do not increase
+ // register-to-register spilling for the sake of increasing occupancy.
+ Progress |=
+ Excess.saveArchVGPRs(SIRegisterInfo::getNumCoveredRegs(Mask),
+ /*UseArchVGPRForAGPRSpill=*/!IncreaseOccupancy);
+ if (!Excess)
+ OptRegions.erase(OptIt->getFirst());
+ return OptRegions.empty();
+ };
+
+ // We need up-to-date live-out info to query live-out register masks in
+ // regions containing rematerializable instructions.
+ DAG.RegionLiveOuts.buildLiveRegMap();
+
+ // Cache set of registers that are going to be rematerialized.
+ DenseSet<unsigned> RematRegs;
+
+ // Identify rematerializable instructions in the function.
+ for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
+ auto Region = DAG.Regions[I];
+ for (auto MI = Region.first; MI != Region.second; ++MI) {
+ // The instruction must be trivially rematerializable.
+ MachineInstr &DefMI = *MI;
+ if (!isTriviallyReMaterializable(DefMI))
+ continue;
- NewRegions.resize(DAG.Regions.size());
- NewRescheduleRegions.resize(DAG.Regions.size());
+ // We only support rematerializing virtual VGPRs with one definition.
+ Register Reg = DefMI.getOperand(0).getReg();
+ if (!Reg.isVirtual() || !DAG.LIS->hasInterval(Reg) ||
+ !SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
+ !DAG.MRI.hasOneDef(Reg))
+ continue;
- // Collect only regions that has a rematerializable def as a live-in.
- SmallSet<unsigned, 16> ImpactedRegions;
- for (const auto &It : RematDefToLiveInRegions)
- ImpactedRegions.insert_range(It.second);
+ // We only care to rematerialize the instruction if it has a single
+ // non-debug user in a different block. The using MI may not belong to a
+ // region if it is a lone region terminator.
+ MachineInstr *UseMI = DAG.MRI.getOneNonDBGUser(Reg);
+ auto UseRegion = MIRegion.find(UseMI);
+ if (!UseMI || (UseRegion != MIRegion.end() && UseRegion->second == I))
+ continue;
- // Make copies of register pressure and live-ins cache that will be updated
- // as we rematerialize.
- for (auto Idx : ImpactedRegions) {
- NewPressure[Idx] = DAG.Pressure[Idx];
- NewLiveIns[Idx] = DAG.LiveIns[Idx];
- }
- NewRegions = DAG.Regions;
- NewRescheduleRegions.reset();
+ // Do not rematerialize an instruction if it uses or is used by an
+ // instruction that we have designated for rematerialization.
+ // FIXME: Allow for rematerialization chains: this requires 1. updating
+ // remat points to account for uses that are rematerialized, and 2. either
+ // rematerializing the candidates in careful ordering, or deferring the
+ // MBB RP walk until the entire chain has been rematerialized.
+ if (Rematerializations.contains(UseMI) ||
+ llvm::any_of(DefMI.operands(), [&RematRegs](MachineOperand &MO) {
+ return MO.isReg() && RematRegs.contains(MO.getReg());
+ }))
+ continue;
- DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
- bool Improved = false;
- for (auto I : ImpactedRegions) {
- if (!DAG.RegionsWithMinOcc[I])
- continue;
+ // Do not rematerialize an instruction if it uses registers that aren't
+ // available at its use. This ensures that we are not extending any live
+ // range while rematerializing.
+ SlotIndex DefIdx = DAG.LIS->getInstructionIndex(DefMI);
+ SlotIndex UseIdx = DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(true);
+ if (!allUsesAvailableAt(&DefMI, DefIdx, UseIdx))
+ continue;
- Improved = false;
- int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
- int SGPRUsage = NewPressure[I].getSGPRNum();
+ REMAT_DEBUG(dbgs() << "Region " << I << ": remat instruction " << DefMI);
+ RematInstruction &Remat =
+ Rematerializations.try_emplace(&DefMI, UseMI).first->second;
+
+ bool RematUseful = false;
+ if (auto It = OptRegions.find(I); It != OptRegions.end()) {
+ // Optimistically consider that moving the instruction out of its
+ // defining region will reduce RP in the latter; this assumes that
+ // maximum RP in the region is reached somewhere between the defining
+ // instruction and the end of the region.
+ REMAT_DEBUG(dbgs() << " Defining region is optimizable\n");
+ LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
+ if (ReduceRPInRegion(It, Mask, RematUseful))
+ return true;
+ }
- // TODO: Handle occupancy drop due to AGPR and SGPR.
- // Check if cause of occupancy drop is due to VGPR usage and not SGPR.
- if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
- break;
+ for (unsigned LIRegion = 0; LIRegion != E; ++LIRegion) {
+ // We are only collecting regions in which the register is a live-in
+ // (and may be live-through).
+ auto It = DAG.LiveIns[LIRegion].find(Reg);
+ if (It == DAG.LiveIns[LIRegion].end() || It->second.none())
+ continue;
+ Remat.LiveInRegions.insert(LIRegion);
+
+ // Account for the reduction in RP due to the rematerialization in an
+ // optimizable region in which the defined register is a live-in. This
+ // is exact for live-through regions but optimistic in the using region,
+ // where RP is actually reduced only if maximum RP is reached somewhere
+ // between the beginning of the region and the rematerializable
+ // instruction's use.
+ if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
+ REMAT_DEBUG(dbgs() << " Live-in in region " << LIRegion << '\n');
+ if (ReduceRPInRegion(It, DAG.LiveIns[LIRegion][Reg], RematUseful))
+ return true;
+ }
+ }
- // The occupancy of this region could have been improved by a previous
- // iteration's sinking of defs.
- if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
- NewRescheduleRegions[I] = true;
- Improved = true;
- continue;
+ // If the instruction is not a live-in or live-out in any optimizable
+ // region then there is no point in rematerializing it.
+ if (!RematUseful) {
+ Rematerializations.pop_back();
+ REMAT_DEBUG(dbgs() << " No impact, not rematerializing instruction\n");
+ } else {
+ RematRegs.insert(Reg);
+ }
+ }
+ }
+
+ if (IncreaseOccupancy) {
+ // We were trying to increase occupancy but failed; abort the stage.
+ REMAT_DEBUG(dbgs() << "Cannot increase occupancy\n");
+ Rematerializations.clear();
+ return false;
+ }
+ REMAT_DEBUG(dbgs() << "Can reduce but not eliminate spilling\n");
+ return !Rematerializations.empty();
+}
+
+void PreRARematStage::rematerialize() {
+ const auto *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ // Collect regions whose RP changes in unpredictable ways; we will have to
+ // fully recompute their RP after all rematerializations.
+ DenseSet<unsigned> RecomputeRP;
+
+ // Rematerialize all instructions.
+ for (auto &[DefMI, Remat] : Rematerializations) {
+ MachineBasicBlock::iterator InsertPos(Remat.UseMI);
+ Register Reg = DefMI->getOperand(0).getReg();
+ unsigned SubReg = DefMI->getOperand(0).getSubReg();
+ unsigned DefRegion = MIRegion.at(DefMI);
+
+ // Rematerialize DefMI to its use block.
+ TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg, SubReg, *DefMI,
+ *DAG.TRI);
+ Remat.RematMI = &*std::prev(InsertPos);
+ Remat.RematMI->getOperand(0).setSubReg(SubReg);
+ DAG.LIS->InsertMachineInstrInMaps(*Remat.RematMI);
+
+ // Update region boundaries in the regions we sink from (remove the defining
+ // MI) and to (insert the MI rematerialized in the use block). Only then can
+ // we erase the original MI.
+ DAG.updateRegionBoundaries(DAG.Regions[DefRegion], DefMI, nullptr);
+ auto UseRegion = MIRegion.find(Remat.UseMI);
+ if (UseRegion != MIRegion.end()) {
+ DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], InsertPos,
+ Remat.RematMI);
}
+ DefMI->eraseFromParent();
+ DAG.LIS->RemoveMachineInstrFromMaps(*DefMI);
+
+ // Collect all regions impacted by the rematerialization and update their
+ // live-in/RP information.
+ for (unsigned I : Remat.LiveInRegions) {
+ ImpactedRegions.insert({I, DAG.Pressure[I]});
+ GCNRPTracker::LiveRegSet &RegionLiveIns = DAG.LiveIns[I];
- // First check if we have enough trivially rematerializable instructions to
- // improve occupancy. Optimistically assume all instructions we are able to
- // sink decreased RP.
- int TotalSinkableRegs = 0;
- for (const auto &It : RematerializableInsts[I]) {
- MachineInstr *Def = It.first;
- Register DefReg = Def->getOperand(0).getReg();
- TotalSinkableRegs +=
- SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
#ifdef EXPENSIVE_CHECKS
// All uses are known to be available / live at the remat point. Thus, the
// uses should already be live in to the region.
- for (MachineOperand &MO : Def->operands()) {
+ for (MachineOperand &MO : DefMI->operands()) {
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
continue;
Register UseReg = MO.getReg();
if (!UseReg.isVirtual())
continue;
- LiveInterval &LI = LIS->getInterval(UseReg);
+ LiveInterval &LI = DAG.LIS->getInterval(UseReg);
LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
if (LI.hasSubRanges() && MO.getSubReg())
LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());
- assert(NewLiveIns[I].contains(UseReg));
- LaneBitmask LiveInMask = NewLiveIns[I][UseReg];
+ assert(RegionLiveIns.contains(UseReg));
+ LaneBitmask LiveInMask = RegionLiveIns[UseReg];
----------------
arsenm wrote:
Avoid double map lookup. Is this RegionLiveIns.at?
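
A minimal sketch of that suggestion, assuming RegionLiveIns is a
GCNRPTracker::LiveRegSet (a DenseMap<unsigned, LaneBitmask>). DenseMap::at
asserts internally that the key is present, so the contains() assert plus
operator[] pair collapses into a single probe:

    // Option 1: single lookup; DenseMap::at asserts that UseReg is present.
    LaneBitmask LiveInMask = RegionLiveIns.at(UseReg);

    // Option 2: one find(), keeping an explicit assertion message.
    auto LiveInIt = RegionLiveIns.find(UseReg);
    assert(LiveInIt != RegionLiveIns.end() && "remat use not live-in to region");
    LaneBitmask LiveInMaskAlt = LiveInIt->second;
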
https://github.com/llvm/llvm-project/pull/125885
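
As an aside, the granule rounding in ExcessRP::saveArchVGPRs is easiest to
follow on concrete values. The standalone snippet below mirrors the patch's
arithmetic under the assumption of an ArchVGPR allocation granule of 4; the
names follow the patch, but the snippet is illustrative, not the LLVM code:

    #include <cstdio>

    int main() {
      const unsigned Granule = 4; // assumed ArchVGPR allocation granule
      unsigned NumArchVGPRs = 10; // live ArchVGPRs in the region

      // 10 live ArchVGPRs reserve 12 slots (3 granules); saving 2 of them
      // (10 -> 8) would free a whole granule: that is ArchVGPRsToAlignment.
      unsigned Rem = NumArchVGPRs % Granule;
      unsigned ArchVGPRsToAlignment = Rem ? Rem : Granule; // 2

      unsigned NumRegs = 5; // ArchVGPRs we plan to rematerialize away

      // Whole granules first: 5 / 4 = 1 granule, i.e., 4 freed slots.
      unsigned NumSavedRegs = (NumRegs / Granule) * Granule;
      NumRegs %= Granule; // 1 register of savings left over

      if (NumRegs >= ArchVGPRsToAlignment) {
        // Leftover savings cross the next granule boundary: one more granule.
        NumSavedRegs += Granule;
        ArchVGPRsToAlignment = Granule - (NumRegs - ArchVGPRsToAlignment);
      } else {
        ArchVGPRsToAlignment -= NumRegs; // boundary is now 1 register away
      }

      // Prints "freed 4 slots, next granule boundary in 1 register(s)": the
      // 5 remaining ArchVGPRs occupy 8 slots, so one 4-register granule was
      // freed, and saving 1 more ArchVGPR (5 -> 4) would free another.
      printf("freed %u slots, next granule boundary in %u register(s)\n",
             NumSavedRegs, ArchVGPRsToAlignment);
      return 0;
    }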