[llvm] Revert "[AMDGPU][Scheduler] Refactor ArchVGPR rematerialization during scheduling (#125885)" (PR #139341)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 9 17:33:55 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Vitaly Buka (vitalybuka)
Changes:
This also reverts the related commit "[AMDGPU] Regenerate mfma-loop.ll test".
The reverted change (#125885) introduced a memory error detected by ASan.
This reverts commit 382a085a95b0abeac77b150b7b644b372bd08e78.
This reverts commit 067caaafb58a156d0d77229422607782a639f5b5.
---
Patch is 530.82 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139341.diff
13 Files Affected:
- (modified) llvm/include/llvm/CodeGen/MachineRegisterInfo.h (-4)
- (modified) llvm/lib/CodeGen/MachineRegisterInfo.cpp (-5)
- (modified) llvm/lib/Target/AMDGPU/GCNRegPressure.h (+1-10)
- (modified) llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp (+314-550)
- (modified) llvm/lib/Target/AMDGPU/GCNSchedStrategy.h (+28-78)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.cpp (+26-12)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+2-7)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (-2)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (-4)
- (removed) llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir (-2539)
- (modified) llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir (+302-1609)
- (modified) llvm/test/CodeGen/AMDGPU/mfma-loop.ll (+3-3)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index f20f4b16a5f17..8e288cf212360 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -23,7 +23,6 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/RegisterBank.h"
@@ -586,9 +585,6 @@ class MachineRegisterInfo {
/// multiple uses.
bool hasOneNonDBGUser(Register RegNo) const;
- /// If the register has a single non-Debug instruction using the specified
- /// register, returns it; otherwise returns nullptr.
- MachineInstr *getOneNonDBGUser(Register RegNo) const;
/// hasAtMostUses - Return true if the given register has at most \p MaxUsers
/// non-debug user instructions.
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index b7135251781ad..937f63f6c5e00 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -432,11 +432,6 @@ bool MachineRegisterInfo::hasOneNonDBGUser(Register RegNo) const {
return hasSingleElement(use_nodbg_instructions(RegNo));
}
-MachineInstr *MachineRegisterInfo::getOneNonDBGUser(Register RegNo) const {
- auto RegNoDbgUsers = use_nodbg_instructions(RegNo);
- return hasSingleElement(RegNoDbgUsers) ? &*RegNoDbgUsers.begin() : nullptr;
-}
-
bool MachineRegisterInfo::hasAtMostUserInstrs(Register Reg,
unsigned MaxUsers) const {
return hasNItemsOrLess(use_instr_nodbg_begin(Reg), use_instr_nodbg_end(),
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 4d1e96ff04e8d..7554b9f578fcb 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -53,20 +53,11 @@ struct GCNRegPressure {
/// UnifiedVGPRFile
unsigned getVGPRNum(bool UnifiedVGPRFile) const {
if (UnifiedVGPRFile) {
- return Value[AGPR32] ? getUnifiedVGPRNum(Value[VGPR32], Value[AGPR32])
+ return Value[AGPR32] ? alignTo(Value[VGPR32], 4) + Value[AGPR32]
: Value[VGPR32] + Value[AGPR32];
}
return std::max(Value[VGPR32], Value[AGPR32]);
}
-
- /// Returns the aggregated VGPR pressure, assuming \p NumArchVGPRs ArchVGPRs
- /// and \p NumAGPRs AGPRS, for a target with a unified VGPR file.
- inline static unsigned getUnifiedVGPRNum(unsigned NumArchVGPRs,
- unsigned NumAGPRs) {
- return alignTo(NumArchVGPRs, AMDGPU::IsaInfo::getArchVGPRAllocGranule()) +
- NumAGPRs;
- }
-
/// \returns the ArchVGPR32 pressure
unsigned getArchVGPRNum() const { return Value[VGPR32]; }
/// \returns the AccVGPR32 pressure
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 0896d8716384e..5678512748569 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -25,13 +25,8 @@
#include "GCNSchedStrategy.h"
#include "AMDGPUIGroupLP.h"
-#include "GCNRegPressure.h"
#include "SIMachineFunctionInfo.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/MC/LaneBitmask.h"
-#include "llvm/Support/ErrorHandling.h"
#define DEBUG_TYPE "machine-scheduler"
@@ -306,11 +301,11 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
HasHighPressure = true;
if (SGPRDelta > VGPRDelta) {
Cand.RPDelta.CriticalMax =
- PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
+ PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
} else {
Cand.RPDelta.CriticalMax =
- PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
+ PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
}
}
@@ -323,7 +318,7 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
const RegPressureTracker &RPTracker,
SchedCandidate &Cand,
bool IsBottomUp) {
- const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
unsigned SGPRPressure = 0;
unsigned VGPRPressure = 0;
@@ -419,7 +414,7 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
/*IsBottomUp=*/false);
assert(TCand.SU == TopCand.SU &&
- "Last pick result should correspond to re-picking right now");
+ "Last pick result should correspond to re-picking right now");
}
#endif
}
@@ -895,13 +890,13 @@ GCNScheduleDAGMILive::getRegionLiveInMap() const {
std::vector<MachineInstr *> RegionFirstMIs;
RegionFirstMIs.reserve(Regions.size());
auto I = Regions.rbegin(), E = Regions.rend();
+ auto *BB = I->first->getParent();
do {
- const MachineBasicBlock *MBB = I->first->getParent();
auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
RegionFirstMIs.push_back(MI);
do {
++I;
- } while (I != E && I->first->getParent() == MBB);
+ } while (I != E && I->first->getParent() == BB);
} while (I != E);
return getLiveRegMap(RegionFirstMIs, /*After=*/false, *LIS);
}
@@ -1086,46 +1081,31 @@ bool ClusteredLowOccStage::initGCNSchedStage() {
return true;
}
-/// Allows to easily filter for this stage's debug output.
-#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << "[PreRARemat] "; X;)
-
bool PreRARematStage::initGCNSchedStage() {
- // FIXME: This pass will invalidate cached BBLiveInMap and MBBLiveIns for
- // regions inbetween the defs and region we sinked the def to. Will need to be
- // fixed if there is another pass after this pass.
- assert(!S.hasNextStage());
+ if (!GCNSchedStage::initGCNSchedStage())
+ return false;
- if (!GCNSchedStage::initGCNSchedStage() || DAG.RegionsWithMinOcc.none() ||
- DAG.Regions.size() == 1)
+ if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
return false;
- // Before performing any IR modification record the parent region of each MI
- // and the parent MBB of each region.
- const unsigned NumRegions = DAG.Regions.size();
- RegionBB.reserve(NumRegions);
- for (unsigned I = 0; I < NumRegions; ++I) {
- RegionBoundaries Region = DAG.Regions[I];
- for (auto MI = Region.first; MI != Region.second; ++MI)
- MIRegion.insert({&*MI, I});
- RegionBB.push_back(Region.first->getParent());
- }
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ // Rematerialization will not help if occupancy is not limited by reg usage.
+ if (ST.getOccupancyWithWorkGroupSizes(MF).second == DAG.MinOccupancy)
+ return false;
+
+ // FIXME: This pass will invalidate cached MBBLiveIns for regions
+ // inbetween the defs and region we sinked the def to. Cached pressure
+ // for regions where a def is sinked from will also be invalidated. Will
+ // need to be fixed if there is another pass after this pass.
+ assert(!S.hasNextStage());
- if (!canIncreaseOccupancyOrReduceSpill())
+ collectRematerializableInstructions();
+ if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
return false;
- // Rematerialize identified instructions and update scheduler's state.
- rematerialize();
- if (GCNTrackers)
- DAG.RegionLiveOuts.buildLiveRegMap();
- REMAT_DEBUG(
- dbgs() << "Retrying function scheduling with new min. occupancy of "
- << AchievedOcc << " from rematerializing (original was "
- << DAG.MinOccupancy << ", target was " << TargetOcc << ")\n");
- if (AchievedOcc > DAG.MinOccupancy) {
- DAG.MinOccupancy = AchievedOcc;
- SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
- MFI.increaseOccupancy(MF, DAG.MinOccupancy);
- }
+ LLVM_DEBUG(
+ dbgs() << "Retrying function scheduling with improved occupancy of "
+ << DAG.MinOccupancy << " from rematerializing\n");
return true;
}
@@ -1513,7 +1493,8 @@ bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
dbgs()
<< "\n\t *** In shouldRevertScheduling ***\n"
<< " *********** BEFORE UnclusteredHighRPStage ***********\n");
- ScheduleMetrics MBefore = getScheduleMetrics(DAG.SUnits);
+ ScheduleMetrics MBefore =
+ getScheduleMetrics(DAG.SUnits);
LLVM_DEBUG(
dbgs()
<< "\n *********** AFTER UnclusteredHighRPStage ***********\n");
@@ -1546,9 +1527,13 @@ bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
}
bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
- return GCNSchedStage::shouldRevertScheduling(WavesAfter) ||
- mayCauseSpilling(WavesAfter) ||
- (IncreaseOccupancy && WavesAfter < TargetOcc);
+ if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
+ return true;
+
+ if (mayCauseSpilling(WavesAfter))
+ return true;
+
+ return false;
}
bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
@@ -1698,407 +1683,160 @@ bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
return true;
}
-namespace {
-/// Models excess register pressure in a region and tracks our progress as we
-/// identify rematerialization opportunities.
-struct ExcessRP {
- /// Number of excess ArchVGPRs.
- unsigned ArchVGPRs = 0;
- /// Number of excess AGPRs.
- unsigned AGPRs = 0;
- /// For unified register files, number of excess VGPRs.
- unsigned VGPRs = 0;
- /// For unified register files with AGPR usage, number of excess ArchVGPRs to
- /// save before we are able to save a whole allocation granule.
- unsigned ArchVGPRsToAlignment = 0;
- /// Whether the region uses AGPRs.
- bool HasAGPRs = false;
- /// Whether the subtarget has a unified RF.
- bool UnifiedRF;
-
- /// Constructs the excess RP model; determines the excess pressure w.r.t. a
- /// maximum number of allowed VGPRs.
- ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxVGPRs);
-
- /// Accounts for \p NumRegs saved ArchVGPRs in the model. If \p
- /// UseArchVGPRForAGPRSpill is true, saved ArchVGPRs are used to save excess
- /// AGPRs once excess ArchVGPR pressure has been eliminated. Returns whether
- /// saving these ArchVGPRs helped reduce excess pressure.
- bool saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill);
-
- /// Accounts for \p NumRegs saved AGPRS in the model. Returns whether saving
- /// these ArchVGPRs helped reduce excess pressure.
- bool saveAGPRs(unsigned NumRegs);
-
- /// Returns whether there is any excess register pressure.
- operator bool() const { return ArchVGPRs != 0 || AGPRs != 0 || VGPRs != 0; }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- friend raw_ostream &operator<<(raw_ostream &OS, const ExcessRP &Excess) {
- OS << Excess.ArchVGPRs << " ArchVGPRs, " << Excess.AGPRs << " AGPRs, and "
- << Excess.VGPRs << " VGPRs (next ArchVGPR aligment in "
- << Excess.ArchVGPRsToAlignment << " registers)\n";
- return OS;
- }
-#endif
+void PreRARematStage::collectRematerializableInstructions() {
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
+ for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
+ if (!DAG.LIS->hasInterval(Reg))
+ continue;
-private:
- static inline bool saveRegs(unsigned &LeftToSave, unsigned &NumRegs) {
- unsigned NumSaved = std::min(LeftToSave, NumRegs);
- NumRegs -= NumSaved;
- LeftToSave -= NumSaved;
- return NumSaved;
- }
-};
-} // namespace
-
-ExcessRP::ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP,
- unsigned MaxVGPRs)
- : UnifiedRF(ST.hasGFX90AInsts()) {
- unsigned NumArchVGPRs = RP.getArchVGPRNum();
- unsigned NumAGPRs = RP.getAGPRNum();
- HasAGPRs = NumAGPRs;
-
- if (!UnifiedRF) {
- // Non-unified RF. Account for excess pressure for ArchVGPRs and AGPRs
- // independently.
- if (NumArchVGPRs > MaxVGPRs)
- ArchVGPRs = NumArchVGPRs - MaxVGPRs;
- if (NumAGPRs > MaxVGPRs)
- AGPRs = NumAGPRs - MaxVGPRs;
- return;
- }
+ // TODO: Handle AGPR and SGPR rematerialization
+ if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
+ !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
+ continue;
- // Independently of whether overall VGPR pressure is under the limit, we still
- // have to check whether ArchVGPR pressure or AGPR pressure alone exceeds the
- // number of addressable registers in each category.
- const unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
- if (NumArchVGPRs > MaxArchVGPRs) {
- ArchVGPRs = NumArchVGPRs - MaxArchVGPRs;
- NumArchVGPRs = MaxArchVGPRs;
- }
- if (NumAGPRs > MaxArchVGPRs) {
- AGPRs = NumAGPRs - MaxArchVGPRs;
- NumAGPRs = MaxArchVGPRs;
- }
+ MachineOperand *Op = DAG.MRI.getOneDef(Reg);
+ MachineInstr *Def = Op->getParent();
+ if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
+ continue;
- // Check overall VGPR usage against the limit; any excess above addressable
- // register limits has already been accounted for.
- const unsigned Granule = AMDGPU::IsaInfo::getArchVGPRAllocGranule();
- unsigned NumVGPRs = GCNRegPressure::getUnifiedVGPRNum(NumArchVGPRs, NumAGPRs);
- if (NumVGPRs > MaxVGPRs) {
- VGPRs = NumVGPRs - MaxVGPRs;
- ArchVGPRsToAlignment = NumArchVGPRs - alignDown(NumArchVGPRs, Granule);
- if (!ArchVGPRsToAlignment)
- ArchVGPRsToAlignment = Granule;
- }
-}
+ MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
+ if (Def->getParent() == UseI->getParent())
+ continue;
-bool ExcessRP::saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill) {
- bool Progress = saveRegs(ArchVGPRs, NumRegs);
- if (!NumRegs)
- return Progress;
-
- if (!UnifiedRF) {
- if (UseArchVGPRForAGPRSpill)
- Progress |= saveRegs(AGPRs, NumRegs);
- } else if (HasAGPRs && (VGPRs || (UseArchVGPRForAGPRSpill && AGPRs))) {
- // There is progress as long as there are VGPRs left to save, even if the
- // save induced by this particular call does not cross an ArchVGPR alignment
- // barrier.
- Progress = true;
-
- // ArchVGPRs can only be allocated as a multiple of a granule in unified RF.
- unsigned NumSavedRegs = 0;
-
- // Count the number of whole ArchVGPR allocation granules we can save.
- const unsigned Granule = AMDGPU::IsaInfo::getArchVGPRAllocGranule();
- if (unsigned NumGranules = NumRegs / Granule; NumGranules) {
- NumSavedRegs = NumGranules * Granule;
- NumRegs -= NumSavedRegs;
+ bool HasRematDependency = false;
+ // Check if this instruction uses any registers that are planned to be
+ // rematerialized
+ for (auto &RematEntry : RematerializableInsts) {
+ if (find_if(RematEntry.second,
+ [&Def](std::pair<MachineInstr *, MachineInstr *> &Remat) {
+ for (MachineOperand &MO : Def->operands()) {
+ if (!MO.isReg())
+ continue;
+ if (MO.getReg() == Remat.first->getOperand(0).getReg())
+ return true;
+ }
+ return false;
+ }) != RematEntry.second.end()) {
+ HasRematDependency = true;
+ break;
+ }
}
+ // Do not rematerialize an instruction if it uses an instruction that we
+ // have designated for rematerialization.
+ // FIXME: Allow for rematerialization chains: this requires 1. updating
+ // remat points to account for uses that are rematerialized, and 2. either
+ // rematerializing the candidates in careful ordering, or deferring the MBB
+ // RP walk until the entire chain has been rematerialized.
+ if (HasRematDependency)
+ continue;
- // We may be able to save one more whole ArchVGPR allocation granule.
- if (NumRegs >= ArchVGPRsToAlignment) {
- NumSavedRegs += Granule;
- ArchVGPRsToAlignment = Granule - (NumRegs - ArchVGPRsToAlignment);
- } else {
- ArchVGPRsToAlignment -= NumRegs;
+ // Similarly, check if the UseI is planned to be remat.
+ for (auto &RematEntry : RematerializableInsts) {
+ if (find_if(RematEntry.second,
+ [&UseI](std::pair<MachineInstr *, MachineInstr *> &Remat) {
+ return Remat.first == UseI;
+ }) != RematEntry.second.end()) {
+ HasRematDependency = true;
+ break;
+ }
}
- // Prioritize saving generic VGPRs, then AGPRs if we allow AGPR-to-ArchVGPR
- // spilling and have some free ArchVGPR slots.
- saveRegs(VGPRs, NumSavedRegs);
- if (UseArchVGPRForAGPRSpill)
- saveRegs(AGPRs, NumSavedRegs);
- } else {
- // No AGPR usage in the region i.e., no allocation granule to worry about.
- Progress |= saveRegs(VGPRs, NumRegs);
- }
-
- return Progress;
-}
-
-bool ExcessRP::saveAGPRs(unsigned NumRegs) {
- return saveRegs(AGPRs, NumRegs) || saveRegs(VGPRs, NumRegs);
-}
-
-bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
- const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
-
- REMAT_DEBUG({
- dbgs() << "Collecting rematerializable instructions in ";
- MF.getFunction().printAsOperand(dbgs(), false);
- dbgs() << '\n';
- });
+ if (HasRematDependency)
+ break;
- // Maps optimizable regions (i.e., regions at minimum and VGPR-limited
- // occupancy, or regions with VGPR spilling) to a model of their excess RP.
- DenseMap<unsigned, ExcessRP> OptRegions;
- const Function &F = MF.getFunction();
-
- std::pair<unsigned, unsigned> WavesPerEU = ST.getWavesPerEU(F);
- const unsigned MaxSGPRsNoSpill = ST.getMaxNumSGPRs(F);
- const unsigned MaxVGPRsNoSpill = ST.getMaxNumVGPRs(F);
- const unsigned MaxSGPRsIncOcc =
- ST.getMaxNumSGPRs(DAG.MinOccupancy + 1, false);
- const unsigned MaxVGPRsIncOcc = ST.getMaxNumVGPRs(DAG.MinOccupancy + 1);
- IncreaseOccupancy = WavesPerEU.second > DAG.MinOccupancy;
-
- auto ClearOptRegionsIf = [&](bool Cond) -> bool {
- if (Cond) {
- // We won't try to increase occupancy.
- IncreaseOccupancy = false;
- OptRegions.clear();
- }
- return Cond;
- };
-
- // Collect optimizable regions. If there is spilling in any region we will
- // just try to reduce ArchVGPR spilling. Otherwise we will try to increase
- // occupancy by one in the whole function.
- for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
- GCNRegPressure &RP = DAG.Pressure[I];
-
- // Check whether SGPR pressures prevents us from eliminating spilling.
- unsigned NumSGPRs = RP.getSGPRNum();
- if (NumSGPRs > MaxSGPRsNoSpill)
- ClearOptRegionsIf(IncreaseOccupancy);
-
- ExcessRP Excess(ST, RP, MaxVGPRsNoSpill);
- if (Excess) {
- ClearOptRegionsIf(IncreaseOccupancy);
- } else if (IncreaseOccupancy) {
- // Check whether SGPR pressure prevents us from increasing occupancy.
- if (ClearOptRegionsIf(NumSGPRs > MaxSGPRsIncOcc)) {
- if (DAG.MinOccupancy >= WavesPerEU.first)
- return false;
- continue;
- }
- if ((Excess = ExcessRP(ST, RP, MaxVGPRsIncOcc))) {
- // We can only rematerialize ArchVGPRs at this point.
- unsigned NumArchVGPRsToRemat = Excess.ArchVGPRs + Excess.VGPRs;
- bool NotEnoughArchVGPRs = NumArchVGPRsToRemat > RP.getArchVGPRNum();
- if (ClearOptRegionsIf(Excess.AGPRs || NotEnoughArchVGPRs)) {
- if (DAG.MinOccupancy >= WavesPerEU.first)
- return false;
- continue;
+ // We are only collecting defs that are defined in another block and are
+ // live-through or used inside regions at MinOccupancy. This means that the
+ // register must be in the live-in set for the region.
+ bool AddedToRe...
[truncated]
``````````
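Aside from the scheduler refactoring itself, the GCNRegPressure.h hunk above is the one functional detail worth calling out: the revert drops the `getUnifiedVGPRNum()` helper, which aligned the ArchVGPR count to `AMDGPU::IsaInfo::getArchVGPRAllocGranule()`, and restores the inline computation with a hard-coded granule of 4. Below is a minimal standalone sketch of the restored arithmetic; `alignTo()` and `unifiedVGPRNumRestored()` are illustrative stand-ins written for this example, not LLVM API.

```cpp
// Sketch only: illustrates the unified-VGPR pressure arithmetic restored by
// this revert. alignTo() mirrors llvm::alignTo for unsigned values;
// unifiedVGPRNumRestored() mirrors the restored body of
// GCNRegPressure::getVGPRNum(true). Neither is LLVM API.
#include <cassert>
#include <cstdio>

// Round Value up to the nearest multiple of Align.
static unsigned alignTo(unsigned Value, unsigned Align) {
  assert(Align != 0 && "alignment must be non-zero");
  return (Value + Align - 1) / Align * Align;
}

// Post-revert form: the ArchVGPR count is aligned to a hard-coded granule of
// 4 when any AGPRs are in use (the deleted helper used
// AMDGPU::IsaInfo::getArchVGPRAllocGranule() instead of the literal 4).
static unsigned unifiedVGPRNumRestored(unsigned NumArchVGPRs,
                                       unsigned NumAGPRs) {
  return NumAGPRs ? alignTo(NumArchVGPRs, 4) + NumAGPRs
                  : NumArchVGPRs + NumAGPRs;
}

int main() {
  std::printf("%u\n", unifiedVGPRNumRestored(5, 3)); // 5 aligns to 8; prints 11
  std::printf("%u\n", unifiedVGPRNumRestored(5, 0)); // no AGPRs, no rounding; prints 5
  return 0;
}
```

The rounding reflects the comment in the reverted code that, with a unified register file, ArchVGPRs can only be allocated as a multiple of a granule, so ArchVGPR savings only free registers in granule-sized chunks.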
https://github.com/llvm/llvm-project/pull/139341
More information about the llvm-commits mailing list