[llvm] [AMDGPU] More accurately account for AVGPR pressure (PR #150711)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 29 11:49:04 PDT 2025
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/150711
>From 52cbccd2cd6185be0499998ec969952865218169 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 23 Jul 2025 15:41:11 -0700
Subject: [PATCH 01/10] [AMDGPU] More accurately account for AVGPR pressure
Change-Id: I6f129c2723b52a391a96178e390f60535164ac9b
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 114 +++++++++++-------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 94 ++++++++++-----
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 27 +++--
.../lib/Target/AMDGPU/SIFormMemoryClauses.cpp | 4 +-
4 files changed, 152 insertions(+), 87 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 334afd3a2a5b4..286c8d9529731 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -99,20 +99,22 @@ void GCNRegPressure::inc(unsigned Reg,
bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned MaxOccupancy) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
unsigned DynamicVGPRBlockSize =
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
const auto SGPROcc = std::min(MaxOccupancy,
ST.getOccupancyWithNumSGPRs(getSGPRNum()));
const auto VGPROcc = std::min(
- MaxOccupancy, ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts()),
- DynamicVGPRBlockSize));
+ MaxOccupancy,
+ ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs),
+ DynamicVGPRBlockSize));
const auto OtherSGPROcc = std::min(MaxOccupancy,
ST.getOccupancyWithNumSGPRs(O.getSGPRNum()));
- const auto OtherVGPROcc =
- std::min(MaxOccupancy,
- ST.getOccupancyWithNumVGPRs(O.getVGPRNum(ST.hasGFX90AInsts()),
- DynamicVGPRBlockSize));
+ const auto OtherVGPROcc = std::min(
+ MaxOccupancy, ST.getOccupancyWithNumVGPRs(
+ O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs),
+ DynamicVGPRBlockSize));
const auto Occ = std::min(SGPROcc, VGPROcc);
const auto OtherOcc = std::min(OtherSGPROcc, OtherVGPROcc);
@@ -135,35 +137,36 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned OtherVGPRForSGPRSpills =
(OtherExcessSGPR + (WaveSize - 1)) / WaveSize;
- unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
-
// Unified excess pressure conditions, accounting for VGPRs used for SGPR
// spills
unsigned ExcessVGPR =
- std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) +
+ std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) +
VGPRForSGPRSpills - MaxVGPRs),
0);
- unsigned OtherExcessVGPR =
- std::max(static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) +
- OtherVGPRForSGPRSpills - MaxVGPRs),
- 0);
+ unsigned OtherExcessVGPR = std::max(
+ static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) +
+ OtherVGPRForSGPRSpills - MaxVGPRs),
+ 0);
// Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
// spills
- unsigned ExcessArchVGPR = std::max(
- static_cast<int>(getVGPRNum(false) + VGPRForSGPRSpills - MaxArchVGPRs),
- 0);
+ unsigned ExcessArchVGPR =
+ std::max(static_cast<int>(getVGPRNum(false, MaxArchVGPRs) +
+ VGPRForSGPRSpills - MaxArchVGPRs),
+ 0);
unsigned OtherExcessArchVGPR =
- std::max(static_cast<int>(O.getVGPRNum(false) + OtherVGPRForSGPRSpills -
- MaxArchVGPRs),
+ std::max(static_cast<int>(O.getVGPRNum(false, MaxArchVGPRs) +
+ OtherVGPRForSGPRSpills - MaxArchVGPRs),
0);
// AGPR excess pressure conditions
- unsigned ExcessAGPR = std::max(
- static_cast<int>(ST.hasGFX90AInsts() ? (getAGPRNum() - MaxArchVGPRs)
- : (getAGPRNum() - MaxVGPRs)),
- 0);
+ unsigned ExcessAGPR =
+ std::max(static_cast<int>(ST.hasGFX90AInsts()
+ ? (getAGPRNum(MaxArchVGPRs) - MaxArchVGPRs)
+ : (getAGPRNum(MaxArchVGPRs) - MaxVGPRs)),
+ 0);
unsigned OtherExcessAGPR = std::max(
- static_cast<int>(ST.hasGFX90AInsts() ? (O.getAGPRNum() - MaxArchVGPRs)
- : (O.getAGPRNum() - MaxVGPRs)),
+ static_cast<int>(ST.hasGFX90AInsts()
+ ? (O.getAGPRNum(MaxArchVGPRs) - MaxArchVGPRs)
+ : (O.getAGPRNum(MaxArchVGPRs) - MaxVGPRs)),
0);
bool ExcessRP = ExcessSGPR || ExcessVGPR || ExcessArchVGPR || ExcessAGPR;
@@ -184,14 +187,21 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
return VGPRDiff > 0;
if (SGPRDiff != 0) {
unsigned PureExcessVGPR =
- std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
- 0) +
- std::max(static_cast<int>(getVGPRNum(false) - MaxArchVGPRs), 0);
+ std::max(
+ static_cast<int>(getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) -
+ MaxVGPRs),
+ 0) +
+ std::max(
+ static_cast<int>(getVGPRNum(false, MaxArchVGPRs) - MaxArchVGPRs),
+ 0);
unsigned OtherPureExcessVGPR =
std::max(
- static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
+ static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) -
+ MaxVGPRs),
0) +
- std::max(static_cast<int>(O.getVGPRNum(false) - MaxArchVGPRs), 0);
+ std::max(static_cast<int>(O.getVGPRNum(false, MaxArchVGPRs) -
+ MaxArchVGPRs),
+ 0);
// If we have a special case where there is a tie in excess VGPR, but one
// of the pressures has VGPR usage from SGPR spills, prefer the pressure
@@ -221,33 +231,36 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
if (SW != OtherSW)
return SW < OtherSW;
} else {
- auto VW = getVGPRTuplesWeight();
- auto OtherVW = O.getVGPRTuplesWeight();
+ auto VW = getVGPRTuplesWeight(MaxArchVGPRs);
+ auto OtherVW = O.getVGPRTuplesWeight(MaxArchVGPRs);
if (VW != OtherVW)
return VW < OtherVW;
}
}
// Give final precedence to lower general RP.
- return SGPRImportant ? (getSGPRNum() < O.getSGPRNum()):
- (getVGPRNum(ST.hasGFX90AInsts()) <
- O.getVGPRNum(ST.hasGFX90AInsts()));
+ return SGPRImportant ? (getSGPRNum() < O.getSGPRNum())
+ : (getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) <
+ O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs));
}
Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST,
unsigned DynamicVGPRBlockSize) {
return Printable([&RP, ST, DynamicVGPRBlockSize](raw_ostream &OS) {
- OS << "VGPRs: " << RP.getArchVGPRNum() << ' '
- << "AGPRs: " << RP.getAGPRNum();
+ OS << "VGPRs: " << RP.getArchVGPRNum(ST->getAddressableNumArchVGPRs())
+ << ' ' << "AGPRs: " << RP.getAGPRNum(ST->getAddressableNumArchVGPRs());
if (ST)
OS << "(O"
- << ST->getOccupancyWithNumVGPRs(RP.getVGPRNum(ST->hasGFX90AInsts()),
- DynamicVGPRBlockSize)
+ << ST->getOccupancyWithNumVGPRs(
+ RP.getVGPRNum(ST->hasGFX90AInsts(),
+ ST->getAddressableNumArchVGPRs()),
+ DynamicVGPRBlockSize)
<< ')';
OS << ", SGPRs: " << RP.getSGPRNum();
if (ST)
OS << "(O" << ST->getOccupancyWithNumSGPRs(RP.getSGPRNum()) << ')';
- OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight()
+ OS << ", LVGPR WT: "
+ << RP.getVGPRTuplesWeight(ST->getAddressableNumArchVGPRs())
<< ", LSGPR WT: " << RP.getSGPRTuplesWeight();
if (ST)
OS << " -> Occ: " << RP.getOccupancy(*ST, DynamicVGPRBlockSize);
@@ -398,8 +411,9 @@ void GCNRPTarget::setRegLimits(unsigned NumSGPRs, unsigned NumVGPRs,
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
unsigned DynamicVGPRBlockSize =
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
+ AddressableNumArchVGPRs = ST.getAddressableNumArchVGPRs();
MaxSGPRs = std::min(ST.getAddressableNumSGPRs(), NumSGPRs);
- MaxVGPRs = std::min(ST.getAddressableNumArchVGPRs(), NumVGPRs);
+ MaxVGPRs = std::min(AddressableNumArchVGPRs, NumVGPRs);
MaxUnifiedVGPRs =
ST.hasGFX90AInsts()
? std::min(ST.getAddressableNumVGPRs(DynamicVGPRBlockSize), NumVGPRs)
@@ -414,15 +428,21 @@ bool GCNRPTarget::isSaveBeneficial(Register Reg,
if (SRI->isSGPRClass(RC))
return RP.getSGPRNum() > MaxSGPRs;
- unsigned NumVGPRs =
- SRI->isAGPRClass(RC) ? RP.getAGPRNum() : RP.getArchVGPRNum();
+
+ bool ShouldUseAGPR =
+ SRI->isAGPRClass(RC) ||
+ (SRI->isVectorSuperClass(RC) &&
+ RP.getArchVGPRNum(AddressableNumArchVGPRs) >= AddressableNumArchVGPRs);
+ unsigned NumVGPRs = ShouldUseAGPR
+ ? RP.getAGPRNum(AddressableNumArchVGPRs)
+ : RP.getArchVGPRNum(AddressableNumArchVGPRs);
return isVGPRBankSaveBeneficial(NumVGPRs);
}
bool GCNRPTarget::satisfied() const {
if (RP.getSGPRNum() > MaxSGPRs)
return false;
- if (RP.getVGPRNum(false) > MaxVGPRs &&
+ if (RP.getVGPRNum(false, AddressableNumArchVGPRs) > MaxVGPRs &&
(!CombineVGPRSavings || !satisifiesVGPRBanksTarget()))
return false;
return satisfiesUnifiedTarget();
@@ -876,10 +896,12 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
OS << "---\nname: " << MF.getName() << "\nbody: |\n";
- auto printRP = [](const GCNRegPressure &RP) {
- return Printable([&RP](raw_ostream &OS) {
+ auto printRP = [&MF](const GCNRegPressure &RP) {
+ return Printable([&RP, &MF](raw_ostream &OS) {
OS << format(PFX " %-5d", RP.getSGPRNum())
- << format(" %-5d", RP.getVGPRNum(false));
+ << format(" %-5d",
+ RP.getVGPRNum(false, MF.getSubtarget<GCNSubtarget>()
+ .getAddressableNumArchVGPRs()));
});
};
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index ea33a229110c1..a8c1c3bfd8703 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -43,13 +43,13 @@ struct GCNRegPressure {
/// \returns the SGPR32 pressure
unsigned getSGPRNum() const { return Value[SGPR]; }
- /// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure
- /// dependent upon \p UnifiedVGPRFile
- unsigned getVGPRNum(bool UnifiedVGPRFile) const {
+ unsigned getVGPRNum(bool UnifiedVGPRFile,
+ unsigned AddressableArchVGPR) const {
if (UnifiedVGPRFile) {
- return Value[AGPR]
- ? getUnifiedVGPRNum(Value[VGPR], Value[AGPR], Value[AVGPR])
- : Value[VGPR] + Value[AVGPR];
+ return Value[AGPR] || Value[AVGPR]
+ ? getUnifiedVGPRNum(Value[VGPR], Value[AGPR], Value[AVGPR],
+ AddressableArchVGPR)
+ : Value[VGPR];
}
// AVGPR assignment priority is based on the width of the register. Account
// AVGPR pressure as VGPR.
@@ -61,33 +61,60 @@ struct GCNRegPressure {
/// VGPR file.
inline static unsigned getUnifiedVGPRNum(unsigned NumArchVGPRs,
unsigned NumAGPRs,
- unsigned NumAVGPRs) {
-
- // Assume AVGPRs will be assigned as VGPRs.
- return alignTo(NumArchVGPRs + NumAVGPRs,
+ unsigned NumAVGPRs,
+ unsigned AddressableArchVGPR) {
+
+ // AVGPRs are counted as VGPRs until ArchVGPR usage reaches
+ // AddressableArchVGPR; any AVGPRs beyond that limit are counted as AGPRs.
+ unsigned AVGPRsAsVGPRs =
+ NumArchVGPRs < AddressableArchVGPR
+ ? std::min((AddressableArchVGPR - NumArchVGPRs), NumAVGPRs)
+ : 0;
+ unsigned AVGPRsAsAGPRs =
+ NumAVGPRs > AVGPRsAsVGPRs ? NumAVGPRs - AVGPRsAsVGPRs : 0;
+ return alignTo(NumArchVGPRs + AVGPRsAsVGPRs,
AMDGPU::IsaInfo::getArchVGPRAllocGranule()) +
- NumAGPRs;
+ NumAGPRs + AVGPRsAsAGPRs;
}
/// \returns the ArchVGPR32 pressure, plus the AVGPRS which we assume will be
/// allocated as VGPR
- unsigned getArchVGPRNum() const { return Value[VGPR] + Value[AVGPR]; }
+ unsigned getArchVGPRNum(unsigned AddressableArchVGPR) const {
+ return std::min(Value[VGPR] + Value[AVGPR], AddressableArchVGPR);
+ }
/// \returns the AccVGPR32 pressure
- unsigned getAGPRNum() const { return Value[AGPR]; }
+ unsigned getAGPRNum(unsigned AddressableArchVGPR) const {
+ unsigned VGPRsForAGPRs =
+ Value[VGPR] + Value[AVGPR] > AddressableArchVGPR
+ ? (Value[VGPR] + Value[AVGPR] - AddressableArchVGPR)
+ : 0;
+ return Value[AGPR] + VGPRsForAGPRs;
+ }
/// \returns the AVGPR32 pressure
unsigned getAVGPRNum() const { return Value[AVGPR]; }
- unsigned getVGPRTuplesWeight() const {
- return std::max(Value[TOTAL_KINDS + VGPR] + Value[TOTAL_KINDS + AVGPR],
- Value[TOTAL_KINDS + AGPR]);
+ unsigned getVGPRTuplesWeight(unsigned AddressableArchVGPR) const {
+ unsigned AVGPRsAsVGPRs =
+ Value[TOTAL_KINDS + VGPR] < AddressableArchVGPR
+ ? std::min(AddressableArchVGPR - Value[TOTAL_KINDS + VGPR],
+ Value[TOTAL_KINDS + AVGPR])
+ : 0;
+ unsigned AVGPRsAsAGPRs = Value[TOTAL_KINDS + AVGPR] > AVGPRsAsVGPRs
+ ? Value[TOTAL_KINDS + AVGPR] - AVGPRsAsVGPRs
+ : 0;
+
+ return std::max(Value[TOTAL_KINDS + VGPR] + AVGPRsAsVGPRs,
+ Value[TOTAL_KINDS + AGPR] + AVGPRsAsAGPRs);
}
unsigned getSGPRTuplesWeight() const { return Value[TOTAL_KINDS + SGPR]; }
unsigned getOccupancy(const GCNSubtarget &ST,
unsigned DynamicVGPRBlockSize) const {
- return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()),
- ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts()),
- DynamicVGPRBlockSize));
+ return std::min(
+ ST.getOccupancyWithNumSGPRs(getSGPRNum()),
+ ST.getOccupancyWithNumVGPRs(
+ getVGPRNum(ST.hasGFX90AInsts(), ST.getAddressableNumArchVGPRs()),
+ DynamicVGPRBlockSize));
}
void inc(unsigned Reg,
@@ -151,7 +178,7 @@ struct GCNRegPressure {
friend GCNRegPressure max(const GCNRegPressure &P1,
const GCNRegPressure &P2);
- friend Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST,
+ friend Printable print(const GCNRegPressure &RP,
unsigned DynamicVGPRBlockSize);
};
@@ -220,16 +247,19 @@ class GCNRPTarget {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
friend raw_ostream &operator<<(raw_ostream &OS, const GCNRPTarget &Target) {
OS << "Actual/Target: " << Target.RP.getSGPRNum() << '/' << Target.MaxSGPRs
- << " SGPRs, " << Target.RP.getArchVGPRNum() << '/' << Target.MaxVGPRs
- << " ArchVGPRs, " << Target.RP.getAGPRNum() << '/' << Target.MaxVGPRs
- << " AGPRs";
+ << " SGPRs, " << Target.RP.getArchVGPRNum(Target.AddressableNumArchVGPRs)
+ << '/' << Target.MaxVGPRs << " ArchVGPRs, "
+ << Target.RP.getAGPRNum(Target.AddressableNumArchVGPRs) << '/'
+ << Target.MaxVGPRs << " AGPRs";
if (Target.MaxUnifiedVGPRs) {
- OS << ", " << Target.RP.getVGPRNum(true) << '/' << Target.MaxUnifiedVGPRs
- << " VGPRs (unified)";
+ OS << ", " << Target.RP.getVGPRNum(true, Target.AddressableNumArchVGPRs)
+ << '/' << Target.MaxUnifiedVGPRs << " VGPRs (unified)";
} else if (Target.CombineVGPRSavings) {
- OS << ", " << Target.RP.getArchVGPRNum() + Target.RP.getAGPRNum() << '/'
- << 2 * Target.MaxVGPRs << " VGPRs (combined target)";
+ OS << ", "
+ << Target.RP.getArchVGPRNum(Target.AddressableNumArchVGPRs) +
+ Target.RP.getAGPRNum(Target.AddressableNumArchVGPRs)
+ << '/' << 2 * Target.MaxVGPRs << " VGPRs (combined target)";
}
return OS;
}
@@ -238,7 +268,6 @@ class GCNRPTarget {
private:
/// Current register pressure.
GCNRegPressure RP;
-
/// Target number of SGPRs.
unsigned MaxSGPRs;
/// Target number of ArchVGPRs and AGPRs.
@@ -246,6 +275,8 @@ class GCNRPTarget {
/// Target number of overall VGPRs for subtargets with unified RFs. Always 0
/// for subtargets with non-unified RFs.
unsigned MaxUnifiedVGPRs;
+ /// The maximum number of arch vgprs allowed by the subtarget.
+ unsigned AddressableNumArchVGPRs;
/// Whether we consider that the register allocator will be able to swap
/// between ArchVGPRs and AGPRs by copying them to a super register class.
/// Concretely, this allows savings in one of the VGPR banks to help toward
@@ -254,12 +285,15 @@ class GCNRPTarget {
inline bool satisifiesVGPRBanksTarget() const {
assert(CombineVGPRSavings && "only makes sense with combined savings");
- return RP.getArchVGPRNum() + RP.getAGPRNum() <= 2 * MaxVGPRs;
+ return RP.getArchVGPRNum(AddressableNumArchVGPRs) +
+ RP.getAGPRNum(AddressableNumArchVGPRs) <=
+ 2 * MaxVGPRs;
}
/// Always satisified when the subtarget doesn't have a unified RF.
inline bool satisfiesUnifiedTarget() const {
- return !MaxUnifiedVGPRs || RP.getVGPRNum(true) <= MaxUnifiedVGPRs;
+ return !MaxUnifiedVGPRs ||
+ RP.getVGPRNum(true, AddressableNumArchVGPRs) <= MaxUnifiedVGPRs;
}
inline bool isVGPRBankSaveBeneficial(unsigned NumVGPRs) const {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index ce1ce687d0038..772c979809b75 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -190,10 +190,13 @@ static void getRegisterPressures(
TempUpwardTracker.recede(*MI);
NewPressure = TempUpwardTracker.getPressure();
}
+ unsigned AddressableArchVGPR =
+ DAG->MF.getSubtarget<GCNSubtarget>().getAddressableNumArchVGPRs();
Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();
Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
- NewPressure.getArchVGPRNum();
- Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
+ NewPressure.getArchVGPRNum(AddressableArchVGPR);
+ Pressure[AMDGPU::RegisterPressureSets::AGPR_32] =
+ NewPressure.getAGPRNum(AddressableArchVGPR);
}
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
@@ -339,7 +342,8 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
? static_cast<GCNRPTracker *>(&UpwardTracker)
: static_cast<GCNRPTracker *>(&DownwardTracker);
SGPRPressure = T->getPressure().getSGPRNum();
- VGPRPressure = T->getPressure().getArchVGPRNum();
+ VGPRPressure = T->getPressure().getArchVGPRNum(
+ DAG->MF.getSubtarget<GCNSubtarget>().getAddressableNumArchVGPRs());
}
}
ReadyQueue &Q = Zone.Available;
@@ -1279,9 +1283,10 @@ void GCNSchedStage::checkScheduling() {
LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
-
+ unsigned AddressableArchVGPR = ST.getAddressableNumArchVGPRs();
if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
- PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
+ PressureAfter.getVGPRNum(ST.hasGFX90AInsts(), AddressableArchVGPR) <=
+ S.VGPRCriticalLimit) {
DAG.Pressure[RegionIdx] = PressureAfter;
DAG.RegionsWithMinOcc[RegionIdx] =
PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize) ==
@@ -1331,9 +1336,10 @@ void GCNSchedStage::checkScheduling() {
unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
- if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
- PressureAfter.getArchVGPRNum() > MaxArchVGPRs ||
- PressureAfter.getAGPRNum() > MaxArchVGPRs ||
+ if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts(), AddressableArchVGPR) >
+ MaxVGPRs ||
+ PressureAfter.getArchVGPRNum(AddressableArchVGPR) > MaxArchVGPRs ||
+ PressureAfter.getAGPRNum(AddressableArchVGPR) > MaxArchVGPRs ||
PressureAfter.getSGPRNum() > MaxSGPRs) {
DAG.RegionsWithHighRP[RegionIdx] = true;
DAG.RegionsWithExcessRP[RegionIdx] = true;
@@ -1471,12 +1477,13 @@ bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
// For dynamic VGPR mode, we don't want to waste any VGPR blocks.
if (DAG.MFI.isDynamicVGPREnabled()) {
+ unsigned AddressableArchVGPR = ST.getAddressableNumArchVGPRs();
unsigned BlocksBefore = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
&ST, DAG.MFI.getDynamicVGPRBlockSize(),
- PressureBefore.getVGPRNum(false));
+ PressureBefore.getVGPRNum(false, AddressableArchVGPR));
unsigned BlocksAfter = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
&ST, DAG.MFI.getDynamicVGPRBlockSize(),
- PressureAfter.getVGPRNum(false));
+ PressureAfter.getVGPRNum(false, AddressableArchVGPR));
if (BlocksAfter > BlocksBefore)
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index 6b13b06590102..844908a5ce8d9 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -211,7 +211,9 @@ bool SIFormMemoryClausesImpl::checkPressure(const MachineInstr &MI,
// tracking does not account for the alignment requirements for SGPRs, or the
// fragmentation of registers the allocator will need to satisfy.
if (Occupancy >= MFI->getMinAllowedOccupancy() &&
- MaxPressure.getVGPRNum(ST->hasGFX90AInsts()) <= MaxVGPRs / 2 &&
+ MaxPressure.getVGPRNum(ST->hasGFX90AInsts(),
+ ST->getAddressableNumArchVGPRs()) <=
+ MaxVGPRs / 2 &&
MaxPressure.getSGPRNum() <= MaxSGPRs / 2) {
LastRecordedOccupancy = Occupancy;
return true;
>From 99a540244f4c848c9d228b10c6fa33de605c64d4 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 28 Jul 2025 13:05:45 -0700
Subject: [PATCH 02/10] Handle gfx908 case
Change-Id: Ic16c8a4ffdf58027de164c598cfac70fc453bb00
---
.../Target/AMDGPU/GCNIterativeScheduler.cpp | 34 ++---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 127 +++++++++---------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 24 ++--
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 77 ++++++-----
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 9 ++
.../lib/Target/AMDGPU/SIFormMemoryClauses.cpp | 5 +-
.../AMDGPU/debug-value-scheduler-liveins.mir | 2 +-
7 files changed, 154 insertions(+), 124 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index f253a841f16a6..050e47270498b 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -451,7 +451,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
const unsigned DynamicVGPRBlockSize =
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
const auto Occ =
- Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
+ Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize, MF);
LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
<< ", current = " << Occ << '\n');
@@ -460,7 +460,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
// Always build the DAG to add mutations
BuildDAG DAG(*R, *this);
- if (R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >= NewOcc)
+ if (R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize, MF) >= NewOcc)
continue;
LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
@@ -471,7 +471,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
printSchedRP(dbgs(), R->MaxPressure, MaxRP));
- NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST, DynamicVGPRBlockSize));
+ NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST, DynamicVGPRBlockSize, MF));
if (NewOcc <= Occ)
break;
@@ -496,7 +496,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
sortRegionsByPressure(TgtOcc);
auto Occ =
- Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
+ Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize, MF);
bool IsReentry = false;
if (TryMaximizeOccupancy && Occ < TgtOcc) {
@@ -527,21 +527,22 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
const auto RP = getRegionPressure(*R);
LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
- if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) {
+ if (RP.getOccupancy(ST, DynamicVGPRBlockSize, MF) < TgtOcc) {
LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
- if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(
- ST, DynamicVGPRBlockSize) >= TgtOcc) {
+ if (R->BestSchedule.get() &&
+ R->BestSchedule->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize,
+ MF) >= TgtOcc) {
LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
scheduleBest(*R);
} else {
LLVM_DEBUG(dbgs() << ", restoring\n");
Ovr.restoreOrder();
- assert(R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >=
+ assert(R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize, MF) >=
TgtOcc);
}
}
- FinalOccupancy =
- std::min(FinalOccupancy, RP.getOccupancy(ST, DynamicVGPRBlockSize));
+ FinalOccupancy = std::min(FinalOccupancy,
+ RP.getOccupancy(ST, DynamicVGPRBlockSize, MF));
}
}
MFI->limitOccupancy(FinalOccupancy);
@@ -591,7 +592,7 @@ void GCNIterativeScheduler::scheduleILP(
sortRegionsByPressure(TgtOcc);
auto Occ =
- Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
+ Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize, MF);
bool IsReentry = false;
if (TryMaximizeOccupancy && Occ < TgtOcc) {
@@ -612,18 +613,19 @@ void GCNIterativeScheduler::scheduleILP(
const auto RP = getSchedulePressure(*R, ILPSchedule);
LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
- if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) {
+ if (RP.getOccupancy(ST, DynamicVGPRBlockSize, MF) < TgtOcc) {
LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
- if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(
- ST, DynamicVGPRBlockSize) >= TgtOcc) {
+ if (R->BestSchedule.get() &&
+ R->BestSchedule->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize,
+ MF) >= TgtOcc) {
LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
scheduleBest(*R);
}
} else {
scheduleRegion(*R, ILPSchedule, RP);
LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
- FinalOccupancy =
- std::min(FinalOccupancy, RP.getOccupancy(ST, DynamicVGPRBlockSize));
+ FinalOccupancy = std::min(FinalOccupancy,
+ RP.getOccupancy(ST, DynamicVGPRBlockSize, MF));
}
}
MFI->limitOccupancy(FinalOccupancy);
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 286c8d9529731..786b45902ae48 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -99,21 +99,21 @@ void GCNRegPressure::inc(unsigned Reg,
bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned MaxOccupancy) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
+ unsigned ArchVGPRThreshold = ST.getArchVGPRAllocationThreshold(MF);
unsigned DynamicVGPRBlockSize =
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
const auto SGPROcc = std::min(MaxOccupancy,
ST.getOccupancyWithNumSGPRs(getSGPRNum()));
const auto VGPROcc = std::min(
- MaxOccupancy,
- ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs),
- DynamicVGPRBlockSize));
+ MaxOccupancy, ST.getOccupancyWithNumVGPRs(
+ getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold),
+ DynamicVGPRBlockSize));
const auto OtherSGPROcc = std::min(MaxOccupancy,
ST.getOccupancyWithNumSGPRs(O.getSGPRNum()));
const auto OtherVGPROcc = std::min(
MaxOccupancy, ST.getOccupancyWithNumVGPRs(
- O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs),
+ O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold),
DynamicVGPRBlockSize));
const auto Occ = std::min(SGPROcc, VGPROcc);
@@ -139,34 +139,37 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
// Unified excess pressure conditions, accounting for VGPRs used for SGPR
// spills
- unsigned ExcessVGPR =
- std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) +
- VGPRForSGPRSpills - MaxVGPRs),
- 0);
+ unsigned ExcessVGPR = std::max(
+ static_cast<int>(getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) +
+ VGPRForSGPRSpills - MaxVGPRs),
+ 0);
unsigned OtherExcessVGPR = std::max(
- static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) +
+ static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) +
OtherVGPRForSGPRSpills - MaxVGPRs),
0);
// Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
// spills
+ unsigned AddressableArchVGPRs = ST.getAddressableNumArchVGPRs();
unsigned ExcessArchVGPR =
- std::max(static_cast<int>(getVGPRNum(false, MaxArchVGPRs) +
- VGPRForSGPRSpills - MaxArchVGPRs),
+ std::max(static_cast<int>(getVGPRNum(false, ArchVGPRThreshold) +
+ VGPRForSGPRSpills - AddressableArchVGPRs),
0);
unsigned OtherExcessArchVGPR =
- std::max(static_cast<int>(O.getVGPRNum(false, MaxArchVGPRs) +
- OtherVGPRForSGPRSpills - MaxArchVGPRs),
+ std::max(static_cast<int>(O.getVGPRNum(false, ArchVGPRThreshold) +
+ OtherVGPRForSGPRSpills - AddressableArchVGPRs),
0);
// AGPR excess pressure conditions
unsigned ExcessAGPR =
- std::max(static_cast<int>(ST.hasGFX90AInsts()
- ? (getAGPRNum(MaxArchVGPRs) - MaxArchVGPRs)
- : (getAGPRNum(MaxArchVGPRs) - MaxVGPRs)),
+ std::max(static_cast<int>(
+ ST.hasGFX90AInsts()
+ ? (getAGPRNum(ArchVGPRThreshold) - AddressableArchVGPRs)
+ : (getAGPRNum(ArchVGPRThreshold) - MaxVGPRs)),
0);
unsigned OtherExcessAGPR = std::max(
- static_cast<int>(ST.hasGFX90AInsts()
- ? (O.getAGPRNum(MaxArchVGPRs) - MaxArchVGPRs)
- : (O.getAGPRNum(MaxArchVGPRs) - MaxVGPRs)),
+ static_cast<int>(
+ ST.hasGFX90AInsts()
+ ? (O.getAGPRNum(ArchVGPRThreshold) - AddressableArchVGPRs)
+ : (O.getAGPRNum(ArchVGPRThreshold) - MaxVGPRs)),
0);
bool ExcessRP = ExcessSGPR || ExcessVGPR || ExcessArchVGPR || ExcessAGPR;
@@ -187,20 +190,20 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
return VGPRDiff > 0;
if (SGPRDiff != 0) {
unsigned PureExcessVGPR =
- std::max(
- static_cast<int>(getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) -
- MaxVGPRs),
- 0) +
- std::max(
- static_cast<int>(getVGPRNum(false, MaxArchVGPRs) - MaxArchVGPRs),
- 0);
+ std::max(static_cast<int>(
+ getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) -
+ MaxVGPRs),
+ 0) +
+ std::max(static_cast<int>(getVGPRNum(false, ArchVGPRThreshold) -
+ AddressableArchVGPRs),
+ 0);
unsigned OtherPureExcessVGPR =
- std::max(
- static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) -
- MaxVGPRs),
- 0) +
- std::max(static_cast<int>(O.getVGPRNum(false, MaxArchVGPRs) -
- MaxArchVGPRs),
+ std::max(static_cast<int>(
+ O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) -
+ MaxVGPRs),
+ 0) +
+ std::max(static_cast<int>(O.getVGPRNum(false, ArchVGPRThreshold) -
+ AddressableArchVGPRs),
0);
// If we have a special case where there is a tie in excess VGPR, but one
@@ -231,8 +234,8 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
if (SW != OtherSW)
return SW < OtherSW;
} else {
- auto VW = getVGPRTuplesWeight(MaxArchVGPRs);
- auto OtherVW = O.getVGPRTuplesWeight(MaxArchVGPRs);
+ auto VW = getVGPRTuplesWeight(ArchVGPRThreshold);
+ auto OtherVW = O.getVGPRTuplesWeight(ArchVGPRThreshold);
if (VW != OtherVW)
return VW < OtherVW;
}
@@ -240,32 +243,33 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
// Give final precedence to lower general RP.
return SGPRImportant ? (getSGPRNum() < O.getSGPRNum())
- : (getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) <
- O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs));
+ : (getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) <
+ O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold));
}
Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST,
- unsigned DynamicVGPRBlockSize) {
- return Printable([&RP, ST, DynamicVGPRBlockSize](raw_ostream &OS) {
- OS << "VGPRs: " << RP.getArchVGPRNum(ST->getAddressableNumArchVGPRs())
- << ' ' << "AGPRs: " << RP.getAGPRNum(ST->getAddressableNumArchVGPRs());
- if (ST)
- OS << "(O"
- << ST->getOccupancyWithNumVGPRs(
- RP.getVGPRNum(ST->hasGFX90AInsts(),
- ST->getAddressableNumArchVGPRs()),
- DynamicVGPRBlockSize)
- << ')';
- OS << ", SGPRs: " << RP.getSGPRNum();
- if (ST)
- OS << "(O" << ST->getOccupancyWithNumSGPRs(RP.getSGPRNum()) << ')';
- OS << ", LVGPR WT: "
- << RP.getVGPRTuplesWeight(ST->getAddressableNumArchVGPRs())
- << ", LSGPR WT: " << RP.getSGPRTuplesWeight();
- if (ST)
- OS << " -> Occ: " << RP.getOccupancy(*ST, DynamicVGPRBlockSize);
- OS << '\n';
- });
+ unsigned DynamicVGPRBlockSize,
+ const MachineFunction *MF) {
+ unsigned ArchVGPRThreshold = ST->getArchVGPRAllocationThreshold(*MF);
+ return Printable(
+ [&RP, ST, DynamicVGPRBlockSize, ArchVGPRThreshold, MF](raw_ostream &OS) {
+ OS << "VGPRs: " << RP.getArchVGPRNum(ArchVGPRThreshold) << ' '
+ << "AGPRs: " << RP.getAGPRNum(ArchVGPRThreshold);
+ if (ST)
+ OS << "(O"
+ << ST->getOccupancyWithNumVGPRs(
+ RP.getVGPRNum(ST->hasGFX90AInsts(), ArchVGPRThreshold),
+ DynamicVGPRBlockSize)
+ << ')';
+ OS << ", SGPRs: " << RP.getSGPRNum();
+ if (ST)
+ OS << "(O" << ST->getOccupancyWithNumSGPRs(RP.getSGPRNum()) << ')';
+ OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight(ArchVGPRThreshold)
+ << ", LSGPR WT: " << RP.getSGPRTuplesWeight();
+ if (ST)
+ OS << " -> Occ: " << RP.getOccupancy(*ST, DynamicVGPRBlockSize, *MF);
+ OS << '\n';
+ });
}
static LaneBitmask getDefRegMask(const MachineOperand &MO,
@@ -899,9 +903,10 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
auto printRP = [&MF](const GCNRegPressure &RP) {
return Printable([&RP, &MF](raw_ostream &OS) {
OS << format(PFX " %-5d", RP.getSGPRNum())
- << format(" %-5d",
- RP.getVGPRNum(false, MF.getSubtarget<GCNSubtarget>()
- .getAddressableNumArchVGPRs()));
+ << format(
+ " %-5d",
+ RP.getVGPRNum(false, MF.getSubtarget<GCNSubtarget>()
+ .getArchVGPRAllocationThreshold(MF)));
});
};
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index a8c1c3bfd8703..98eb35eaaca8e 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -108,13 +108,13 @@ struct GCNRegPressure {
}
unsigned getSGPRTuplesWeight() const { return Value[TOTAL_KINDS + SGPR]; }
- unsigned getOccupancy(const GCNSubtarget &ST,
- unsigned DynamicVGPRBlockSize) const {
- return std::min(
- ST.getOccupancyWithNumSGPRs(getSGPRNum()),
- ST.getOccupancyWithNumVGPRs(
- getVGPRNum(ST.hasGFX90AInsts(), ST.getAddressableNumArchVGPRs()),
- DynamicVGPRBlockSize));
+ unsigned getOccupancy(const GCNSubtarget &ST, unsigned DynamicVGPRBlockSize,
+ const MachineFunction &MF) const {
+ return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()),
+ ST.getOccupancyWithNumVGPRs(
+ getVGPRNum(ST.hasGFX90AInsts(),
+ ST.getArchVGPRAllocationThreshold(MF)),
+ DynamicVGPRBlockSize));
}
void inc(unsigned Reg,
@@ -123,9 +123,10 @@ struct GCNRegPressure {
const MachineRegisterInfo &MRI);
bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure &O,
- unsigned DynamicVGPRBlockSize) const {
- return getOccupancy(ST, DynamicVGPRBlockSize) >
- O.getOccupancy(ST, DynamicVGPRBlockSize);
+ unsigned DynamicVGPRBlockSize,
+ const MachineFunction &MF) const {
+ return getOccupancy(ST, DynamicVGPRBlockSize, MF) >
+ O.getOccupancy(ST, DynamicVGPRBlockSize, MF);
}
/// Compares \p this GCNRegpressure to \p O, returning true if \p this is
@@ -551,7 +552,8 @@ bool isEqual(const GCNRPTracker::LiveRegSet &S1,
const GCNRPTracker::LiveRegSet &S2);
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST = nullptr,
- unsigned DynamicVGPRBlockSize = 0);
+ unsigned DynamicVGPRBlockSize = 0,
+ const MachineFunction *MF = nullptr);
Printable print(const GCNRPTracker::LiveRegSet &LiveRegs,
const MachineRegisterInfo &MRI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 772c979809b75..2b61ad4e7a8d5 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -190,13 +190,14 @@ static void getRegisterPressures(
TempUpwardTracker.recede(*MI);
NewPressure = TempUpwardTracker.getPressure();
}
- unsigned AddressableArchVGPR =
- DAG->MF.getSubtarget<GCNSubtarget>().getAddressableNumArchVGPRs();
+ unsigned ArchVGPRThreshold =
+ DAG->MF.getSubtarget<GCNSubtarget>().getArchVGPRAllocationThreshold(
+ DAG->MF);
Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();
Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
- NewPressure.getArchVGPRNum(AddressableArchVGPR);
+ NewPressure.getArchVGPRNum(ArchVGPRThreshold);
Pressure[AMDGPU::RegisterPressureSets::AGPR_32] =
- NewPressure.getAGPRNum(AddressableArchVGPR);
+ NewPressure.getAGPRNum(ArchVGPRThreshold);
}
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
@@ -343,7 +344,8 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
: static_cast<GCNRPTracker *>(&DownwardTracker);
SGPRPressure = T->getPressure().getSGPRNum();
VGPRPressure = T->getPressure().getArchVGPRNum(
- DAG->MF.getSubtarget<GCNSubtarget>().getAddressableNumArchVGPRs());
+ DAG->MF.getSubtarget<GCNSubtarget>().getArchVGPRAllocationThreshold(
+ DAG->MF));
}
}
ReadyQueue &Q = Zone.Available;
@@ -1144,8 +1146,9 @@ void UnclusteredHighRPStage::finalizeGCNSchedStage() {
if (DAG.MinOccupancy > InitialOccupancy) {
for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
DAG.RegionsWithMinOcc[IDX] =
- DAG.Pressure[IDX].getOccupancy(
- DAG.ST, DAG.MFI.getDynamicVGPRBlockSize()) == DAG.MinOccupancy;
+ DAG.Pressure[IDX].getOccupancy(DAG.ST,
+ DAG.MFI.getDynamicVGPRBlockSize(),
+ DAG.MF) == DAG.MinOccupancy;
LLVM_DEBUG(dbgs() << StageID
<< " stage successfully increased occupancy to "
@@ -1197,8 +1200,10 @@ bool GCNSchedStage::initGCNRegion() {
dbgs() << "Pressure before scheduling:\nRegion live-ins:"
<< print(DAG.LiveIns[RegionIdx], DAG.MRI)
<< "Region live-in pressure: "
- << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
- << "Region register pressure: " << print(PressureBefore));
+ << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]),
+ &ST, 0, &MF)
+ << "Region register pressure: "
+ << print(PressureBefore, &ST, 0, &MF));
S.HasHighPressure = false;
S.KnownExcessRP = isRegionWithExcessRP();
@@ -1279,17 +1284,18 @@ void GCNSchedStage::checkScheduling() {
// Check the results of scheduling.
PressureAfter = DAG.getRealRegPressure(RegionIdx);
- LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
+ LLVM_DEBUG(dbgs() << "Pressure after scheduling: "
+ << print(PressureAfter, &ST, 0, &MF));
LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
- unsigned AddressableArchVGPR = ST.getAddressableNumArchVGPRs();
+ unsigned ArchVGPRThreshold = ST.getArchVGPRAllocationThreshold(MF);
if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
- PressureAfter.getVGPRNum(ST.hasGFX90AInsts(), AddressableArchVGPR) <=
+ PressureAfter.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) <=
S.VGPRCriticalLimit) {
DAG.Pressure[RegionIdx] = PressureAfter;
DAG.RegionsWithMinOcc[RegionIdx] =
- PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize) ==
+ PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize, DAG.MF) ==
DAG.MinOccupancy;
// Early out if we have achieved the occupancy target.
@@ -1299,10 +1305,12 @@ void GCNSchedStage::checkScheduling() {
unsigned TargetOccupancy = std::min(
S.getTargetOccupancy(), ST.getOccupancyWithWorkGroupSizes(MF).second);
- unsigned WavesAfter = std::min(
- TargetOccupancy, PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize));
- unsigned WavesBefore = std::min(
- TargetOccupancy, PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize));
+ unsigned WavesAfter =
+ std::min(TargetOccupancy,
+ PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize, DAG.MF));
+ unsigned WavesBefore =
+ std::min(TargetOccupancy,
+ PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize, DAG.MF));
LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
<< ", after " << WavesAfter << ".\n");
@@ -1336,10 +1344,10 @@ void GCNSchedStage::checkScheduling() {
unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
- if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts(), AddressableArchVGPR) >
+ if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) >
MaxVGPRs ||
- PressureAfter.getArchVGPRNum(AddressableArchVGPR) > MaxArchVGPRs ||
- PressureAfter.getAGPRNum(AddressableArchVGPR) > MaxArchVGPRs ||
+ PressureAfter.getArchVGPRNum(ArchVGPRThreshold) > MaxArchVGPRs ||
+ PressureAfter.getAGPRNum(ArchVGPRThreshold) > MaxArchVGPRs ||
PressureAfter.getSGPRNum() > MaxSGPRs) {
DAG.RegionsWithHighRP[RegionIdx] = true;
DAG.RegionsWithExcessRP[RegionIdx] = true;
@@ -1352,7 +1360,7 @@ void GCNSchedStage::checkScheduling() {
} else {
DAG.Pressure[RegionIdx] = PressureAfter;
DAG.RegionsWithMinOcc[RegionIdx] =
- PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize) ==
+ PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize, DAG.MF) ==
DAG.MinOccupancy;
}
}
@@ -1477,13 +1485,13 @@ bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
// For dynamic VGPR mode, we don't want to waste any VGPR blocks.
if (DAG.MFI.isDynamicVGPREnabled()) {
- unsigned AddressableArchVGPR = ST.getAddressableNumArchVGPRs();
+ unsigned ArchVGPRThreshold = ST.getArchVGPRAllocationThreshold(MF);
unsigned BlocksBefore = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
&ST, DAG.MFI.getDynamicVGPRBlockSize(),
- PressureBefore.getVGPRNum(false, AddressableArchVGPR));
+ PressureBefore.getVGPRNum(false, ArchVGPRThreshold));
unsigned BlocksAfter = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
&ST, DAG.MFI.getDynamicVGPRBlockSize(),
- PressureAfter.getVGPRNum(false, AddressableArchVGPR));
+ PressureAfter.getVGPRNum(false, ArchVGPRThreshold));
if (BlocksAfter > BlocksBefore)
return true;
}
@@ -1507,8 +1515,8 @@ bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
// If RP is not reduced in the unclustered reschedule stage, revert to the
// old schedule.
- if ((WavesAfter <=
- PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()) &&
+ if ((WavesAfter <= PressureBefore.getOccupancy(
+ ST, DAG.MFI.getDynamicVGPRBlockSize(), DAG.MF) &&
mayCauseSpilling(WavesAfter)) ||
GCNSchedStage::shouldRevertScheduling(WavesAfter)) {
LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
@@ -1530,9 +1538,10 @@ bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
ScheduleMetrics MAfter = getScheduleMetrics(DAG);
unsigned OldMetric = MBefore.getMetric();
unsigned NewMetric = MAfter.getMetric();
- unsigned WavesBefore = std::min(
- S.getTargetOccupancy(),
- PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()));
+ unsigned WavesBefore =
+ std::min(S.getTargetOccupancy(),
+ PressureBefore.getOccupancy(
+ ST, DAG.MFI.getDynamicVGPRBlockSize(), DAG.MF));
unsigned Profit =
((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
@@ -1586,8 +1595,8 @@ bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
void GCNSchedStage::revertScheduling() {
DAG.RegionsWithMinOcc[RegionIdx] =
- PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()) ==
- DAG.MinOccupancy;
+ PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize(),
+ DAG.MF) == DAG.MinOccupancy;
LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
DAG.RegionEnd = DAG.RegionBegin;
int SkippedDebugInstr = 0;
@@ -2025,8 +2034,10 @@ void PreRARematStage::rematerialize() {
}
DAG.Pressure[I] = RP;
AchievedOcc = std::min(
- AchievedOcc, RP.getOccupancy(ST, MF.getInfo<SIMachineFunctionInfo>()
- ->getDynamicVGPRBlockSize()));
+ AchievedOcc,
+ RP.getOccupancy(
+ ST, MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize(),
+ DAG.MF));
}
REMAT_DEBUG(dbgs() << "Achieved occupancy " << AchievedOcc << "\n");
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 10ded0e1d1c3a..a259b90545ee9 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1629,6 +1629,15 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return AMDGPU::IsaInfo::getAddressableNumArchVGPRs(this);
}
+ unsigned getArchVGPRAllocationThreshold(const MachineFunction &MF) const {
+ if (hasGFX90AInsts() || !hasMAIInsts())
+ return AMDGPU::IsaInfo::getAddressableNumArchVGPRs(this);
+
+ const Function &F = MF.getFunction();
+ std::pair<unsigned, unsigned> Waves = getWavesPerEU(F);
+ return getMaxNumVGPRs(Waves.first, 0);
+ }
+
/// \returns Addressable number of VGPRs supported by the subtarget.
unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const {
return AMDGPU::IsaInfo::getAddressableNumVGPRs(this, DynamicVGPRBlockSize);
diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index 844908a5ce8d9..f4cf8f4e03df8 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -199,7 +199,8 @@ bool SIFormMemoryClausesImpl::checkPressure(const MachineInstr &MI,
GCNRegPressure MaxPressure = RPT.moveMaxPressure();
unsigned Occupancy = MaxPressure.getOccupancy(
*ST,
- MI.getMF()->getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize());
+ MI.getMF()->getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize(),
+ *MI.getMF());
// Don't push over half the register budget. We don't want to introduce
// spilling just to form a soft clause.
@@ -212,7 +213,7 @@ bool SIFormMemoryClausesImpl::checkPressure(const MachineInstr &MI,
// fragmentation of registers the allocator will need to satisfy.
if (Occupancy >= MFI->getMinAllowedOccupancy() &&
MaxPressure.getVGPRNum(ST->hasGFX90AInsts(),
- ST->getAddressableNumArchVGPRs()) <=
+ ST->getArchVGPRAllocationThreshold(*MI.getMF())) <=
MaxVGPRs / 2 &&
MaxPressure.getSGPRNum() <= MaxSGPRs / 2) {
LastRecordedOccupancy = Occupancy;
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-liveins.mir b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-liveins.mir
index 2a08c52e447ba..72181346764fb 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-liveins.mir
+++ b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-liveins.mir
@@ -6,7 +6,7 @@
# CHECK-NEXT: test_get_liveins:%bb.0
# CHECK: ********** MI Scheduling **********
# CHECK-NEXT: test_get_liveins:%bb.1
-# CHECK: Region live-in pressure: VGPRs: 1 AGPRs: 0, SGPRs: 0, LVGPR WT: 0, LSGPR WT: 0
+# CHECK: Region live-in pressure: VGPRs: 1 AGPRs: 0(O10), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 10
# CHECK: ScheduleDAGMILive::schedule starting
---
>From 1f24d721d1bb0ef89fe91a787cf0941edc63816b Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 28 Jul 2025 13:19:52 -0700
Subject: [PATCH 03/10] Clean up signature for getOccupancy
Change-Id: I0b74f6ee1d93bd5e6fc3e285c0c6e91a8090d28e
---
.../Target/AMDGPU/GCNIterativeScheduler.cpp | 39 ++++++-------------
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 2 +-
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 14 ++++---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 33 +++++-----------
.../lib/Target/AMDGPU/SIFormMemoryClauses.cpp | 5 +--
5 files changed, 31 insertions(+), 62 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 050e47270498b..2c833abedbfb7 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -447,11 +447,7 @@ void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
// BestSchedules aren't deleted on fail.
unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
// TODO: assert Regions are sorted descending by pressure
- const auto &ST = MF.getSubtarget<GCNSubtarget>();
- const unsigned DynamicVGPRBlockSize =
- MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
- const auto Occ =
- Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize, MF);
+ const auto Occ = Regions.front()->MaxPressure.getOccupancy(MF);
LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
<< ", current = " << Occ << '\n');
@@ -460,7 +456,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
// Always build the DAG to add mutations
BuildDAG DAG(*R, *this);
- if (R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize, MF) >= NewOcc)
+ if (R->MaxPressure.getOccupancy(MF) >= NewOcc)
continue;
LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
@@ -471,7 +467,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
printSchedRP(dbgs(), R->MaxPressure, MaxRP));
- NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST, DynamicVGPRBlockSize, MF));
+ NewOcc = std::min(NewOcc, MaxRP.getOccupancy(MF));
if (NewOcc <= Occ)
break;
@@ -489,14 +485,11 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
bool TryMaximizeOccupancy) {
- const auto &ST = MF.getSubtarget<GCNSubtarget>();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
auto TgtOcc = MFI->getMinAllowedOccupancy();
- unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize();
sortRegionsByPressure(TgtOcc);
- auto Occ =
- Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize, MF);
+ auto Occ = Regions.front()->MaxPressure.getOccupancy(MF);
bool IsReentry = false;
if (TryMaximizeOccupancy && Occ < TgtOcc) {
@@ -527,22 +520,19 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
const auto RP = getRegionPressure(*R);
LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
- if (RP.getOccupancy(ST, DynamicVGPRBlockSize, MF) < TgtOcc) {
+ if (RP.getOccupancy(MF) < TgtOcc) {
LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
if (R->BestSchedule.get() &&
- R->BestSchedule->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize,
- MF) >= TgtOcc) {
+ R->BestSchedule->MaxPressure.getOccupancy(MF) >= TgtOcc) {
LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
scheduleBest(*R);
} else {
LLVM_DEBUG(dbgs() << ", restoring\n");
Ovr.restoreOrder();
- assert(R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize, MF) >=
- TgtOcc);
+ assert(R->MaxPressure.getOccupancy(MF) >= TgtOcc);
}
}
- FinalOccupancy = std::min(FinalOccupancy,
- RP.getOccupancy(ST, DynamicVGPRBlockSize, MF));
+ FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(MF));
}
}
MFI->limitOccupancy(FinalOccupancy);
@@ -585,14 +575,11 @@ void GCNIterativeScheduler::scheduleMinReg(bool force) {
void GCNIterativeScheduler::scheduleILP(
bool TryMaximizeOccupancy) {
- const auto &ST = MF.getSubtarget<GCNSubtarget>();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
auto TgtOcc = MFI->getMinAllowedOccupancy();
- unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize();
sortRegionsByPressure(TgtOcc);
- auto Occ =
- Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize, MF);
+ auto Occ = Regions.front()->MaxPressure.getOccupancy(MF);
bool IsReentry = false;
if (TryMaximizeOccupancy && Occ < TgtOcc) {
@@ -613,19 +600,17 @@ void GCNIterativeScheduler::scheduleILP(
const auto RP = getSchedulePressure(*R, ILPSchedule);
LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
- if (RP.getOccupancy(ST, DynamicVGPRBlockSize, MF) < TgtOcc) {
+ if (RP.getOccupancy(MF) < TgtOcc) {
LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
if (R->BestSchedule.get() &&
- R->BestSchedule->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize,
- MF) >= TgtOcc) {
+ R->BestSchedule->MaxPressure.getOccupancy(MF) >= TgtOcc) {
LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
scheduleBest(*R);
}
} else {
scheduleRegion(*R, ILPSchedule, RP);
LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
- FinalOccupancy = std::min(FinalOccupancy,
- RP.getOccupancy(ST, DynamicVGPRBlockSize, MF));
+ FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(MF));
}
}
MFI->limitOccupancy(FinalOccupancy);
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 786b45902ae48..9ee171e1b9999 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -267,7 +267,7 @@ Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST,
OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight(ArchVGPRThreshold)
<< ", LSGPR WT: " << RP.getSGPRTuplesWeight();
if (ST)
- OS << " -> Occ: " << RP.getOccupancy(*ST, DynamicVGPRBlockSize, *MF);
+ OS << " -> Occ: " << RP.getOccupancy(*MF);
OS << '\n';
});
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 98eb35eaaca8e..9eb86017adafc 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -18,6 +18,7 @@
#define LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
#include "GCNSubtarget.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include <algorithm>
@@ -108,8 +109,11 @@ struct GCNRegPressure {
}
unsigned getSGPRTuplesWeight() const { return Value[TOTAL_KINDS + SGPR]; }
- unsigned getOccupancy(const GCNSubtarget &ST, unsigned DynamicVGPRBlockSize,
- const MachineFunction &MF) const {
+ unsigned getOccupancy(const MachineFunction &MF) const {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ unsigned DynamicVGPRBlockSize =
+ MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
+
return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()),
ST.getOccupancyWithNumVGPRs(
getVGPRNum(ST.hasGFX90AInsts(),
@@ -122,11 +126,9 @@ struct GCNRegPressure {
LaneBitmask NewMask,
const MachineRegisterInfo &MRI);
- bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure &O,
- unsigned DynamicVGPRBlockSize,
+ bool higherOccupancy(const GCNRegPressure &O,
const MachineFunction &MF) const {
- return getOccupancy(ST, DynamicVGPRBlockSize, MF) >
- O.getOccupancy(ST, DynamicVGPRBlockSize, MF);
+ return getOccupancy(MF) > O.getOccupancy(MF);
}
/// Compares \p this GCNRegpressure to \p O, returning true if \p this is
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 2b61ad4e7a8d5..ef3dcea2fcac5 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1146,9 +1146,7 @@ void UnclusteredHighRPStage::finalizeGCNSchedStage() {
if (DAG.MinOccupancy > InitialOccupancy) {
for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
DAG.RegionsWithMinOcc[IDX] =
- DAG.Pressure[IDX].getOccupancy(DAG.ST,
- DAG.MFI.getDynamicVGPRBlockSize(),
- DAG.MF) == DAG.MinOccupancy;
+ DAG.Pressure[IDX].getOccupancy(DAG.MF) == DAG.MinOccupancy;
LLVM_DEBUG(dbgs() << StageID
<< " stage successfully increased occupancy to "
@@ -1288,15 +1286,13 @@ void GCNSchedStage::checkScheduling() {
<< print(PressureAfter, &ST, 0, &MF));
LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
- unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
unsigned ArchVGPRThreshold = ST.getArchVGPRAllocationThreshold(MF);
if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
PressureAfter.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) <=
S.VGPRCriticalLimit) {
DAG.Pressure[RegionIdx] = PressureAfter;
DAG.RegionsWithMinOcc[RegionIdx] =
- PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize, DAG.MF) ==
- DAG.MinOccupancy;
+ PressureAfter.getOccupancy(DAG.MF) == DAG.MinOccupancy;
// Early out if we have achieved the occupancy target.
LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
@@ -1306,11 +1302,9 @@ void GCNSchedStage::checkScheduling() {
unsigned TargetOccupancy = std::min(
S.getTargetOccupancy(), ST.getOccupancyWithWorkGroupSizes(MF).second);
unsigned WavesAfter =
- std::min(TargetOccupancy,
- PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize, DAG.MF));
+ std::min(TargetOccupancy, PressureAfter.getOccupancy(DAG.MF));
unsigned WavesBefore =
- std::min(TargetOccupancy,
- PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize, DAG.MF));
+ std::min(TargetOccupancy, PressureBefore.getOccupancy(DAG.MF));
LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
<< ", after " << WavesAfter << ".\n");
@@ -1360,8 +1354,7 @@ void GCNSchedStage::checkScheduling() {
} else {
DAG.Pressure[RegionIdx] = PressureAfter;
DAG.RegionsWithMinOcc[RegionIdx] =
- PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize, DAG.MF) ==
- DAG.MinOccupancy;
+ PressureAfter.getOccupancy(DAG.MF) == DAG.MinOccupancy;
}
}
@@ -1515,8 +1508,7 @@ bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
// If RP is not reduced in the unclustered reschedule stage, revert to the
// old schedule.
- if ((WavesAfter <= PressureBefore.getOccupancy(
- ST, DAG.MFI.getDynamicVGPRBlockSize(), DAG.MF) &&
+ if ((WavesAfter <= PressureBefore.getOccupancy(DAG.MF) &&
mayCauseSpilling(WavesAfter)) ||
GCNSchedStage::shouldRevertScheduling(WavesAfter)) {
LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
@@ -1539,9 +1531,7 @@ bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
unsigned OldMetric = MBefore.getMetric();
unsigned NewMetric = MAfter.getMetric();
unsigned WavesBefore =
- std::min(S.getTargetOccupancy(),
- PressureBefore.getOccupancy(
- ST, DAG.MFI.getDynamicVGPRBlockSize(), DAG.MF));
+ std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(DAG.MF));
unsigned Profit =
((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
@@ -1595,8 +1585,7 @@ bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
void GCNSchedStage::revertScheduling() {
DAG.RegionsWithMinOcc[RegionIdx] =
- PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize(),
- DAG.MF) == DAG.MinOccupancy;
+ PressureBefore.getOccupancy(DAG.MF) == DAG.MinOccupancy;
LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
DAG.RegionEnd = DAG.RegionBegin;
int SkippedDebugInstr = 0;
@@ -2033,11 +2022,7 @@ void PreRARematStage::rematerialize() {
}
}
DAG.Pressure[I] = RP;
- AchievedOcc = std::min(
- AchievedOcc,
- RP.getOccupancy(
- ST, MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize(),
- DAG.MF));
+ AchievedOcc = std::min(AchievedOcc, RP.getOccupancy(DAG.MF));
}
REMAT_DEBUG(dbgs() << "Achieved occupancy " << AchievedOcc << "\n");
}
diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index f4cf8f4e03df8..03d5ac6dec025 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -197,10 +197,7 @@ bool SIFormMemoryClausesImpl::checkPressure(const MachineInstr &MI,
// pointer becomes dead and could otherwise be reused for destination.
RPT.advanceToNext();
GCNRegPressure MaxPressure = RPT.moveMaxPressure();
- unsigned Occupancy = MaxPressure.getOccupancy(
- *ST,
- MI.getMF()->getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize(),
- *MI.getMF());
+ unsigned Occupancy = MaxPressure.getOccupancy(*MI.getMF());
// Don't push over half the register budget. We don't want to introduce
// spilling just to form a soft clause.
>From c3351970a84afa0293f2160b7dce6bb3d48f8ef3 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 28 Jul 2025 19:13:14 -0700
Subject: [PATCH 04/10] Factor out getAVGPRsAs*GPRsNum
Change-Id: Ia3b8507f95763079ee3c2224655990a299c8854d
---
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 39 +++++++++++++++++--------
1 file changed, 27 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 9eb86017adafc..c7449b43a35b4 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -57,6 +57,24 @@ struct GCNRegPressure {
return std::max(Value[VGPR] + Value[AVGPR], Value[AGPR]);
}
+ inline static unsigned getAVGPRsAsVGPRsNum(unsigned NumArchVGPRs,
+ unsigned NumAVGPRs,
+ unsigned AddressableArchVGPR) {
+
+ return NumArchVGPRs < AddressableArchVGPR
+ ? std::min((AddressableArchVGPR - NumArchVGPRs), NumAVGPRs)
+ : 0;
+ }
+
+ inline static unsigned getAVGPRsAsAGPRsNum(unsigned NumArchVGPRs,
+ unsigned NumAGPRs,
+ unsigned NumAVGPRs,
+ unsigned AddressableArchVGPR) {
+ unsigned AVGPRsAsVGPRs =
+ getAVGPRsAsVGPRsNum(NumArchVGPRs, NumAVGPRs, AddressableArchVGPR);
+ return NumAVGPRs > AVGPRsAsVGPRs ? NumAVGPRs - AVGPRsAsVGPRs : 0;
+ }
+
/// Returns the aggregated VGPR pressure, assuming \p NumArchVGPRs ArchVGPRs
/// \p NumAGPRs AGPRS, and \p NumAVGPRs AVGPRs for a target with a unified
/// VGPR file.
@@ -68,11 +86,10 @@ struct GCNRegPressure {
// Until we hit the VGPRThreshold, we will assign AV as VGPR. After that
// point, we will assign as AGPR.
unsigned AVGPRsAsVGPRs =
- NumArchVGPRs < AddressableArchVGPR
- ? std::min((AddressableArchVGPR - NumArchVGPRs), NumAVGPRs)
- : 0;
- unsigned AVGPRsAsAGPRs =
- NumAVGPRs > AVGPRsAsVGPRs ? NumAVGPRs - AVGPRsAsVGPRs : 0;
+ getAVGPRsAsVGPRsNum(NumArchVGPRs, NumAVGPRs, AddressableArchVGPR);
+ unsigned AVGPRsAsAGPRs = getAVGPRsAsAGPRsNum(
+ NumArchVGPRs, NumAGPRs, NumAVGPRs, AddressableArchVGPR);
+ NumAVGPRs > AVGPRsAsVGPRs ? NumAVGPRs - AVGPRsAsVGPRs : 0;
return alignTo(NumArchVGPRs + AVGPRsAsVGPRs,
AMDGPU::IsaInfo::getArchVGPRAllocGranule()) +
NumAGPRs + AVGPRsAsAGPRs;
@@ -96,13 +113,11 @@ struct GCNRegPressure {
unsigned getVGPRTuplesWeight(unsigned AddressableArchVGPR) const {
unsigned AVGPRsAsVGPRs =
- Value[TOTAL_KINDS + VGPR] < AddressableArchVGPR
- ? std::min(AddressableArchVGPR - Value[TOTAL_KINDS + VGPR],
- Value[TOTAL_KINDS + AVGPR])
- : 0;
- unsigned AVGPRsAsAGPRs = Value[TOTAL_KINDS + AVGPR] > AVGPRsAsVGPRs
- ? Value[TOTAL_KINDS + AVGPR] - AVGPRsAsVGPRs
- : 0;
+ getAVGPRsAsVGPRsNum(Value[TOTAL_KINDS + VGPR],
+ Value[TOTAL_KINDS + AVGPR], AddressableArchVGPR);
+ unsigned AVGPRsAsAGPRs = getAVGPRsAsAGPRsNum(
+ Value[TOTAL_KINDS + VGPR], Value[TOTAL_KINDS + AGPR],
+ Value[TOTAL_KINDS + AVGPR], AddressableArchVGPR);
return std::max(Value[TOTAL_KINDS + VGPR] + AVGPRsAsVGPRs,
Value[TOTAL_KINDS + AGPR] + AVGPRsAsAGPRs);
>From 33a70f2a3faa35ebda87554ce22244cd5e6101d3 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 28 Jul 2025 19:14:44 -0700
Subject: [PATCH 05/10] Formatting
Change-Id: I14486056bef5e9a97842be68a7f5abe82ecc37fe
---
llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 2c833abedbfb7..87f5b9f16868a 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -484,7 +484,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
}
void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
- bool TryMaximizeOccupancy) {
+ bool TryMaximizeOccupancy) {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
auto TgtOcc = MFI->getMinAllowedOccupancy();
@@ -573,8 +573,7 @@ void GCNIterativeScheduler::scheduleMinReg(bool force) {
///////////////////////////////////////////////////////////////////////////////
// ILP scheduler port
-void GCNIterativeScheduler::scheduleILP(
- bool TryMaximizeOccupancy) {
+void GCNIterativeScheduler::scheduleILP(bool TryMaximizeOccupancy) {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
auto TgtOcc = MFI->getMinAllowedOccupancy();
>From 572732449576479c7394638b1e21a92a39559d35 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 28 Jul 2025 19:34:17 -0700
Subject: [PATCH 06/10] Use getMaxNumVectorRegs instead of
getArchVGPRAllocationThreshold
Change-Id: I36e92840e35774cb419389ee6dadc26dd376ebaa
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 14 +++++++------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 12 +++++------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 21 ++++++++++++-------
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 9 --------
.../lib/Target/AMDGPU/SIFormMemoryClauses.cpp | 5 +++--
5 files changed, 30 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 9ee171e1b9999..966e810115195 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -99,7 +99,8 @@ void GCNRegPressure::inc(unsigned Reg,
bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned MaxOccupancy) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- unsigned ArchVGPRThreshold = ST.getArchVGPRAllocationThreshold(MF);
+ unsigned ArchVGPRThreshold =
+ ST.getRegisterInfo()->getMaxNumVectorRegs(MF).first;
unsigned DynamicVGPRBlockSize =
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
@@ -250,7 +251,8 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST,
unsigned DynamicVGPRBlockSize,
const MachineFunction *MF) {
- unsigned ArchVGPRThreshold = ST->getArchVGPRAllocationThreshold(*MF);
+ unsigned ArchVGPRThreshold =
+ ST->getRegisterInfo()->getMaxNumVectorRegs(*MF).first;
return Printable(
[&RP, ST, DynamicVGPRBlockSize, ArchVGPRThreshold, MF](raw_ostream &OS) {
OS << "VGPRs: " << RP.getArchVGPRNum(ArchVGPRThreshold) << ' '
@@ -903,10 +905,10 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
auto printRP = [&MF](const GCNRegPressure &RP) {
return Printable([&RP, &MF](raw_ostream &OS) {
OS << format(PFX " %-5d", RP.getSGPRNum())
- << format(
- " %-5d",
- RP.getVGPRNum(false, MF.getSubtarget<GCNSubtarget>()
- .getArchVGPRAllocationThreshold(MF)));
+ << format(" %-5d", RP.getVGPRNum(false, MF.getSubtarget<GCNSubtarget>()
+ .getRegisterInfo()
+ ->getMaxNumVectorRegs(MF)
+ .first));
});
};
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index c7449b43a35b4..d61e0348dabb4 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -89,7 +89,6 @@ struct GCNRegPressure {
getAVGPRsAsVGPRsNum(NumArchVGPRs, NumAVGPRs, AddressableArchVGPR);
unsigned AVGPRsAsAGPRs = getAVGPRsAsAGPRsNum(
NumArchVGPRs, NumAGPRs, NumAVGPRs, AddressableArchVGPR);
- NumAVGPRs > AVGPRsAsVGPRs ? NumAVGPRs - AVGPRsAsVGPRs : 0;
return alignTo(NumArchVGPRs + AVGPRsAsVGPRs,
AMDGPU::IsaInfo::getArchVGPRAllocGranule()) +
NumAGPRs + AVGPRsAsAGPRs;
@@ -129,11 +128,12 @@ struct GCNRegPressure {
unsigned DynamicVGPRBlockSize =
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
- return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()),
- ST.getOccupancyWithNumVGPRs(
- getVGPRNum(ST.hasGFX90AInsts(),
- ST.getArchVGPRAllocationThreshold(MF)),
- DynamicVGPRBlockSize));
+ return std::min(
+ ST.getOccupancyWithNumSGPRs(getSGPRNum()),
+ ST.getOccupancyWithNumVGPRs(
+ getVGPRNum(ST.hasGFX90AInsts(),
+ ST.getRegisterInfo()->getMaxNumVectorRegs(MF).first),
+ DynamicVGPRBlockSize));
}
void inc(unsigned Reg,
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index ef3dcea2fcac5..52359135f8893 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -190,9 +190,10 @@ static void getRegisterPressures(
TempUpwardTracker.recede(*MI);
NewPressure = TempUpwardTracker.getPressure();
}
- unsigned ArchVGPRThreshold =
- DAG->MF.getSubtarget<GCNSubtarget>().getArchVGPRAllocationThreshold(
- DAG->MF);
+ unsigned ArchVGPRThreshold = DAG->MF.getSubtarget<GCNSubtarget>()
+ .getRegisterInfo()
+ ->getMaxNumVectorRegs(DAG->MF)
+ .first;
Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();
Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
NewPressure.getArchVGPRNum(ArchVGPRThreshold);
@@ -343,9 +344,11 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
? static_cast<GCNRPTracker *>(&UpwardTracker)
: static_cast<GCNRPTracker *>(&DownwardTracker);
SGPRPressure = T->getPressure().getSGPRNum();
- VGPRPressure = T->getPressure().getArchVGPRNum(
- DAG->MF.getSubtarget<GCNSubtarget>().getArchVGPRAllocationThreshold(
- DAG->MF));
+ VGPRPressure =
+ T->getPressure().getArchVGPRNum(DAG->MF.getSubtarget<GCNSubtarget>()
+ .getRegisterInfo()
+ ->getMaxNumVectorRegs(DAG->MF)
+ .first);
}
}
ReadyQueue &Q = Zone.Available;
@@ -1286,7 +1289,8 @@ void GCNSchedStage::checkScheduling() {
<< print(PressureAfter, &ST, 0, &MF));
LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
- unsigned ArchVGPRThreshold = ST.getArchVGPRAllocationThreshold(MF);
+ unsigned ArchVGPRThreshold =
+ ST.getRegisterInfo()->getMaxNumVectorRegs(MF).first;
if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
PressureAfter.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) <=
S.VGPRCriticalLimit) {
@@ -1478,7 +1482,8 @@ bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
// For dynamic VGPR mode, we don't want to waste any VGPR blocks.
if (DAG.MFI.isDynamicVGPREnabled()) {
- unsigned ArchVGPRThreshold = ST.getArchVGPRAllocationThreshold(MF);
+ unsigned ArchVGPRThreshold =
+ ST.getRegisterInfo()->getMaxNumVectorRegs(MF).first;
unsigned BlocksBefore = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
&ST, DAG.MFI.getDynamicVGPRBlockSize(),
PressureBefore.getVGPRNum(false, ArchVGPRThreshold));
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index a259b90545ee9..10ded0e1d1c3a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1629,15 +1629,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return AMDGPU::IsaInfo::getAddressableNumArchVGPRs(this);
}
- unsigned getArchVGPRAllocationThreshold(const MachineFunction &MF) const {
- if (hasGFX90AInsts() || !hasMAIInsts())
- return AMDGPU::IsaInfo::getAddressableNumArchVGPRs(this);
-
- const Function &F = MF.getFunction();
- std::pair<unsigned, unsigned> Waves = getWavesPerEU(F);
- return getMaxNumVGPRs(Waves.first, 0);
- }
-
/// \returns Addressable number of VGPRs supported by the subtarget.
unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const {
return AMDGPU::IsaInfo::getAddressableNumVGPRs(this, DynamicVGPRBlockSize);
diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index 03d5ac6dec025..cdc80ca9267d6 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -209,8 +209,9 @@ bool SIFormMemoryClausesImpl::checkPressure(const MachineInstr &MI,
// tracking does not account for the alignment requirements for SGPRs, or the
// fragmentation of registers the allocator will need to satisfy.
if (Occupancy >= MFI->getMinAllowedOccupancy() &&
- MaxPressure.getVGPRNum(ST->hasGFX90AInsts(),
- ST->getArchVGPRAllocationThreshold(*MI.getMF())) <=
+ MaxPressure.getVGPRNum(
+ ST->hasGFX90AInsts(),
+ ST->getRegisterInfo()->getMaxNumVectorRegs(*MI.getMF()).first) <=
MaxVGPRs / 2 &&
MaxPressure.getSGPRNum() <= MaxSGPRs / 2) {
LastRecordedOccupancy = Occupancy;
From 5a0696d975c90c5d634311b56d3224d351dcccdf Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 29 Jul 2025 08:24:20 -0700
Subject: [PATCH 07/10] Add test
Change-Id: I68bc69d5bafa3d8161c7b507721a9cde3e99d2b1
---
llvm/test/CodeGen/AMDGPU/avgpr-pressure.mir | 348 ++++++++++++++++++++
1 file changed, 348 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/avgpr-pressure.mir
diff --git a/llvm/test/CodeGen/AMDGPU/avgpr-pressure.mir b/llvm/test/CodeGen/AMDGPU/avgpr-pressure.mir
new file mode 100644
index 0000000000000..358942e73a7c6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/avgpr-pressure.mir
@@ -0,0 +1,348 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler --debug-only=machine-scheduler -o - %s 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+--- |
+ define void @avgpr_rp_occ1() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @avgpr_rp_occ2() #1 {
+ entry:
+ unreachable
+ }
+
+ define void @avgpr_rp_occ3() #2 {
+ entry:
+ unreachable
+ }
+
+ define void @avgpr_rp_occ4() #3 {
+ entry:
+ unreachable
+ }
+
+ define void @avgpr_rp_occ5() #4 {
+ entry:
+ unreachable
+ }
+
+ define void @avgpr_rp_occ6() #5 {
+ entry:
+ unreachable
+ }
+
+ define void @avgpr_rp_occ7() #6 {
+ entry:
+ unreachable
+ }
+
+ define void @avgpr_rp_occ8() #7 {
+ entry:
+ unreachable
+ }
+
+ attributes #0 = {"amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64"}
+ attributes #1 = {"amdgpu-waves-per-eu"="2,2" "amdgpu-flat-work-group-size"="64,64"}
+ attributes #2 = {"amdgpu-waves-per-eu"="3,3" "amdgpu-flat-work-group-size"="64,64"}
+ attributes #3 = {"amdgpu-waves-per-eu"="4,4" "amdgpu-flat-work-group-size"="64,64"}
+ attributes #4 = {"amdgpu-waves-per-eu"="5,5" "amdgpu-flat-work-group-size"="64,64"}
+ attributes #5 = {"amdgpu-waves-per-eu"="6,6" "amdgpu-flat-work-group-size"="64,64"}
+ attributes #6 = {"amdgpu-waves-per-eu"="7,7" "amdgpu-flat-work-group-size"="64,64"}
+ attributes #7 = {"amdgpu-waves-per-eu"="8,8" "amdgpu-flat-work-group-size"="64,64"}
+
+
+...
+
+# CHECK: avgpr_rp_occ1:%bb.0
+# CHECK: Pressure before scheduling:
+# CHECK-NEXT: Region live-ins:
+# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8
+# CHECK-NEXT: Region register pressure: VGPRs: 256 AGPRs: 192(O1), SGPRs: 0(O10), LVGPR WT: 256, LSGPR WT: 0 -> Occ: 1
+
+---
+name: avgpr_rp_occ1
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:av_1024 = IMPLICIT_DEF
+ %9:av_1024 = IMPLICIT_DEF
+ %10:av_1024 = IMPLICIT_DEF
+ %11:av_1024 = IMPLICIT_DEF
+ %12:av_1024 = IMPLICIT_DEF
+ %13:av_1024 = IMPLICIT_DEF
+ %14:av_1024 = IMPLICIT_DEF
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7
+
+ bb.1:
+ KILL %8, %9, %10, %11, %12, %13, %14
+ S_ENDPGM 0
+...
+
+# CHECK: avgpr_rp_occ2:%bb.0
+# CHECK: Pressure before scheduling:
+# CHECK-NEXT: Region live-ins:
+# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8
+# CHECK-NEXT: Region register pressure: VGPRs: 128 AGPRs: 64(O2), SGPRs: 0(O10), LVGPR WT: 128, LSGPR WT: 0 -> Occ: 2
+
+---
+name: avgpr_rp_occ2
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:av_1024 = IMPLICIT_DEF
+ %5:av_1024 = IMPLICIT_DEF
+ %6:av_1024 = IMPLICIT_DEF
+ SCHED_BARRIER 0
+ KILL %1, %2, %3
+
+ bb.1:
+ KILL %4, %5, %6
+ S_ENDPGM 0
+...
+
+# CHECK: avgpr_rp_occ3:%bb.0
+# CHECK: Pressure before scheduling:
+# CHECK-NEXT: Region live-ins:
+# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8
+# CHECK-NEXT: Region register pressure: VGPRs: 84 AGPRs: 44(O4), SGPRs: 0(O10), LVGPR WT: 84, LSGPR WT: 0 -> Occ: 4
+
+---
+name: avgpr_rp_occ3
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:av_1024 = IMPLICIT_DEF
+ %4:av_1024 = IMPLICIT_DEF
+ SCHED_BARRIER 0
+ KILL %1, %2
+
+ bb.1:
+ KILL %3, %4
+ S_ENDPGM 0
+...
+
+# CHECK: avgpr_rp_occ4:%bb.0
+# CHECK: Pressure before scheduling:
+# CHECK-NEXT: Region live-ins:
+# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8
+# CHECK-NEXT: Region register pressure: VGPRs: 64 AGPRs: 64(O4), SGPRs: 0(O10), LVGPR WT: 64, LSGPR WT: 0 -> Occ: 4
+
+---
+name: avgpr_rp_occ4
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:av_1024 = IMPLICIT_DEF
+ %4:av_1024 = IMPLICIT_DEF
+ SCHED_BARRIER 0
+ KILL %1, %2
+
+ bb.1:
+ KILL %3, %4
+ S_ENDPGM 0
+...
+
+# CHECK: avgpr_rp_occ5:%bb.0
+# CHECK: Pressure before scheduling:
+# CHECK-NEXT: Region live-ins:
+# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8
+# CHECK-NEXT: Region register pressure: VGPRs: 48 AGPRs: 80(O4), SGPRs: 0(O10), LVGPR WT: 64, LSGPR WT: 0 -> Occ: 4
+
+---
+name: avgpr_rp_occ5
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:av_1024 = IMPLICIT_DEF
+ %4:av_1024 = IMPLICIT_DEF
+ SCHED_BARRIER 0
+ KILL %1, %2
+
+ bb.1:
+ KILL %3, %4
+ S_ENDPGM 0
+...
+
+# CHECK: avgpr_rp_occ6:%bb.0
+# CHECK: Pressure before scheduling:
+# CHECK-NEXT: Region live-ins:
+# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8
+# CHECK-NEXT: Region register pressure: VGPRs: 40 AGPRs: 88(O4), SGPRs: 0(O10), LVGPR WT: 64, LSGPR WT: 0 -> Occ: 4
+
+---
+name: avgpr_rp_occ6
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:av_1024 = IMPLICIT_DEF
+ %4:av_1024 = IMPLICIT_DEF
+ SCHED_BARRIER 0
+ KILL %1, %2
+
+ bb.1:
+ KILL %3, %4
+ S_ENDPGM 0
+...
+
+# CHECK: avgpr_rp_occ7:%bb.0
+# CHECK: Pressure before scheduling:
+# CHECK-NEXT: Region live-ins:
+# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8
+# CHECK-NEXT: Region register pressure: VGPRs: 36 AGPRs: 92(O4), SGPRs: 0(O10), LVGPR WT: 64, LSGPR WT: 0 -> Occ: 4
+
+---
+name: avgpr_rp_occ7
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:av_1024 = IMPLICIT_DEF
+ %4:av_1024 = IMPLICIT_DEF
+ SCHED_BARRIER 0
+ KILL %1, %2
+
+ bb.1:
+ KILL %3, %4
+ S_ENDPGM 0
+...
+
+# CHECK: avgpr_rp_occ8:%bb.0
+# CHECK: Pressure before scheduling:
+# CHECK-NEXT: Region live-ins:
+# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8
+# CHECK-NEXT: Region register pressure: VGPRs: 32 AGPRs: 96(O4), SGPRs: 0(O10), LVGPR WT: 64, LSGPR WT: 0 -> Occ: 4
+
+---
+name: avgpr_rp_occ8
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:av_1024 = IMPLICIT_DEF
+ %4:av_1024 = IMPLICIT_DEF
+ SCHED_BARRIER 0
+ KILL %1, %2
+
+ bb.1:
+ KILL %3, %4
+ S_ENDPGM 0
+...
+
From 38e255d764360972d6c61b3dddf329c48d3c46fd Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 29 Jul 2025 09:05:44 -0700
Subject: [PATCH 08/10] Rebase for getMaxNumVectorRegs move
Change-Id: I17c9239229b94c42c35b5683d77f8dfe3f70bafc
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 7 +++----
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 2 +-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 10 ++++------
llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp | 2 +-
4 files changed, 9 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 966e810115195..bd03ccf5322e5 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -100,7 +100,7 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned MaxOccupancy) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
unsigned ArchVGPRThreshold =
- ST.getRegisterInfo()->getMaxNumVectorRegs(MF).first;
+ ST.getMaxNumVectorRegs(MF.getFunction()).first;
unsigned DynamicVGPRBlockSize =
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
@@ -252,7 +252,7 @@ Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST,
unsigned DynamicVGPRBlockSize,
const MachineFunction *MF) {
unsigned ArchVGPRThreshold =
- ST->getRegisterInfo()->getMaxNumVectorRegs(*MF).first;
+ ST->getMaxNumVectorRegs(MF->getFunction()).first;
return Printable(
[&RP, ST, DynamicVGPRBlockSize, ArchVGPRThreshold, MF](raw_ostream &OS) {
OS << "VGPRs: " << RP.getArchVGPRNum(ArchVGPRThreshold) << ' '
@@ -906,8 +906,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
return Printable([&RP, &MF](raw_ostream &OS) {
OS << format(PFX " %-5d", RP.getSGPRNum())
<< format(" %-5d", RP.getVGPRNum(false, MF.getSubtarget<GCNSubtarget>()
- .getRegisterInfo()
- ->getMaxNumVectorRegs(MF)
+ .getMaxNumVectorRegs(MF.getFunction())
.first));
});
};
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index d61e0348dabb4..0e03834380525 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -132,7 +132,7 @@ struct GCNRegPressure {
ST.getOccupancyWithNumSGPRs(getSGPRNum()),
ST.getOccupancyWithNumVGPRs(
getVGPRNum(ST.hasGFX90AInsts(),
- ST.getRegisterInfo()->getMaxNumVectorRegs(MF).first),
+ ST.getMaxNumVectorRegs(MF.getFunction()).first),
DynamicVGPRBlockSize));
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 52359135f8893..80e6c49c42fbc 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -191,8 +191,7 @@ static void getRegisterPressures(
NewPressure = TempUpwardTracker.getPressure();
}
unsigned ArchVGPRThreshold = DAG->MF.getSubtarget<GCNSubtarget>()
- .getRegisterInfo()
- ->getMaxNumVectorRegs(DAG->MF)
+ .getMaxNumVectorRegs(DAG->MF.getFunction())
.first;
Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();
Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
@@ -346,8 +345,7 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
SGPRPressure = T->getPressure().getSGPRNum();
VGPRPressure =
T->getPressure().getArchVGPRNum(DAG->MF.getSubtarget<GCNSubtarget>()
- .getRegisterInfo()
- ->getMaxNumVectorRegs(DAG->MF)
+ .getMaxNumVectorRegs(DAG->MF.getFunction())
.first);
}
}
@@ -1290,7 +1288,7 @@ void GCNSchedStage::checkScheduling() {
LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
unsigned ArchVGPRThreshold =
- ST.getRegisterInfo()->getMaxNumVectorRegs(MF).first;
+ ST.getMaxNumVectorRegs(MF.getFunction()).first;
if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
PressureAfter.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) <=
S.VGPRCriticalLimit) {
@@ -1483,7 +1481,7 @@ bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
// For dynamic VGPR mode, we don't want to waste any VGPR blocks.
if (DAG.MFI.isDynamicVGPREnabled()) {
unsigned ArchVGPRThreshold =
- ST.getRegisterInfo()->getMaxNumVectorRegs(MF).first;
+ ST.getMaxNumVectorRegs(MF.getFunction()).first;
unsigned BlocksBefore = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
&ST, DAG.MFI.getDynamicVGPRBlockSize(),
PressureBefore.getVGPRNum(false, ArchVGPRThreshold));
diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index cdc80ca9267d6..e29ac72c7ba31 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -211,7 +211,7 @@ bool SIFormMemoryClausesImpl::checkPressure(const MachineInstr &MI,
if (Occupancy >= MFI->getMinAllowedOccupancy() &&
MaxPressure.getVGPRNum(
ST->hasGFX90AInsts(),
- ST->getRegisterInfo()->getMaxNumVectorRegs(*MI.getMF()).first) <=
+ ST->getMaxNumVectorRegs(MI.getMF()->getFunction()).first) <=
MaxVGPRs / 2 &&
MaxPressure.getSGPRNum() <= MaxSGPRs / 2) {
LastRecordedOccupancy = Occupancy;
From dcecf426736fad35ae746e9d5e8b29602fdf797a Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 29 Jul 2025 09:15:16 -0700
Subject: [PATCH 09/10] Formatting
Change-Id: I992cdc7ab89d244eaed82d4e671238878376c8d2
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 9 ++++-----
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 14 ++++++--------
2 files changed, 10 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index bd03ccf5322e5..4564163b137be 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -99,8 +99,7 @@ void GCNRegPressure::inc(unsigned Reg,
bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned MaxOccupancy) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- unsigned ArchVGPRThreshold =
- ST.getMaxNumVectorRegs(MF.getFunction()).first;
+ unsigned ArchVGPRThreshold = ST.getMaxNumVectorRegs(MF.getFunction()).first;
unsigned DynamicVGPRBlockSize =
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
@@ -251,8 +250,7 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST,
unsigned DynamicVGPRBlockSize,
const MachineFunction *MF) {
- unsigned ArchVGPRThreshold =
- ST->getMaxNumVectorRegs(MF->getFunction()).first;
+ unsigned ArchVGPRThreshold = ST->getMaxNumVectorRegs(MF->getFunction()).first;
return Printable(
[&RP, ST, DynamicVGPRBlockSize, ArchVGPRThreshold, MF](raw_ostream &OS) {
OS << "VGPRs: " << RP.getArchVGPRNum(ArchVGPRThreshold) << ' '
@@ -906,7 +904,8 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
return Printable([&RP, &MF](raw_ostream &OS) {
OS << format(PFX " %-5d", RP.getSGPRNum())
<< format(" %-5d", RP.getVGPRNum(false, MF.getSubtarget<GCNSubtarget>()
- .getMaxNumVectorRegs(MF.getFunction())
+ .getMaxNumVectorRegs(
+ MF.getFunction())
.first));
});
};
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 80e6c49c42fbc..3cf9a7c0f972e 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -343,10 +343,10 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
? static_cast<GCNRPTracker *>(&UpwardTracker)
: static_cast<GCNRPTracker *>(&DownwardTracker);
SGPRPressure = T->getPressure().getSGPRNum();
- VGPRPressure =
- T->getPressure().getArchVGPRNum(DAG->MF.getSubtarget<GCNSubtarget>()
- .getMaxNumVectorRegs(DAG->MF.getFunction())
- .first);
+ VGPRPressure = T->getPressure().getArchVGPRNum(
+ DAG->MF.getSubtarget<GCNSubtarget>()
+ .getMaxNumVectorRegs(DAG->MF.getFunction())
+ .first);
}
}
ReadyQueue &Q = Zone.Available;
@@ -1287,8 +1287,7 @@ void GCNSchedStage::checkScheduling() {
<< print(PressureAfter, &ST, 0, &MF));
LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
- unsigned ArchVGPRThreshold =
- ST.getMaxNumVectorRegs(MF.getFunction()).first;
+ unsigned ArchVGPRThreshold = ST.getMaxNumVectorRegs(MF.getFunction()).first;
if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
PressureAfter.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) <=
S.VGPRCriticalLimit) {
@@ -1480,8 +1479,7 @@ bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
// For dynamic VGPR mode, we don't want to waste any VGPR blocks.
if (DAG.MFI.isDynamicVGPREnabled()) {
- unsigned ArchVGPRThreshold =
- ST.getMaxNumVectorRegs(MF.getFunction()).first;
+ unsigned ArchVGPRThreshold = ST.getMaxNumVectorRegs(MF.getFunction()).first;
unsigned BlocksBefore = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
&ST, DAG.MFI.getDynamicVGPRBlockSize(),
PressureBefore.getVGPRNum(false, ArchVGPRThreshold));
From d59fba1f01a00df11a46d0ae236901dfb30ea899 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 29 Jul 2025 11:48:29 -0700
Subject: [PATCH 10/10] Fix test + handling of ArchVGPR pressure
Change-Id: I15cd9b4e9e38d7000a403bed56918819ae858658
---
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 14 +-
llvm/test/CodeGen/AMDGPU/avgpr-pressure.mir | 161 ++++++++++++++++++--
2 files changed, 155 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 0e03834380525..8b80cc42c9bb0 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -97,15 +97,17 @@ struct GCNRegPressure {
/// \returns the ArchVGPR32 pressure, plus the AVGPRS which we assume will be
/// allocated as VGPR
unsigned getArchVGPRNum(unsigned AddressableArchVGPR) const {
- return std::min(Value[VGPR] + Value[AVGPR], AddressableArchVGPR);
+ unsigned AVGPRsAsVGPRs =
+ getAVGPRsAsVGPRsNum(Value[VGPR], Value[AVGPR], AddressableArchVGPR);
+
+ return Value[VGPR] + AVGPRsAsVGPRs;
}
/// \returns the AccVGPR32 pressure
unsigned getAGPRNum(unsigned AddressableArchVGPR) const {
- unsigned VGPRsForAGPRs =
- Value[VGPR] + Value[AVGPR] > AddressableArchVGPR
- ? (Value[VGPR] + Value[AVGPR] - AddressableArchVGPR)
- : 0;
- return Value[AGPR] + VGPRsForAGPRs;
+ unsigned AVGPRsAsAGPRs = getAVGPRsAsAGPRsNum(
+ Value[VGPR], Value[AGPR], Value[AVGPR], AddressableArchVGPR);
+
+ return Value[AGPR] + AVGPRsAsAGPRs;
}
/// \returns the AVGPR32 pressure
unsigned getAVGPRNum() const { return Value[AVGPR]; }
diff --git a/llvm/test/CodeGen/AMDGPU/avgpr-pressure.mir b/llvm/test/CodeGen/AMDGPU/avgpr-pressure.mir
index 358942e73a7c6..a5183ce0d2661 100644
--- a/llvm/test/CodeGen/AMDGPU/avgpr-pressure.mir
+++ b/llvm/test/CodeGen/AMDGPU/avgpr-pressure.mir
@@ -42,6 +42,22 @@
unreachable
}
+
+ define void @vgpr_rp_occ1() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @vgpr_rp_occ2() #1 {
+ entry:
+ unreachable
+ }
+
+ define void @vgpr_rp_occ3() #2 {
+ entry:
+ unreachable
+ }
+
attributes #0 = {"amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64"}
attributes #1 = {"amdgpu-waves-per-eu"="2,2" "amdgpu-flat-work-group-size"="64,64"}
attributes #2 = {"amdgpu-waves-per-eu"="3,3" "amdgpu-flat-work-group-size"="64,64"}
@@ -194,8 +210,8 @@ machineFunctionInfo:
body: |
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
- %1:vreg_1024 = IMPLICIT_DEF
- %2:vreg_1024 = IMPLICIT_DEF
+ %1:av_1024 = IMPLICIT_DEF
+ %2:av_1024 = IMPLICIT_DEF
%3:av_1024 = IMPLICIT_DEF
%4:av_1024 = IMPLICIT_DEF
SCHED_BARRIER 0
@@ -210,7 +226,7 @@ body: |
# CHECK: Pressure before scheduling:
# CHECK-NEXT: Region live-ins:
# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8
-# CHECK-NEXT: Region register pressure: VGPRs: 48 AGPRs: 80(O4), SGPRs: 0(O10), LVGPR WT: 64, LSGPR WT: 0 -> Occ: 4
+# CHECK-NEXT: Region register pressure: VGPRs: 48 AGPRs: 80(O4), SGPRs: 0(O10), LVGPR WT: 80, LSGPR WT: 0 -> Occ: 4
---
name: avgpr_rp_occ5
@@ -229,8 +245,8 @@ machineFunctionInfo:
body: |
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
- %1:vreg_1024 = IMPLICIT_DEF
- %2:vreg_1024 = IMPLICIT_DEF
+ %1:av_1024 = IMPLICIT_DEF
+ %2:av_1024 = IMPLICIT_DEF
%3:av_1024 = IMPLICIT_DEF
%4:av_1024 = IMPLICIT_DEF
SCHED_BARRIER 0
@@ -245,7 +261,7 @@ body: |
# CHECK: Pressure before scheduling:
# CHECK-NEXT: Region live-ins:
# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8
-# CHECK-NEXT: Region register pressure: VGPRs: 40 AGPRs: 88(O4), SGPRs: 0(O10), LVGPR WT: 64, LSGPR WT: 0 -> Occ: 4
+# CHECK-NEXT: Region register pressure: VGPRs: 40 AGPRs: 88(O4), SGPRs: 0(O10), LVGPR WT: 88, LSGPR WT: 0 -> Occ: 4
---
name: avgpr_rp_occ6
@@ -264,8 +280,8 @@ machineFunctionInfo:
body: |
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
- %1:vreg_1024 = IMPLICIT_DEF
- %2:vreg_1024 = IMPLICIT_DEF
+ %1:av_1024 = IMPLICIT_DEF
+ %2:av_1024 = IMPLICIT_DEF
%3:av_1024 = IMPLICIT_DEF
%4:av_1024 = IMPLICIT_DEF
SCHED_BARRIER 0
@@ -280,7 +296,7 @@ body: |
# CHECK: Pressure before scheduling:
# CHECK-NEXT: Region live-ins:
# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8
-# CHECK-NEXT: Region register pressure: VGPRs: 36 AGPRs: 92(O4), SGPRs: 0(O10), LVGPR WT: 64, LSGPR WT: 0 -> Occ: 4
+# CHECK-NEXT: Region register pressure: VGPRs: 36 AGPRs: 92(O4), SGPRs: 0(O10), LVGPR WT: 92, LSGPR WT: 0 -> Occ: 4
---
name: avgpr_rp_occ7
@@ -299,8 +315,8 @@ machineFunctionInfo:
body: |
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
- %1:vreg_1024 = IMPLICIT_DEF
- %2:vreg_1024 = IMPLICIT_DEF
+ %1:av_1024 = IMPLICIT_DEF
+ %2:av_1024 = IMPLICIT_DEF
%3:av_1024 = IMPLICIT_DEF
%4:av_1024 = IMPLICIT_DEF
SCHED_BARRIER 0
@@ -315,7 +331,7 @@ body: |
# CHECK: Pressure before scheduling:
# CHECK-NEXT: Region live-ins:
# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8
-# CHECK-NEXT: Region register pressure: VGPRs: 32 AGPRs: 96(O4), SGPRs: 0(O10), LVGPR WT: 64, LSGPR WT: 0 -> Occ: 4
+# CHECK-NEXT: Region register pressure: VGPRs: 32 AGPRs: 96(O4), SGPRs: 0(O10), LVGPR WT: 96, LSGPR WT: 0 -> Occ: 4
---
name: avgpr_rp_occ8
@@ -334,8 +350,8 @@ machineFunctionInfo:
body: |
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5
- %1:vreg_1024 = IMPLICIT_DEF
- %2:vreg_1024 = IMPLICIT_DEF
+ %1:av_1024 = IMPLICIT_DEF
+ %2:av_1024 = IMPLICIT_DEF
%3:av_1024 = IMPLICIT_DEF
%4:av_1024 = IMPLICIT_DEF
SCHED_BARRIER 0
@@ -346,3 +362,120 @@ body: |
S_ENDPGM 0
...
+# CHECK: vgpr_rp_occ1:%bb.0
+# CHECK: Pressure before scheduling:
+# CHECK-NEXT: Region live-ins:
+# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8
+# CHECK-NEXT: Region register pressure: VGPRs: 448 AGPRs: 0(O1), SGPRs: 0(O10), LVGPR WT: 448, LSGPR WT: 0 -> Occ: 1
+
+---
+name: vgpr_rp_occ1
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_1024 = IMPLICIT_DEF
+ %9:vreg_1024 = IMPLICIT_DEF
+ %10:vreg_1024 = IMPLICIT_DEF
+ %11:vreg_1024 = IMPLICIT_DEF
+ %12:vreg_1024 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ %14:vreg_1024 = IMPLICIT_DEF
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7
+
+ bb.1:
+ KILL %8, %9, %10, %11, %12, %13, %14
+ S_ENDPGM 0
+...
+
+# CHECK: vgpr_rp_occ2:%bb.0
+# CHECK: Pressure before scheduling:
+# CHECK-NEXT: Region live-ins:
+# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8
+# CHECK-NEXT: Region register pressure: VGPRs: 192 AGPRs: 0(O2), SGPRs: 0(O10), LVGPR WT: 192, LSGPR WT: 0 -> Occ: 2
+
+---
+name: vgpr_rp_occ2
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ SCHED_BARRIER 0
+ KILL %1, %2, %3
+
+ bb.1:
+ KILL %4, %5, %6
+ S_ENDPGM 0
+...
+
+# CHECK: vgpr_rp_occ3:%bb.0
+# CHECK: Pressure before scheduling:
+# CHECK-NEXT: Region live-ins:
+# CHECK-NEXT: Region live-in pressure: VGPRs: 0 AGPRs: 0(O8), SGPRs: 0(O10), LVGPR WT: 0, LSGPR WT: 0 -> Occ: 8
+# CHECK-NEXT: Region register pressure: VGPRs: 128 AGPRs: 0(O4), SGPRs: 0(O10), LVGPR WT: 128, LSGPR WT: 0 -> Occ: 4
+
+
+---
+name: vgpr_rp_occ3
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ SCHED_BARRIER 0
+ KILL %1, %2
+
+ bb.1:
+ KILL %3, %4
+ S_ENDPGM 0
+...
More information about the llvm-commits mailing list