[llvm] [AMDGPU] Replace dynamic VGPR feature with attribute (PR #133444)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 28 07:01:53 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Diana Picus (rovka)
<details>
<summary>Changes</summary>
Use a function attribute (amdgpu-dynamic-vgpr) instead of a subtarget feature, as requested in #130030.
---
Patch is 99.47 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133444.diff
30 Files Affected:
- (modified) llvm/docs/AMDGPUUsage.rst (+5-5)
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (-6)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (+9-9)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (+5-1)
- (modified) llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp (+15-12)
- (modified) llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/GCNRegPressure.cpp (+8-6)
- (modified) llvm/lib/Target/AMDGPU/GCNRegPressure.h (+6-6)
- (modified) llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp (+20-18)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.cpp (+14-6)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+9-12)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp (+2-1)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h (+1)
- (modified) llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/SIFrameLowering.cpp (+3-3)
- (modified) llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (+4-4)
- (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (+4)
- (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (+8)
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+17-17)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/machine-function-info-cwsr.ll (+2-2)
- (added) llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable-dvgpr.ll (+305)
- (modified) llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll (-2)
- (added) llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-dvgpr.ll (+205)
- (modified) llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll (+4-9)
- (added) llvm/test/CodeGen/AMDGPU/release-vgprs-gfx12-dvgpr.mir (+340)
- (modified) llvm/test/CodeGen/AMDGPU/release-vgprs-gfx12.mir (+19-38)
- (modified) llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp (+19-19)
``````````diff
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index ab507e3714ebb..daedc324c5d0a 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -759,11 +759,6 @@ For example:
performant than code generated for XNACK replay
disabled.
- dynamic-vgpr TODO Represents the "Dynamic VGPR" hardware mode, introduced in GFX12.
- Waves launched in this mode may allocate or deallocate the VGPRs
- using dedicated instructions, but may not send the DEALLOC_VGPRS
- message.
-
=============== ============================ ==================================================
.. _amdgpu-target-id:
@@ -1743,6 +1738,11 @@ The AMDGPU backend supports the following LLVM IR attributes.
"amdgpu-promote-alloca-to-vector-vgpr-ratio" Ratio of VGPRs to budget for promoting alloca to vectors.
+ "amdgpu-dynamic-vgpr" Represents the "Dynamic VGPR" hardware mode, introduced in GFX12.
+ Waves launched in this mode may allocate or deallocate the VGPRs
+ using dedicated instructions, but may not send the DEALLOC_VGPRS
+ message.
+
================================================ ==========================================================
Calling Conventions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 84619dd656f35..8bd9fcd8e04e7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1257,12 +1257,6 @@ def FeatureXF32Insts : SubtargetFeature<"xf32-insts",
"v_mfma_f32_16x16x8_xf32 and v_mfma_f32_32x32x4_xf32"
>;
-def FeatureDynamicVGPR : SubtargetFeature <"dynamic-vgpr",
- "DynamicVGPR",
- "true",
- "Enable dynamic VGPR mode"
->;
-
def FeatureDynamicVGPRBlockSize32 : SubtargetFeature<"dynamic-vgpr-block-size-32",
"DynamicVGPRBlockSize32",
"true",
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 800e2b9c0e657..8e3f5aef8bd05 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -452,13 +452,13 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
uint64_t TotalNumVgpr =
getTotalNumVGPRs(STM.hasGFX90AInsts(), NumAgpr, NumVgpr);
uint64_t NumVGPRsForWavesPerEU = std::max(
- {TotalNumVgpr, (uint64_t)1, (uint64_t)STM.getMinNumVGPRs(MaxWaves)});
+ {TotalNumVgpr, (uint64_t)1, (uint64_t)STM.getMinNumVGPRs(MaxWaves, MFI.isDynamicVGPREnabled())});
uint64_t NumSGPRsForWavesPerEU = std::max(
{NumSgpr, (uint64_t)1, (uint64_t)STM.getMinNumSGPRs(MaxWaves)});
const MCExpr *OccupancyExpr = AMDGPUMCExpr::createOccupancy(
STM.getOccupancyWithWorkGroupSizes(*MF).second,
MCConstantExpr::create(NumSGPRsForWavesPerEU, OutContext),
- MCConstantExpr::create(NumVGPRsForWavesPerEU, OutContext), STM,
+ MCConstantExpr::create(NumVGPRsForWavesPerEU, OutContext), MFI.isDynamicVGPREnabled(), STM,
OutContext);
uint64_t Occupancy;
@@ -1081,7 +1081,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
Ctx);
ProgInfo.NumVGPRsForWavesPerEU =
AMDGPUMCExpr::createMax({ProgInfo.NumVGPR, CreateExpr(1ul),
- CreateExpr(STM.getMinNumVGPRs(MaxWaves))},
+ CreateExpr(STM.getMinNumVGPRs(MaxWaves, MFI->isDynamicVGPREnabled()))},
Ctx);
if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
@@ -1255,7 +1255,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.Occupancy = AMDGPUMCExpr::createOccupancy(
STM.computeOccupancy(F, ProgInfo.LDSSize).second,
- ProgInfo.NumSGPRsForWavesPerEU, ProgInfo.NumVGPRsForWavesPerEU, STM, Ctx);
+ ProgInfo.NumSGPRsForWavesPerEU, ProgInfo.NumVGPRsForWavesPerEU, MFI->isDynamicVGPREnabled(), STM, Ctx);
const auto [MinWEU, MaxWEU] =
AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", {0, 0}, true);
@@ -1404,7 +1404,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
// Helper function to add common PAL Metadata 3.0+
static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
const SIProgramInfo &CurrentProgramInfo,
- CallingConv::ID CC, const GCNSubtarget &ST) {
+ CallingConv::ID CC, const GCNSubtarget &ST, bool IsDynamicVGPR) {
if (ST.hasIEEEMode())
MD->setHwStage(CC, ".ieee_mode", (bool)CurrentProgramInfo.IEEEMode);
@@ -1416,7 +1416,7 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
(bool)CurrentProgramInfo.TrapHandlerEnable);
MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);
- if (ST.isDynamicVGPREnabled())
+ if (IsDynamicVGPR)
MD->setComputeRegisters(".dynamic_vgpr_en", true);
}
@@ -1443,7 +1443,7 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
// For targets that support dynamic VGPRs, set the number of saved dynamic
// VGPRs (if any) in the PAL metadata.
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
- if (STM.isDynamicVGPREnabled() &&
+ if (MFI->isDynamicVGPREnabled() &&
MFI->getScratchReservedForDynamicVGPRs() > 0)
MD->setHwStage(CC, ".dynamic_vgpr_saved_count",
MFI->getScratchReservedForDynamicVGPRs() / 4);
@@ -1469,7 +1469,7 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode);
MD->setHwStage(CC, ".scratch_en", msgpack::Type::Boolean,
CurrentProgramInfo.ScratchEnable);
- EmitPALMetadataCommon(MD, CurrentProgramInfo, CC, STM);
+ EmitPALMetadataCommon(MD, CurrentProgramInfo, CC, STM, MFI->isDynamicVGPREnabled());
}
// ScratchSize is in bytes, 16 aligned.
@@ -1540,7 +1540,7 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
MD->setRsrc2(CallingConv::AMDGPU_CS,
CurrentProgramInfo.getComputePGMRSrc2(Ctx), Ctx);
} else {
- EmitPALMetadataCommon(MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST);
+ EmitPALMetadataCommon(MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST, MF.getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled());
}
// Set optional info
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 98a70c0dbb912..8f9a7aefb104e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -199,8 +199,12 @@ unsigned getMaxVGPRs(const TargetMachine &TM, const Function &F) {
if (!TM.getTargetTriple().isAMDGCN())
return 128;
+ bool IsDynamicVGPR = false;
+ if (F.hasFnAttribute("amdgpu-dynamic-vgpr"))
+ IsDynamicVGPR = F.getFnAttribute("amdgpu-dynamic-vgpr").getValueAsBool();
+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
- unsigned MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
+ unsigned MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first, IsDynamicVGPR);
// A non-entry function has only 32 caller preserved registers.
// Do not promote alloca which will force spilling unless we know the function
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index da065e8d8cb6b..ded5c270e3cd9 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -426,13 +426,14 @@ void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
// TODO: assert Regions are sorted descending by pressure
const auto &ST = MF.getSubtarget<GCNSubtarget>();
- const auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
+ const bool IsDynamicVGPR = MF.getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled();
+ const auto Occ = Regions.front()->MaxPressure.getOccupancy(ST, IsDynamicVGPR);
LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
<< ", current = " << Occ << '\n');
auto NewOcc = TargetOcc;
for (auto *R : Regions) {
- if (R->MaxPressure.getOccupancy(ST) >= NewOcc)
+ if (R->MaxPressure.getOccupancy(ST, IsDynamicVGPR) >= NewOcc)
break;
LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
@@ -444,7 +445,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
printSchedRP(dbgs(), R->MaxPressure, MaxRP));
- NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST));
+ NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST, IsDynamicVGPR));
if (NewOcc <= Occ)
break;
@@ -465,9 +466,10 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
const auto &ST = MF.getSubtarget<GCNSubtarget>();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
auto TgtOcc = MFI->getMinAllowedOccupancy();
+ bool IsDynamicVGPR = MFI->isDynamicVGPREnabled();
sortRegionsByPressure(TgtOcc);
- auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
+ auto Occ = Regions.front()->MaxPressure.getOccupancy(ST, IsDynamicVGPR);
if (TryMaximizeOccupancy && Occ < TgtOcc)
Occ = tryMaximizeOccupancy(TgtOcc);
@@ -494,19 +496,19 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
const auto RP = getRegionPressure(*R);
LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
- if (RP.getOccupancy(ST) < TgtOcc) {
+ if (RP.getOccupancy(ST, IsDynamicVGPR) < TgtOcc) {
LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
if (R->BestSchedule.get() &&
- R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) {
+ R->BestSchedule->MaxPressure.getOccupancy(ST, IsDynamicVGPR) >= TgtOcc) {
LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
scheduleBest(*R);
} else {
LLVM_DEBUG(dbgs() << ", restoring\n");
Ovr.restoreOrder();
- assert(R->MaxPressure.getOccupancy(ST) >= TgtOcc);
+ assert(R->MaxPressure.getOccupancy(ST, IsDynamicVGPR) >= TgtOcc);
}
}
- FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST));
+ FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST, IsDynamicVGPR));
}
}
MFI->limitOccupancy(FinalOccupancy);
@@ -552,9 +554,10 @@ void GCNIterativeScheduler::scheduleILP(
const auto &ST = MF.getSubtarget<GCNSubtarget>();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
auto TgtOcc = MFI->getMinAllowedOccupancy();
+ bool IsDynamicVGPR = MFI->isDynamicVGPREnabled();
sortRegionsByPressure(TgtOcc);
- auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
+ auto Occ = Regions.front()->MaxPressure.getOccupancy(ST, IsDynamicVGPR);
if (TryMaximizeOccupancy && Occ < TgtOcc)
Occ = tryMaximizeOccupancy(TgtOcc);
@@ -572,17 +575,17 @@ void GCNIterativeScheduler::scheduleILP(
const auto RP = getSchedulePressure(*R, ILPSchedule);
LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
- if (RP.getOccupancy(ST) < TgtOcc) {
+ if (RP.getOccupancy(ST, IsDynamicVGPR) < TgtOcc) {
LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
if (R->BestSchedule.get() &&
- R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) {
+ R->BestSchedule->MaxPressure.getOccupancy(ST, IsDynamicVGPR) >= TgtOcc) {
LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
scheduleBest(*R);
}
} else {
scheduleRegion(*R, ILPSchedule, RP);
LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
- FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST));
+ FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST, IsDynamicVGPR));
}
}
MFI->limitOccupancy(FinalOccupancy);
diff --git a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
index 13eb0ca539a4c..f3fd6c8564693 100644
--- a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -251,7 +251,7 @@ bool GCNNSAReassignImpl::run(MachineFunction &MF) {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
- MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy()), MaxNumVGPRs);
+ MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy(), MFI->isDynamicVGPREnabled()), MaxNumVGPRs);
CSRegs = MRI->getCalleeSavedRegs();
using Candidate = std::pair<const MachineInstr*, bool>;
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index f74d12cfab0c0..3088dd2f65a8c 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -13,6 +13,7 @@
#include "GCNRegPressure.h"
#include "AMDGPU.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
using namespace llvm;
@@ -92,17 +93,18 @@ void GCNRegPressure::inc(unsigned Reg,
bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned MaxOccupancy) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ bool IsDynamicVGPR = MF.getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled();
const auto SGPROcc = std::min(MaxOccupancy,
ST.getOccupancyWithNumSGPRs(getSGPRNum()));
const auto VGPROcc =
std::min(MaxOccupancy,
- ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts())));
+ ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts()), IsDynamicVGPR));
const auto OtherSGPROcc = std::min(MaxOccupancy,
ST.getOccupancyWithNumSGPRs(O.getSGPRNum()));
const auto OtherVGPROcc =
std::min(MaxOccupancy,
- ST.getOccupancyWithNumVGPRs(O.getVGPRNum(ST.hasGFX90AInsts())));
+ ST.getOccupancyWithNumVGPRs(O.getVGPRNum(ST.hasGFX90AInsts()), IsDynamicVGPR));
const auto Occ = std::min(SGPROcc, VGPROcc);
const auto OtherOcc = std::min(OtherSGPROcc, OtherVGPROcc);
@@ -224,13 +226,13 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
O.getVGPRNum(ST.hasGFX90AInsts()));
}
-Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST) {
- return Printable([&RP, ST](raw_ostream &OS) {
+Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST, bool IsDynamicVGPR) {
+ return Printable([&RP, ST, IsDynamicVGPR](raw_ostream &OS) {
OS << "VGPRs: " << RP.Value[GCNRegPressure::VGPR32] << ' '
<< "AGPRs: " << RP.getAGPRNum();
if (ST)
OS << "(O"
- << ST->getOccupancyWithNumVGPRs(RP.getVGPRNum(ST->hasGFX90AInsts()))
+ << ST->getOccupancyWithNumVGPRs(RP.getVGPRNum(ST->hasGFX90AInsts()), IsDynamicVGPR)
<< ')';
OS << ", SGPRs: " << RP.getSGPRNum();
if (ST)
@@ -238,7 +240,7 @@ Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST) {
OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight()
<< ", LSGPR WT: " << RP.getSGPRTuplesWeight();
if (ST)
- OS << " -> Occ: " << RP.getOccupancy(*ST);
+ OS << " -> Occ: " << RP.getOccupancy(*ST, IsDynamicVGPR);
OS << '\n';
});
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 7554b9f578fcb..15e86c733ff60 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -67,9 +67,9 @@ struct GCNRegPressure {
Value[AGPR_TUPLE]); }
unsigned getSGPRTuplesWeight() const { return Value[SGPR_TUPLE]; }
- unsigned getOccupancy(const GCNSubtarget &ST) const {
+ unsigned getOccupancy(const GCNSubtarget &ST, bool IsDynamicVGPR) const {
return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()),
- ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts())));
+ ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts()), IsDynamicVGPR));
}
void inc(unsigned Reg,
@@ -77,8 +77,8 @@ struct GCNRegPressure {
LaneBitmask NewMask,
const MachineRegisterInfo &MRI);
- bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure& O) const {
- return getOccupancy(ST) > O.getOccupancy(ST);
+ bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure& O, bool IsDynamicVGPR) const {
+ return getOccupancy(ST, IsDynamicVGPR) > O.getOccupancy(ST, IsDynamicVGPR);
}
/// Compares \p this GCNRegpressure to \p O, returning true if \p this is
@@ -126,7 +126,7 @@ struct GCNRegPressure {
friend GCNRegPressure max(const GCNRegPressure &P1,
const GCNRegPressure &P2);
- friend Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST);
+ friend Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST, bool IsDynamicVGPR);
};
inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) {
@@ -395,7 +395,7 @@ GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI,
bool isEqual(const GCNRPTracker::LiveRegSet &S1,
const GCNRPTracker::LiveRegSet &S2);
-Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST = nullptr);
+Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST = nullptr, bool IsDynamicVGPR = false);
Printable print(const GCNRPTracker::LiveRegSet &LiveRegs,
const MachineRegisterInfo &MRI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index ea9bc88bbe86b..f2900beb5234c 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -95,16 +95,16 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
if (!KnownExcessRP) {
VGPRCriticalLimit =
- std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
+ std::min(ST.getMaxNumVGPRs(TargetOccupancy, MFI.isDynamicVGPREnabled()), VGPRExcessLimit);
} else {
// This is similar to ST.getMaxNumVGPRs(TargetOccupancy) result except
// returns a reasonably small number for targets with lots of VGPRs, such
// as GFX10 and GFX11.
LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
"VGPRCriticalLimit calculation method.\n");
-
- unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
- unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
+ bool IsDynamicVGPR = MFI.isDynamicVGPREnabled();
+ unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST, IsDynamicVGPR);
+ unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST, IsDynamicVGPR);
unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
VGPRBudget = std::max(VGPRBudget, Granule);
VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
@@ -1126,7 +1126,7 @@ void UnclusteredHighRPStage::finalizeGCNSchedStage() {
if (DAG.MinOccupancy > InitialOccupancy) {
for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
DAG.RegionsWithMinOcc[IDX] =
- DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;
+ DAG.Pressure[IDX].getOccupancy(DAG.ST, DAG.MFI.isDynamicVGPREnabled()) == DAG.MinOccupancy;
LLVM_DEBUG(dbgs() << StageID
<< " stage successfully increased occupancy to "
@@ -1266,11 +1266,13 @@ void GCNSchedStage::checkScheduling() {
LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
+ bool IsDynamicVGPR = DAG.MFI.i...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/133444
More information about the llvm-commits
mailing list