[llvm] f014303 - [AMDGPU] [NFC]: Organize the code around reserving registers.
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 22 18:48:37 PDT 2022
Author: hsmahesha
Date: 2022-03-23T07:15:59+05:30
New Revision: f014303e2ce410aa20517db612d91488e7a06d22
URL: https://github.com/llvm/llvm-project/commit/f014303e2ce410aa20517db612d91488e7a06d22
DIFF: https://github.com/llvm/llvm-project/commit/f014303e2ce410aa20517db612d91488e7a06d22.diff
LOG: [AMDGPU] [NFC]: Organize the code around reserving registers.
First, add code to reserve all required special purpose registers,
followed by code to reserve SGPRs, followed by code to reserve
VGPRs/AGPRs.
This patch is a prerequisite for fixing an issue related to
GFX90A hardware.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D122219
Added:
Modified:
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 3d7d56ec71d28..deeaee928f7f3 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -546,6 +546,10 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(AMDGPU::MODE);
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ // Reserve special purpose registers.
+ //
// EXEC_LO and EXEC_HI could be allocated and used as regular register, but
// this seems likely to result in bugs, so I'm marking them as reserved.
reserveRegisterTuples(Reserved, AMDGPU::EXEC);
@@ -596,6 +600,8 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AMDGPU::VCC_HI);
}
+ // Reserve SGPRs.
+ //
unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
@@ -603,43 +609,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, Reg);
}
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
- unsigned MaxNumAGPRs = MaxNumVGPRs;
- unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
-
- if (ST.hasGFX90AInsts()) {
- // In an entry function without calls and AGPRs used it is possible to use
- // the whole register budget for VGPRs.
-
- // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and
- // split register file accordingly.
- if (MFI->usesAGPRs(MF)) {
- MaxNumVGPRs /= 2;
- MaxNumAGPRs = MaxNumVGPRs;
- } else {
- if (MaxNumVGPRs > TotalNumVGPRs) {
- MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
- MaxNumVGPRs = TotalNumVGPRs;
- } else
- MaxNumAGPRs = 0;
- }
- } else if (ST.hasMAIInsts()) {
- // In order to guarantee copying between AGPRs, we need a scratch VGPR
- // available at all times.
- reserveRegisterTuples(Reserved, AMDGPU::VGPR32);
- }
-
- for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
- unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
- reserveRegisterTuples(Reserved, Reg);
- }
-
- for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
- unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
- reserveRegisterTuples(Reserved, Reg);
- }
-
for (auto Reg : AMDGPU::SReg_32RegClass) {
Reserved.set(getSubReg(Reg, AMDGPU::hi16));
Register Low = getSubReg(Reg, AMDGPU::lo16);
@@ -648,22 +617,10 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(Low);
}
- for (auto Reg : AMDGPU::AGPR_32RegClass) {
- Reserved.set(getSubReg(Reg, AMDGPU::hi16));
- }
-
- // Reserve all the rest AGPRs if there are no instructions to use it.
- if (!ST.hasMAIInsts()) {
- for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
- unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
- reserveRegisterTuples(Reserved, Reg);
- }
- }
-
Register ScratchRSrcReg = MFI->getScratchRSrcReg();
if (ScratchRSrcReg != AMDGPU::NoRegister) {
- // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
- // to spill.
+ // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
+ // need to spill.
// TODO: May need to reserve a VGPR if doing LDS spilling.
reserveRegisterTuples(Reserved, ScratchRSrcReg);
}
@@ -672,7 +629,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// which is detected after the function is lowered. If we aren't really going
// to need SP, don't bother reserving it.
MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
-
if (StackPtrReg) {
reserveRegisterTuples(Reserved, StackPtrReg);
assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
@@ -690,20 +646,64 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
}
- for (auto Reg : MFI->WWMReservedRegs) {
- reserveRegisterTuples(Reserved, Reg.first);
+ // Reserve VGPRs/AGPRs.
+ //
+ unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
+ unsigned MaxNumAGPRs = MaxNumVGPRs;
+ unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
+
+ // Reserve all the AGPRs if there are no instructions to use it.
+ if (!ST.hasMAIInsts()) {
+ for (unsigned i = 0; i < MaxNumAGPRs; ++i) {
+ unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
+ reserveRegisterTuples(Reserved, Reg);
+ }
}
- // Reserve VGPRs used for SGPR spilling.
- // Note we treat freezeReservedRegs unusually because we run register
- // allocation in two phases. It's OK to re-freeze with new registers for the
- // second run.
-#if 0
- for (auto &SpilledFI : MFI->sgpr_spill_vgprs()) {
- for (auto &SpilledVGPR : SpilledFI.second)
- reserveRegisterTuples(Reserved, SpilledVGPR.VGPR);
+ for (auto Reg : AMDGPU::AGPR_32RegClass) {
+ Reserved.set(getSubReg(Reg, AMDGPU::hi16));
+ }
+
+ // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
+ // a wave may have up to 512 total vector registers combining together both
+ // VGPRs and AGPRs. Hence, in an entry function without calls and without
+ // AGPRs used within it, it is possible to use the whole vector register
+ // budget for VGPRs.
+ //
+ // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
+ // register file accordingly.
+ if (ST.hasGFX90AInsts()) {
+ if (MFI->usesAGPRs(MF)) {
+ MaxNumVGPRs /= 2;
+ MaxNumAGPRs = MaxNumVGPRs;
+ } else {
+ if (MaxNumVGPRs > TotalNumVGPRs) {
+ MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
+ MaxNumVGPRs = TotalNumVGPRs;
+ } else
+ MaxNumAGPRs = 0;
+ }
+ }
+
+ for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
+ unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
+ reserveRegisterTuples(Reserved, Reg);
+ }
+
+ for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
+ unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
+ reserveRegisterTuples(Reserved, Reg);
+ }
+
+ // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
+ // VGPR available at all times.
+ if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
+ reserveRegisterTuples(Reserved, AMDGPU::VGPR32);
+ }
+
+ for (auto Reg : MFI->WWMReservedRegs) {
+ reserveRegisterTuples(Reserved, Reg.first);
}
-#endif
// FIXME: Stop using reserved registers for this.
for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
More information about the llvm-commits
mailing list