[llvm] af5e5c4 - [AMDGPU] Add WWM reserved VGPRs to WWMSpills
Christudasan Devadasan via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 16 22:18:17 PST 2022
Author: Christudasan Devadasan
Date: 2022-12-17T11:47:58+05:30
New Revision: af5e5c40ff73ca66e699c271e602a451a4d586a8
URL: https://github.com/llvm/llvm-project/commit/af5e5c40ff73ca66e699c271e602a451a4d586a8
DIFF: https://github.com/llvm/llvm-project/commit/af5e5c40ff73ca66e699c271e602a451a4d586a8.diff
LOG: [AMDGPU] Add WWM reserved VGPRs to WWMSpills
The custom VGPR spills inserted during frame lowering
maintained a separate list for WWM reserved registers.
Add them to WWMSpills, which already tracks such
reserved registers. This unifies the spill insertion.
Reviewed By: nhaehnle, arsenm
Differential Revision: https://reviews.llvm.org/D124193
Added:
Modified:
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 5e8525e36cc7..b846bfc8f503 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -778,7 +778,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
std::optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
std::optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
- // VGPRs used for SGPR->VGPR spills
+ // Spill Whole-Wave Mode VGPRs.
for (const auto &Reg : FuncInfo->getWWMSpills()) {
Register VGPR = Reg.first;
int FI = Reg.second;
@@ -789,15 +789,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR, FI);
}
- for (auto ReservedWWM : FuncInfo->wwmAllocation()) {
- if (!ScratchExecCopy)
- ScratchExecCopy =
- buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, /*IsProlog*/ true);
-
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
- std::get<0>(ReservedWWM), std::get<1>(ReservedWWM));
- }
-
if (ScratchExecCopy) {
// FIXME: Split block and make terminator.
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
@@ -1064,15 +1055,6 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
FI);
}
- for (auto ReservedWWM : FuncInfo->wwmAllocation()) {
- if (!ScratchExecCopy)
- ScratchExecCopy =
- buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, /*IsProlog*/ false);
-
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
- std::get<0>(ReservedWWM), std::get<1>(ReservedWWM));
- }
-
if (ScratchExecCopy) {
// FIXME: Split block and make terminator.
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
@@ -1120,9 +1102,13 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ // Allocate spill slots for WWM reserved VGPRs.
if (!FuncInfo->isEntryFunction()) {
- // Spill VGPRs used for Whole Wave Mode
- FuncInfo->allocateWWMReservedSpillSlots(MFI, *TRI);
+ for (Register Reg : FuncInfo->getWWMReservedRegs()) {
+ const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg);
+ FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
+ TRI->getSpillAlign(*RC));
+ }
}
const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
@@ -1306,8 +1292,8 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
FrameInfo.hasCalls() &&
(SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
- // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
- // so don't allow the default insertion to handle them.
+ // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
+ // allow the default insertion to handle them.
for (auto &Reg : MFI->getWWMSpills())
SavedVGPRs.reset(Reg.first);
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 2691855b4fb7..16171d202004 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -473,20 +473,6 @@ bool SIMachineFunctionInfo::removeDeadFrameIndices(
return HaveSGPRToMemory;
}
-void SIMachineFunctionInfo::allocateWWMReservedSpillSlots(
- MachineFrameInfo &MFI, const SIRegisterInfo &TRI) {
- assert(WWMReservedFrameIndexes.empty());
-
- WWMReservedFrameIndexes.resize(WWMReservedRegs.size());
-
- int I = 0;
- for (Register VGPR : WWMReservedRegs) {
- const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR);
- WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject(
- TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC));
- }
-}
-
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
const SIRegisterInfo &TRI) {
if (ScavengeFI)
@@ -613,7 +599,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
BytesInStackArgArea(MFI.getBytesInStackArgArea()),
ReturnsVoid(MFI.returnsVoid()),
ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
- for (Register Reg : MFI.WWMReservedRegs)
+ for (Register Reg : MFI.getWWMReservedRegs())
WWMReservedRegs.push_back(regToString(Reg, TRI));
if (MFI.getVGPRForAGPRCopy())
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index d0d38fbcd753..6d0606402445 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -437,24 +437,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
bool IsDead = false;
};
- // Track VGPRs reserved for WWM.
- SmallSetVector<Register, 8> WWMReservedRegs;
-
- /// Track stack slots used for save/restore of reserved WWM VGPRs in the
- /// prolog/epilog.
-
- /// FIXME: This is temporary state only needed in PrologEpilogInserter, and
- /// doesn't really belong here. It does not require serialization
- SmallVector<int, 8> WWMReservedFrameIndexes;
-
- void allocateWWMReservedSpillSlots(MachineFrameInfo &MFI,
- const SIRegisterInfo &TRI);
-
- auto wwmAllocation() const {
- assert(WWMReservedRegs.size() == WWMReservedFrameIndexes.size());
- return zip(WWMReservedRegs, WWMReservedFrameIndexes);
- }
-
private:
// Track VGPR + wave index for each subregister of the SGPR spilled to
// frameindex key.
@@ -470,6 +452,13 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
// the VGPR and its stack slot index.
WWMSpillsMap WWMSpills;
+ using ReservedRegSet = SmallSetVector<Register, 8>;
+ // To track the VGPRs reserved for WWM instructions. They get stack slots
+ // later during PrologEpilogInserter and get added into the superset WWMSpills
+ // for actual spilling. A separate set makes the register reserved part and
+ // the serialization easier.
+ ReservedRegSet WWMReservedRegs;
+
DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
// AGPRs used for VGPR spills.
@@ -521,9 +510,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
PerFunctionMIParsingState &PFS,
SMDiagnostic &Error, SMRange &SourceRange);
- void reserveWWMRegister(Register Reg) {
- WWMReservedRegs.insert(Reg);
- }
+ void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); }
AMDGPU::SIModeRegisterDefaults getMode() const {
return Mode;
@@ -539,6 +526,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
ArrayRef<Register> getSGPRSpillVGPRs() const { return SpillVGPRs; }
const WWMSpillsMap &getWWMSpills() const { return WWMSpills; }
+ const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; }
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size = 4,
Align Alignment = Align(4));
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 33bc4b78091f..87553f128327 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -701,7 +701,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy());
}
- for (Register Reg : MFI->WWMReservedRegs)
+ for (Register Reg : MFI->getWWMReservedRegs())
reserveRegisterTuples(Reserved, Reg);
// FIXME: Stop using reserved registers for this.
More information about the llvm-commits mailing list