[llvm] af5e5c4 - [AMDGPU] Add WWM reserved VGPRs to WWMSpills

Fri Dec 16 22:18:17 PST 2022

Author: Christudasan Devadasan
Date: 2022-12-17T11:47:58+05:30
New Revision: af5e5c40ff73ca66e699c271e602a451a4d586a8

URL: https://github.com/llvm/llvm-project/commit/af5e5c40ff73ca66e699c271e602a451a4d586a8
DIFF: https://github.com/llvm/llvm-project/commit/af5e5c40ff73ca66e699c271e602a451a4d586a8.diff

LOG: [AMDGPU] Add WWM reserved VGPRs to WWMSpills

The custom VGPR spills inserted during frame lowering
maintain a separate list for WWM reserved registers.
Added them into WWMSpills that already tracks such
reserved registers. It unifies the spill insertion.

Reviewed By: nhaehnle, arsenm

Differential Revision: https://reviews.llvm.org/D124193

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
    llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
    llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
    llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 5e8525e36cc7..b846bfc8f503 100644

--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -778,7 +778,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   std::optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
   std::optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
 
-  // VGPRs used for SGPR->VGPR spills
+  // Spill Whole-Wave Mode VGPRs.
   for (const auto &Reg : FuncInfo->getWWMSpills()) {
     Register VGPR = Reg.first;
     int FI = Reg.second;
@@ -789,15 +789,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
     buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR, FI);
   }
 
-  for (auto ReservedWWM : FuncInfo->wwmAllocation()) {
-    if (!ScratchExecCopy)
-      ScratchExecCopy =
-          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, /*IsProlog*/ true);
-
-    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
-                     std::get<0>(ReservedWWM), std::get<1>(ReservedWWM));
-  }
-
   if (ScratchExecCopy) {
     // FIXME: Split block and make terminator.
     unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
@@ -1064,15 +1055,6 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                        FI);
   }
 
-  for (auto ReservedWWM : FuncInfo->wwmAllocation()) {
-    if (!ScratchExecCopy)
-      ScratchExecCopy =
-          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, /*IsProlog*/ false);
-
-    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
-                       std::get<0>(ReservedWWM), std::get<1>(ReservedWWM));
-  }
-
   if (ScratchExecCopy) {
     // FIXME: Split block and make terminator.
     unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
@@ -1120,9 +1102,13 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
   MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
 
+  // Allocate spill slots for WWM reserved VGPRs.
   if (!FuncInfo->isEntryFunction()) {
-    // Spill VGPRs used for Whole Wave Mode
-    FuncInfo->allocateWWMReservedSpillSlots(MFI, *TRI);
+    for (Register Reg : FuncInfo->getWWMReservedRegs()) {
+      const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg);
+      FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
+                                 TRI->getSpillAlign(*RC));
+    }
   }
 
   const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
@@ -1306,8 +1292,8 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
       FrameInfo.hasCalls() &&
       (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
 
-  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
-  // so don't allow the default insertion to handle them.
+  // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
+  // allow the default insertion to handle them.
   for (auto &Reg : MFI->getWWMSpills())
     SavedVGPRs.reset(Reg.first);
 

diff  --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 2691855b4fb7..16171d202004 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -473,20 +473,6 @@ bool SIMachineFunctionInfo::removeDeadFrameIndices(
   return HaveSGPRToMemory;
 }
 
-void SIMachineFunctionInfo::allocateWWMReservedSpillSlots(
-    MachineFrameInfo &MFI, const SIRegisterInfo &TRI) {
-  assert(WWMReservedFrameIndexes.empty());
-
-  WWMReservedFrameIndexes.resize(WWMReservedRegs.size());
-
-  int I = 0;
-  for (Register VGPR : WWMReservedRegs) {
-    const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR);
-    WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject(
-        TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC));
-  }
-}
-
 int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                          const SIRegisterInfo &TRI) {
   if (ScavengeFI)
@@ -613,7 +599,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
       BytesInStackArgArea(MFI.getBytesInStackArgArea()),
       ReturnsVoid(MFI.returnsVoid()),
       ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
-  for (Register Reg : MFI.WWMReservedRegs)
+  for (Register Reg : MFI.getWWMReservedRegs())
     WWMReservedRegs.push_back(regToString(Reg, TRI));
 
   if (MFI.getVGPRForAGPRCopy())

diff  --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index d0d38fbcd753..6d0606402445 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -437,24 +437,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
     bool IsDead = false;
   };
 
-  // Track VGPRs reserved for WWM.
-  SmallSetVector<Register, 8> WWMReservedRegs;
-
-  /// Track stack slots used for save/restore of reserved WWM VGPRs in the
-  /// prolog/epilog.
-
-  /// FIXME: This is temporary state only needed in PrologEpilogInserter, and
-  /// doesn't really belong here. It does not require serialization
-  SmallVector<int, 8> WWMReservedFrameIndexes;
-
-  void allocateWWMReservedSpillSlots(MachineFrameInfo &MFI,
-                                     const SIRegisterInfo &TRI);
-
-  auto wwmAllocation() const {
-    assert(WWMReservedRegs.size() == WWMReservedFrameIndexes.size());
-    return zip(WWMReservedRegs, WWMReservedFrameIndexes);
-  }
-
 private:
   // Track VGPR + wave index for each subregister of the SGPR spilled to
   // frameindex key.
@@ -470,6 +452,13 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   // the VGPR and its stack slot index.
   WWMSpillsMap WWMSpills;
 
+  using ReservedRegSet = SmallSetVector<Register, 8>;
+  // To track the VGPRs reserved for WWM instructions. They get stack slots
+  // later during PrologEpilogInserter and get added into the superset WWMSpills
+  // for actual spilling. A separate set makes the register reserved part and
+  // the serialization easier.
+  ReservedRegSet WWMReservedRegs;
+
   DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
 
   // AGPRs used for VGPR spills.
@@ -521,9 +510,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
                                 PerFunctionMIParsingState &PFS,
                                 SMDiagnostic &Error, SMRange &SourceRange);
 
-  void reserveWWMRegister(Register Reg) {
-    WWMReservedRegs.insert(Reg);
-  }
+  void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); }
 
   AMDGPU::SIModeRegisterDefaults getMode() const {
     return Mode;
@@ -539,6 +526,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
 
   ArrayRef<Register> getSGPRSpillVGPRs() const { return SpillVGPRs; }
   const WWMSpillsMap &getWWMSpills() const { return WWMSpills; }
+  const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; }
 
   void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size = 4,
                         Align Alignment = Align(4));

diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 33bc4b78091f..87553f128327 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -701,7 +701,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy());
   }
 
-  for (Register Reg : MFI->WWMReservedRegs)
+  for (Register Reg : MFI->getWWMReservedRegs())
     reserveRegisterTuples(Reserved, Reg);
 
   // FIXME: Stop using reserved registers for this.