[llvm] 20a940f - [AMDGPU][SIFrameLowering] Unify PEI SGPR spill saves and restores

Christudasan Devadasan via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 16 22:20:50 PST 2022


Author: Christudasan Devadasan
Date: 2022-12-17T11:50:25+05:30
New Revision: 20a940f1e217b0df031e656b2811be02b15eb14e

URL: https://github.com/llvm/llvm-project/commit/20a940f1e217b0df031e656b2811be02b15eb14e
DIFF: https://github.com/llvm/llvm-project/commit/20a940f1e217b0df031e656b2811be02b15eb14e.diff

LOG: [AMDGPU][SIFrameLowering] Unify PEI SGPR spill saves and restores

The frame lowering contains a lot of customization, and consequently code
duplication, for handling special SGPR spills like the one needed for
the Frame Pointer. This currently makes it difficult to incorporate any
additional SGPR spill during PEI. This patch introduces a new spill builder
to efficiently handle such spill requirements. The various spill methods are
handled specially by a separate class.

Reviewed By: sebastian-ne, scott.linder

Differential Revision: https://reviews.llvm.org/D132436

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
    llvm/lib/Target/AMDGPU/SIFrameLowering.h
    llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
    llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
    llvm/test/CodeGen/AMDGPU/stack-realign.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index bd32ef7bc9f0..9a9bb0750831 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -26,6 +26,18 @@ static cl::opt<bool> EnableSpillVGPRToAGPR(
   cl::ReallyHidden,
   cl::init(true));
 
+// Find a register matching \p RC from \p LiveRegs which is unused and available
+// throughout the function. On failure, returns AMDGPU::NoRegister.
+static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
+                                     const LivePhysRegs &LiveRegs,
+                                     const TargetRegisterClass &RC) {
+  for (MCRegister Reg : RC) {
+    if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
+      return Reg;
+  }
+  return MCRegister();
+}
+
 // Find a scratch register that we can use in the prologue. We avoid using
 // callee-save registers since they may appear to be free when this is called
 // from canUseAsPrologue (during shrink wrapping), but then no longer be free
@@ -39,65 +51,70 @@ static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
   for (unsigned i = 0; CSRegs[i]; ++i)
     LiveRegs.addReg(CSRegs[i]);
 
-  if (Unused) {
-    // We are looking for a register that can be used throughout the entire
-    // function, so any use is unacceptable.
-    for (MCRegister Reg : RC) {
-      if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
-        return Reg;
-    }
-  } else {
-    for (MCRegister Reg : RC) {
-      if (LiveRegs.available(MRI, Reg))
-        return Reg;
-    }
+  // We are looking for a register that can be used throughout the entire
+  // function, so any use is unacceptable.
+  if (Unused)
+    return findUnusedRegister(MRI, LiveRegs, RC);
+
+  for (MCRegister Reg : RC) {
+    if (LiveRegs.available(MRI, Reg))
+      return Reg;
   }
 
   return MCRegister();
 }
 
-static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
-                                           LivePhysRegs &LiveRegs,
-                                           Register &TempSGPR,
-                                           std::optional<int> &FrameIndex,
-                                           bool IsFP) {
+static void getVGPRSpillLaneOrTempRegister(
+    MachineFunction &MF, LivePhysRegs &LiveRegs, Register SGPR,
+    const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass) {
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
 
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  unsigned Size = TRI->getSpillSize(RC);
+  Align Alignment = TRI->getSpillAlign(RC);
 
-  // We need to save and restore the current FP/BP.
+  // We need to save and restore the given SGPR.
 
-  // 1: Try to save the FP/BP in an unused SGPR.
-  TempSGPR = findScratchNonCalleeSaveRegister(
-      MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
+  // 1: Try to save the given register into an unused scratch SGPR. The LiveRegs
+  // should have all the callee saved registers marked as used.
+  Register ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveRegs, RC);
 
-  if (!TempSGPR) {
-    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
-                                            TargetStackID::SGPRSpill);
+  if (!ScratchSGPR) {
+    int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
+                                         TargetStackID::SGPRSpill);
 
-    if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPRLane(
-                                      MF, NewFI, /* IsPrologEpilog */ true)) {
-      // 2: There's no free lane to spill, and no free register to save FP/BP,
-      // so we're forced to spill another VGPR to use for the spill.
-      FrameIndex = NewFI;
+    if (TRI->spillSGPRToVGPR() &&
+        MFI->allocateSGPRSpillToVGPRLane(MF, FI, /* IsPrologEpilog */ true)) {
+      // 2: There's no free lane to spill, and no free register to save the
+      // SGPR, so we're forced to take another VGPR to use for the spill.
+      MFI->addToPrologEpilogSGPRSpills(
+          SGPR, PrologEpilogSGPRSaveRestoreInfo(
+                    SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));
 
       LLVM_DEBUG(
-          auto Spill = MFI->getPrologEpilogSGPRSpillToVGPRLanes(NewFI).front();
-          dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
+          auto Spill = MFI->getPrologEpilogSGPRSpillToVGPRLanes(FI).front();
+          dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
                  << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
     } else {
-      // Remove dead <NewFI> index
-      MF.getFrameInfo().RemoveStackObject(NewFI);
-      // 3: If all else fails, spill the FP/BP to memory.
-      FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
-      LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
-                        << (IsFP ? "FP" : "BP") << '\n');
+      // Remove dead <FI> index
+      MF.getFrameInfo().RemoveStackObject(FI);
+      // 3: If all else fails, spill the register to memory.
+      FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
+      MFI->addToPrologEpilogSGPRSpills(
+          SGPR,
+          PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::SPILL_TO_MEM, FI));
+      LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling "
+                        << printReg(SGPR, TRI) << '\n');
     }
   } else {
-    LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
-                      << printReg(TempSGPR, TRI) << '\n');
+    MFI->addToPrologEpilogSGPRSpills(
+        SGPR, PrologEpilogSGPRSaveRestoreInfo(
+                  SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
+    LiveRegs.addReg(ScratchSGPR);
+    LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
+                      << printReg(ScratchSGPR, TRI) << '\n');
   }
 }
 
@@ -109,7 +126,7 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
                              LivePhysRegs &LiveRegs, MachineFunction &MF,
                              MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I, const DebugLoc &DL,
-                             Register SpillReg, int FI) {
+                             Register SpillReg, int FI, int64_t DwordOff = 0) {
   unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                         : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
 
@@ -121,19 +138,17 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
   LiveRegs.addReg(SpillReg);
   bool IsKill = !MBB.isLiveIn(SpillReg);
   TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill,
-                          FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
-                          &LiveRegs);
+                          FuncInfo.getStackPtrOffsetReg(), DwordOff, MMO,
+                          nullptr, &LiveRegs);
   if (IsKill)
     LiveRegs.removeReg(SpillReg);
 }
 
-static void buildEpilogRestore(const GCNSubtarget &ST,
-                               const SIRegisterInfo &TRI,
-                               const SIMachineFunctionInfo &FuncInfo,
-                               LivePhysRegs &LiveRegs, MachineFunction &MF,
-                               MachineBasicBlock &MBB,
-                               MachineBasicBlock::iterator I,
-                               const DebugLoc &DL, Register SpillReg, int FI) {
+static void buildEpilogRestore(
+    const GCNSubtarget &ST, const SIRegisterInfo &TRI,
+    const SIMachineFunctionInfo &FuncInfo, LivePhysRegs &LiveRegs,
+    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+    const DebugLoc &DL, Register SpillReg, int FI, int64_t DwordOff = 0) {
   unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                         : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
 
@@ -143,8 +158,8 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
       PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
       FrameInfo.getObjectAlign(FI));
   TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false,
-                          FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
-                          &LiveRegs);
+                          FuncInfo.getStackPtrOffsetReg(), DwordOff, MMO,
+                          nullptr, &LiveRegs);
 }
 
 static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
@@ -172,6 +187,182 @@ static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     .addReg(GitPtrLo);
 }
 
+static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
+                         const SIMachineFunctionInfo *FuncInfo,
+                         MachineFunction &MF, MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator MBBI, bool IsProlog) {
+  if (LiveRegs.empty()) {
+    LiveRegs.init(TRI);
+    if (IsProlog) {
+      LiveRegs.addLiveIns(MBB);
+    } else {
+      // In epilog.
+      LiveRegs.addLiveOuts(MBB);
+      LiveRegs.stepBackward(*MBBI);
+    }
+  }
+}
+
+namespace llvm {
+
+// SpillBuilder to save/restore special SGPR spills like the one needed for FP,
+// BP, etc. These spills are delayed until the current function's frame is
+// finalized. For a given register, the builder uses the
+// PrologEpilogSGPRSaveRestoreInfo to decide the spill method.
+class PrologEpilogSGPRSpillBuilder {
+  MachineBasicBlock::iterator MI;
+  MachineBasicBlock &MBB;
+  MachineFunction &MF;
+  const GCNSubtarget &ST;
+  MachineFrameInfo &MFI;
+  SIMachineFunctionInfo *FuncInfo;
+  const SIInstrInfo *TII;
+  const SIRegisterInfo &TRI;
+  Register SuperReg;
+  const PrologEpilogSGPRSaveRestoreInfo SI;
+  LivePhysRegs &LiveRegs;
+  const DebugLoc &DL;
+  ArrayRef<int16_t> SplitParts;
+  unsigned NumSubRegs;
+  unsigned EltSize = 4;
+
+  void saveToMemory(const int FI) const {
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+    assert(!MFI.isDeadObjectIndex(FI));
+
+    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
+
+    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
+        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+    if (!TmpVGPR)
+      report_fatal_error("failed to find free scratch register");
+
+    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
+      Register SubReg = NumSubRegs == 1
+                            ? SuperReg
+                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
+      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
+          .addReg(SubReg);
+
+      buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR,
+                       FI, DwordOff);
+      DwordOff += 4;
+    }
+  }
+
+  void saveToVGPRLane(const int FI) const {
+    assert(!MFI.isDeadObjectIndex(FI));
+
+    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
+        FuncInfo->getPrologEpilogSGPRSpillToVGPRLanes(FI);
+    assert(Spill.size() == NumSubRegs);
+
+    for (unsigned I = 0; I < NumSubRegs; ++I) {
+      Register SubReg = NumSubRegs == 1
+                            ? SuperReg
+                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
+      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[I].VGPR)
+          .addReg(SubReg)
+          .addImm(Spill[I].Lane)
+          .addReg(Spill[I].VGPR, RegState::Undef);
+    }
+  }
+
+  void copyToScratchSGPR(Register DstReg) const {
+    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
+        .addReg(SuperReg)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
+  void restoreFromMemory(const int FI) {
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+
+    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
+    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
+        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+    if (!TmpVGPR)
+      report_fatal_error("failed to find free scratch register");
+
+    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
+      Register SubReg = NumSubRegs == 1
+                            ? SuperReg
+                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
+
+      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR,
+                         FI, DwordOff);
+      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
+          .addReg(TmpVGPR, RegState::Kill);
+      DwordOff += 4;
+    }
+  }
+
+  void restoreFromVGPRLane(const int FI) {
+    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
+        FuncInfo->getPrologEpilogSGPRSpillToVGPRLanes(FI);
+    assert(Spill.size() == NumSubRegs);
+
+    for (unsigned I = 0; I < NumSubRegs; ++I) {
+      Register SubReg = NumSubRegs == 1
+                            ? SuperReg
+                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
+      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
+          .addReg(Spill[I].VGPR)
+          .addImm(Spill[I].Lane);
+    }
+  }
+
+  void copyFromScratchSGPR(Register SrcReg) const {
+    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
+        .addReg(SrcReg)
+        .setMIFlag(MachineInstr::FrameDestroy);
+  }
+
+public:
+  PrologEpilogSGPRSpillBuilder(Register Reg,
+                               const PrologEpilogSGPRSaveRestoreInfo SI,
+                               MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator MI,
+                               const DebugLoc &DL, const SIInstrInfo *TII,
+                               const SIRegisterInfo &TRI,
+                               LivePhysRegs &LiveRegs)
+      : MI(MI), MBB(MBB), MF(*MBB.getParent()),
+        ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
+        FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
+        SuperReg(Reg), SI(SI), LiveRegs(LiveRegs), DL(DL) {
+    const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg);
+    SplitParts = TRI.getRegSplitParts(RC, EltSize);
+    NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
+
+    assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
+  }
+
+  void save() {
+    switch (SI.getKind()) {
+    case SGPRSaveKind::SPILL_TO_MEM:
+      return saveToMemory(SI.getIndex());
+    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
+      return saveToVGPRLane(SI.getIndex());
+    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
+      return copyToScratchSGPR(SI.getReg());
+    }
+  }
+
+  void restore() {
+    switch (SI.getKind()) {
+    case SGPRSaveKind::SPILL_TO_MEM:
+      return restoreFromMemory(SI.getIndex());
+    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
+      return restoreFromVGPRLane(SI.getIndex());
+    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
+      return copyFromScratchSGPR(SI.getReg());
+    }
+  }
+};
+
+} // namespace llvm
+
 // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
 void SIFrameLowering::emitEntryFunctionFlatScratchInit(
     MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
@@ -671,22 +862,6 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
   llvm_unreachable("Invalid TargetStackID::Value");
 }
 
-static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
-                         const SIMachineFunctionInfo *FuncInfo,
-                         MachineFunction &MF, MachineBasicBlock &MBB,
-                         MachineBasicBlock::iterator MBBI, bool IsProlog) {
-  if (LiveRegs.empty()) {
-    LiveRegs.init(TRI);
-    if (IsProlog) {
-      LiveRegs.addLiveIns(MBB);
-    } else {
-      // In epilog.
-      LiveRegs.addLiveOuts(MBB);
-      LiveRegs.stepBackward(*MBBI);
-    }
-  }
-}
-
 // Activate all lanes, returns saved exec.
 static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
                                      MachineFunction &MF,
@@ -718,13 +893,6 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
   return ScratchExecCopy;
 }
 
-// A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
-// Otherwise we are spilling to memory.
-static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) {
-  const MachineFrameInfo &MFI = MF.getFrameInfo();
-  return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill;
-}
-
 void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
@@ -734,7 +902,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   }
 
   MachineFrameInfo &MFI = MF.getFrameInfo();
-  MachineRegisterInfo &MRI = MF.getRegInfo();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
@@ -758,9 +925,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   // turn on all lanes before doing the spill to memory.
   Register ScratchExecCopy;
 
-  std::optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
-  std::optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
-
   // Spill Whole-Wave Mode VGPRs.
   for (const auto &Reg : FuncInfo->getWWMSpills()) {
     Register VGPR = Reg.first;
@@ -781,86 +945,26 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
     LiveRegs.addReg(ScratchExecCopy);
   }
 
-  auto SaveSGPRToMemory = [&](Register Reg, const int FI) {
-    assert(!MFI.isDeadObjectIndex(FI));
-
-    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
-
-    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
-        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
-    if (!TmpVGPR)
-      report_fatal_error("failed to find free scratch register");
-
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
-        .addReg(Reg);
-
-    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
-                     FI);
-  };
-
-  auto SaveSGPRToVGPRLane = [&](Register Reg, const int FI) {
-    assert(!MFI.isDeadObjectIndex(FI));
-
-    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
-    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
-        FuncInfo->getPrologEpilogSGPRSpillToVGPRLanes(FI);
-    assert(Spill.size() == 1);
-
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
-        .addReg(Reg)
-        .addImm(Spill[0].Lane)
-        .addReg(Spill[0].VGPR, RegState::Undef);
-  };
-
-  if (FPSaveIndex) {
-    if (spilledToMemory(MF, *FPSaveIndex))
-      SaveSGPRToMemory(FramePtrReg, *FPSaveIndex);
-    else
-      SaveSGPRToVGPRLane(FramePtrReg, *FPSaveIndex);
-  }
-
-  // Emit the copy if we need an FP, and are using a free SGPR to save it.
-  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
-            FuncInfo->SGPRForFPSaveRestoreCopy)
-        .addReg(FramePtrReg)
-        .setMIFlag(MachineInstr::FrameSetup);
+  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
+    PrologEpilogSGPRSpillBuilder SB(Spill.first, Spill.second, MBB, MBBI, DL,
+                                    TII, TRI, LiveRegs);
+    SB.save();
   }
 
-  if (BPSaveIndex) {
-    if (spilledToMemory(MF, *BPSaveIndex))
-      SaveSGPRToMemory(BasePtrReg, *BPSaveIndex);
-    else
-      SaveSGPRToVGPRLane(BasePtrReg, *BPSaveIndex);
-  }
-
-  // Emit the copy if we need a BP, and are using a free SGPR to save it.
-  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
-            FuncInfo->SGPRForBPSaveRestoreCopy)
-        .addReg(BasePtrReg)
-        .setMIFlag(MachineInstr::FrameSetup);
-  }
-
-  // If a copy has been emitted for FP and/or BP, Make the SGPRs
-  // used in the copy instructions live throughout the function.
-  SmallVector<MCPhysReg, 2> TempSGPRs;
-  if (FuncInfo->SGPRForFPSaveRestoreCopy)
-    TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);
-
-  if (FuncInfo->SGPRForBPSaveRestoreCopy)
-    TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);
-
-  if (!TempSGPRs.empty()) {
+  // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
+  // such scratch registers live throughout the function.
+  SmallVector<Register, 1> ScratchSGPRs;
+  FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
+  if (!ScratchSGPRs.empty()) {
     for (MachineBasicBlock &MBB : MF) {
-      for (MCPhysReg Reg : TempSGPRs)
+      for (MCPhysReg Reg : ScratchSGPRs)
         MBB.addLiveIn(Reg);
 
       MBB.sortUniqueLiveIns();
     }
     if (!LiveRegs.empty()) {
-      LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
-      LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
+      for (MCPhysReg Reg : ScratchSGPRs)
+        LiveRegs.addReg(Reg);
     }
   }
 
@@ -910,24 +1014,20 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
     Add->getOperand(3).setIsDead(); // Mark SCC as dead.
   }
 
-  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
-                     FuncInfo->FramePointerSaveIndex)) &&
+  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
+  assert((!HasFP || FPSaved) &&
          "Needed to save FP but didn't save it anywhere");
 
   // If we allow spilling to AGPRs we may have saved FP but then spill
   // everything into AGPRs instead of the stack.
-  assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
-                    !FuncInfo->FramePointerSaveIndex) ||
-                   EnableSpillVGPRToAGPR) &&
+  assert((HasFP || !FPSaved || EnableSpillVGPRToAGPR) &&
          "Saved FP but didn't need it");
 
-  assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
-                     FuncInfo->BasePointerSaveIndex)) &&
+  bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
+  assert((!HasBP || BPSaved) &&
          "Needed to save BP but didn't save it anywhere");
 
-  assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&
-                    !FuncInfo->BasePointerSaveIndex)) &&
-         "Saved BP but didn't need it");
+  assert((HasBP || !BPSaved) && "Saved BP but didn't need it");
 }
 
 void SIFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -938,7 +1038,6 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
 
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
-  MachineRegisterInfo &MRI = MF.getRegInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
   LivePhysRegs LiveRegs;
   // Get the insert location for the epilogue. If there were no terminators in
@@ -959,12 +1058,6 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                              ? NumBytes + MFI.getMaxAlign().value()
                              : NumBytes;
   const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
-  const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
-  const Register BasePtrReg =
-      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
-
-  std::optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
-  std::optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
 
   if (RoundedSize != 0 && hasFP(MF)) {
     auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
@@ -974,56 +1067,10 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
     Add->getOperand(3).setIsDead(); // Mark SCC as dead.
   }
 
-  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
-        .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
-        .setMIFlag(MachineInstr::FrameDestroy);
-  }
-
-  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
-        .addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
-        .setMIFlag(MachineInstr::FrameDestroy);
-  }
-
-  auto RestoreSGPRFromMemory = [&](Register Reg, const int FI) {
-    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
-    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
-        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
-    if (!TmpVGPR)
-      report_fatal_error("failed to find free scratch register");
-    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
-                       FI);
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), Reg)
-        .addReg(TmpVGPR, RegState::Kill);
-  };
-
-  auto RestoreSGPRFromVGPRLane = [&](Register Reg, const int FI) {
-    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
-    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
-        FuncInfo->getPrologEpilogSGPRSpillToVGPRLanes(FI);
-    assert(Spill.size() == 1);
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), Reg)
-        .addReg(Spill[0].VGPR)
-        .addImm(Spill[0].Lane);
-  };
-
-  if (FPSaveIndex) {
-    const int FramePtrFI = *FPSaveIndex;
-    assert(!MFI.isDeadObjectIndex(FramePtrFI));
-    if (spilledToMemory(MF, FramePtrFI))
-      RestoreSGPRFromMemory(FramePtrReg, FramePtrFI);
-    else
-      RestoreSGPRFromVGPRLane(FramePtrReg, FramePtrFI);
-  }
-
-  if (BPSaveIndex) {
-    const int BasePtrFI = *BPSaveIndex;
-    assert(!MFI.isDeadObjectIndex(BasePtrFI));
-    if (spilledToMemory(MF, BasePtrFI))
-      RestoreSGPRFromMemory(BasePtrReg, BasePtrFI);
-    else
-      RestoreSGPRFromVGPRLane(BasePtrReg, BasePtrFI);
+  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
+    PrologEpilogSGPRSpillBuilder SB(Spill.first, Spill.second, MBB, MBBI, DL,
+                                    TII, TRI, LiveRegs);
+    SB.restore();
   }
 
   Register ScratchExecCopy;
@@ -1055,8 +1102,7 @@ static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
        I != E; ++I) {
     if (!MFI.isDeadObjectIndex(I) &&
         MFI.getStackID(I) == TargetStackID::SGPRSpill &&
-        (I != FuncInfo->FramePointerSaveIndex &&
-         I != FuncInfo->BasePointerSaveIndex)) {
+        !FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) {
       return false;
     }
   }
@@ -1215,6 +1261,49 @@ void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
   }
 }
 
+// The special SGPR spills like the one needed for FP, BP or any reserved
+// registers are delayed until frame lowering.
+void SIFrameLowering::determinePrologEpilogSGPRSaves(
+    MachineFunction &MF, BitVector &SavedVGPRs) const {
+  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  LivePhysRegs LiveRegs;
+  LiveRegs.init(*TRI);
+  // Initially mark callee saved registers as used so we will not choose them
+  // while looking for scratch SGPRs.
+  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
+  for (unsigned I = 0; CSRegs[I]; ++I)
+    LiveRegs.addReg(CSRegs[I]);
+
+  // hasFP only knows about stack objects that already exist. We're now
+  // determining the stack slots that will be created, so we have to predict
+  // them. Stack objects force FP usage with calls.
+  //
+  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
+  // don't want to report it here.
+  //
+  // FIXME: Is this really hasReservedCallFrame?
+  const bool WillHaveFP =
+      FrameInfo.hasCalls() &&
+      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
+
+  if (WillHaveFP || hasFP(MF)) {
+    Register FramePtrReg = MFI->getFrameOffsetReg();
+    assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
+           "Re-reserving spill slot for FP");
+    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, FramePtrReg);
+  }
+
+  if (TRI->hasBasePointer(MF)) {
+    Register BasePtrReg = TRI->getBaseRegister();
+    assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
+           "Re-reserving spill slot for BP");
+    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, BasePtrReg);
+  }
+}
+
 // Only report VGPRs to generic code.
 void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedVGPRs,
@@ -1224,7 +1313,6 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
   if (MFI->isEntryFunction())
     return;
 
-  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 
@@ -1256,43 +1344,13 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
   if (!ST.hasGFX90AInsts())
     SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
 
-  // hasFP only knows about stack objects that already exist. We're now
-  // determining the stack slots that will be created, so we have to predict
-  // them. Stack objects force FP usage with calls.
-  //
-  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
-  // don't want to report it here.
-  //
-  // FIXME: Is this really hasReservedCallFrame?
-  const bool WillHaveFP =
-      FrameInfo.hasCalls() &&
-      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
+  determinePrologEpilogSGPRSaves(MF, SavedVGPRs);
 
   // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
   // allow the default insertion to handle them.
   for (auto &Reg : MFI->getWWMSpills())
     SavedVGPRs.reset(Reg.first);
 
-  LivePhysRegs LiveRegs;
-  LiveRegs.init(*TRI);
-
-  if (WillHaveFP || hasFP(MF)) {
-    assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex &&
-           "Re-reserving spill slot for FP");
-    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy,
-                                   MFI->FramePointerSaveIndex, true);
-  }
-
-  if (TRI->hasBasePointer(MF)) {
-    if (MFI->SGPRForFPSaveRestoreCopy)
-      LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);
-
-    assert(!MFI->SGPRForBPSaveRestoreCopy &&
-           !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP");
-    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy,
-                                   MFI->BasePointerSaveIndex, false);
-  }
-
   // Mark all lane VGPRs as BB LiveIns.
   for (MachineBasicBlock &MBB : MF) {
     for (auto &Reg : MFI->getWWMSpills())
@@ -1354,29 +1412,31 @@ bool SIFrameLowering::assignCalleeSavedSpillSlots(
     return true; // Early exit if no callee saved registers are modified!
 
   const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
-  if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
-      !FuncInfo->SGPRForBPSaveRestoreCopy)
-    return false;
-
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *RI = ST.getRegisterInfo();
   Register FramePtrReg = FuncInfo->getFrameOffsetReg();
   Register BasePtrReg = RI->getBaseRegister();
+  Register SGPRForFPSaveRestoreCopy =
+      FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
+  Register SGPRForBPSaveRestoreCopy =
+      FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
+  if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
+    return false;
+
   unsigned NumModifiedRegs = 0;
 
-  if (FuncInfo->SGPRForFPSaveRestoreCopy)
+  if (SGPRForFPSaveRestoreCopy)
     NumModifiedRegs++;
-  if (FuncInfo->SGPRForBPSaveRestoreCopy)
+  if (SGPRForBPSaveRestoreCopy)
     NumModifiedRegs++;
 
   for (auto &CS : CSI) {
-    if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
-      CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
+    if (CS.getReg() == FramePtrReg && SGPRForFPSaveRestoreCopy) {
+      CS.setDstReg(SGPRForFPSaveRestoreCopy);
       if (--NumModifiedRegs)
         break;
-    } else if (CS.getReg() == BasePtrReg &&
-               FuncInfo->SGPRForBPSaveRestoreCopy) {
-      CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
+    } else if (CS.getReg() == BasePtrReg && SGPRForBPSaveRestoreCopy) {
+      CS.setDstReg(SGPRForBPSaveRestoreCopy);
       if (--NumModifiedRegs)
         break;
     }

diff  --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index 79154d494e91..ee6b5b81fab0 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -33,6 +33,8 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
                             RegScavenger *RS = nullptr) const override;
   void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs,
                                 RegScavenger *RS = nullptr) const;
+  void determinePrologEpilogSGPRSaves(MachineFunction &MF,
+                                      BitVector &SavedRegs) const;
   bool
   assignCalleeSavedSpillSlots(MachineFunction &MF,
                               const TargetRegisterInfo *TRI,

diff  --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index e1e987040228..9d025cfffe64 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -473,13 +473,13 @@ bool SIMachineFunctionInfo::removeDeadFrameIndices(
   bool HaveSGPRToMemory = false;
 
   if (ResetSGPRSpillStackIDs) {
-    // All other SPGRs must be allocated on the default stack, so reset the
+    // All other SGPRs must be allocated on the default stack, so reset the
     // stack ID.
-    for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
-         ++i) {
-      if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
-        if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
-          MFI.setStackID(i, TargetStackID::Default);
+    for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;
+         ++I) {
+      if (!checkIndexInPrologEpilogSGPRSpills(I)) {
+        if (MFI.getStackID(I) == TargetStackID::SGPRSpill) {
+          MFI.setStackID(I, TargetStackID::Default);
           HaveSGPRToMemory = true;
         }
       }

diff  --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 9b4a1d7bf843..54e7e33d5449 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -321,6 +321,35 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
 
 } // end namespace yaml
 
+// A CSR SGPR value can be preserved inside a callee using one of the following
+// methods.
+//   1. Copy to an unused scratch SGPR.
+//   2. Spill to a VGPR lane.
+//   3. Spill to memory via a scratch VGPR.
+// class PrologEpilogSGPRSaveRestoreInfo represents the save/restore method used
+// for an SGPR at function prolog/epilog.
+enum class SGPRSaveKind : uint8_t {
+  COPY_TO_SCRATCH_SGPR,
+  SPILL_TO_VGPR_LANE,
+  SPILL_TO_MEM
+};
+
+class PrologEpilogSGPRSaveRestoreInfo {
+  SGPRSaveKind Kind;
+  union {
+    int Index;
+    Register Reg;
+  };
+
+public:
+  PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, int I) : Kind(K), Index(I) {}
+  PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, Register R)
+      : Kind(K), Reg(R) {}
+  Register getReg() const { return Reg; }
+  int getIndex() const { return Index; }
+  SGPRSaveKind getKind() const { return Kind; }
+};
+
 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
 /// tells the hardware which interpolation parameters to load.
 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
@@ -464,6 +493,14 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   // the serialization easier.
   ReservedRegSet WWMReservedRegs;
 
+  using PrologEpilogSGPRSpillsMap =
+      DenseMap<Register, PrologEpilogSGPRSaveRestoreInfo>;
+  // To track the SGPR spill method used for a CSR SGPR register during
+  // frame lowering. Even though the SGPR spills are handled during the
+  // SILowerSGPRSpills pass, some special handling is needed later during
+  // the PrologEpilogInserter.
+  PrologEpilogSGPRSpillsMap PrologEpilogSGPRSpills;
+
   DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
 
   // AGPRs used for VGPR spills.
@@ -493,17 +530,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
     VGPRForAGPRCopy = NewVGPRForAGPRCopy;
   }
 
-public: // FIXME
-  /// If this is set, an SGPR used for save/restore of the register used for the
-  /// frame pointer.
-  Register SGPRForFPSaveRestoreCopy;
-  std::optional<int> FramePointerSaveIndex;
-
-  /// If this is set, an SGPR used for save/restore of the register used for the
-  /// base pointer.
-  Register SGPRForBPSaveRestoreCopy;
-  std::optional<int> BasePointerSaveIndex;
-
   bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg);
 
 public:
@@ -538,6 +564,50 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   const WWMSpillsMap &getWWMSpills() const { return WWMSpills; }
   const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; }
 
+  const PrologEpilogSGPRSpillsMap &getPrologEpilogSGPRSpills() const {
+    return PrologEpilogSGPRSpills;
+  }
+
+  void addToPrologEpilogSGPRSpills(Register Reg,
+                                   PrologEpilogSGPRSaveRestoreInfo SI) {
+    PrologEpilogSGPRSpills.insert(std::make_pair(Reg, SI));
+  }
+
+  // Check if an entry exists for \p Reg in PrologEpilogSGPRSpills. Return true
+  // if one is found and false otherwise.
+  bool hasPrologEpilogSGPRSpillEntry(Register Reg) const {
+    return PrologEpilogSGPRSpills.find(Reg) != PrologEpilogSGPRSpills.end();
+  }
+
+  // Get the scratch SGPR if allocated to save/restore \p Reg.
+  Register getScratchSGPRCopyDstReg(Register Reg) const {
+    auto I = PrologEpilogSGPRSpills.find(Reg);
+    if (I != PrologEpilogSGPRSpills.end() &&
+        I->second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
+      return I->second.getReg();
+
+    return AMDGPU::NoRegister;
+  }
+
+  // Get all scratch SGPRs allocated to copy/restore the SGPR spills.
+  void getAllScratchSGPRCopyDstRegs(SmallVectorImpl<Register> &Regs) const {
+    for (const auto &SI : PrologEpilogSGPRSpills) {
+      if (SI.second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
+        Regs.push_back(SI.second.getReg());
+    }
+  }
+
+  // Check if \p FI is allocated for any SGPR spill to a VGPR lane during PEI.
+  bool checkIndexInPrologEpilogSGPRSpills(int FI) const {
+    return find_if(PrologEpilogSGPRSpills,
+                   [FI](const std::pair<Register,
+                                        PrologEpilogSGPRSaveRestoreInfo> &SI) {
+                     return SI.second.getKind() ==
+                                SGPRSaveKind::SPILL_TO_VGPR_LANE &&
+                            SI.second.getIndex() == FI;
+                   }) != PrologEpilogSGPRSpills.end();
+  }
+
   ArrayRef<SIRegisterInfo::SpilledReg>
   getPrologEpilogSGPRSpillToVGPRLanes(int FrameIndex) const {
     auto I = PrologEpilogSGPRSpillToVGPRLanes.find(FrameIndex);

diff  --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
index c761a39f2d2f..3f817fff751f 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
@@ -159,11 +159,11 @@ define void @func_call_align1024_bp_gets_vgpr_spill(<32 x i32> %a, i32 %b) #0 {
 ; GCN: buffer_store_dword [[VGPR_REG:v[0-9]+]], off, s[0:3], s32 offset:1028 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_store_dword [[VGPR_REG_1:v[0-9]+]], off, s[0:3], s32 offset:1032 ; 4-byte Folded Spill
 ; GCN-NEXT: s_mov_b64 exec, s[16:17]
+; GCN-DAG: v_writelane_b32 [[VGPR_REG_1]], s34, 1
 ; GCN-NEXT: v_writelane_b32 [[VGPR_REG_1]], s33, 0
 ; GCN-DAG: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0
 ; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xffff0000
 ; GCN: v_mov_b32_e32 v32, 0
-; GCN-DAG: v_writelane_b32 [[VGPR_REG_1]], s34, 1
 ; GCN: s_mov_b32 s34, s32
 ; GCN: buffer_store_dword v32, off, s[0:3], s33 offset:1024
 ; GCN-NEXT: s_waitcnt vmcnt(0)
@@ -175,8 +175,8 @@ define void @func_call_align1024_bp_gets_vgpr_spill(<32 x i32> %a, i32 %b) #0 {
 ; GCN: v_readlane_b32 s31, [[VGPR_REG]], 1
 ; GCN: v_readlane_b32 s30, [[VGPR_REG]], 0
 ; GCN: s_add_i32 s32, s32, 0xfffd0000
-; GCN-NEXT: v_readlane_b32 s33, [[VGPR_REG_1]], 0
 ; GCN-NEXT: v_readlane_b32 s34, [[VGPR_REG_1]], 1
+; GCN-NEXT: v_readlane_b32 s33, [[VGPR_REG_1]], 0
 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; GCN-NEXT: buffer_load_dword [[VGPR_REG]], off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload
 ; GCN-NEXT: buffer_load_dword [[VGPR_REG_1]], off, s[0:3], s32 offset:1032 ; 4-byte Folded Reload
@@ -197,8 +197,8 @@ define i32 @needs_align1024_stack_args_used_inside_loop(ptr addrspace(5) nocaptu
 ; The BP value will get saved/restored in an SGPR at the prolgoue/epilogue.
 
 ; GCN-LABEL: needs_align1024_stack_args_used_inside_loop:
-; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
-; GCN-NEXT: s_mov_b32 [[BP_COPY:s[0-9]+]], s34
+; GCN: s_mov_b32 [[BP_COPY:s[0-9]+]], s34
+; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
 ; GCN-NEXT: s_add_i32 s33, s32, 0xffc0
 ; GCN-NEXT: s_mov_b32 s34, s32
 ; GCN-NEXT: s_and_b32 s33, s33, 0xffff0000
@@ -209,8 +209,8 @@ define i32 @needs_align1024_stack_args_used_inside_loop(ptr addrspace(5) nocaptu
 ; GCN: buffer_load_dword v{{[0-9]+}}, [[VGPR_REG]], s[0:3], 0 offen
 ; GCN: v_add_u32_e32 [[VGPR_REG]], vcc, 4, [[VGPR_REG]]
 ; GCN: s_add_i32 s32, s32, 0xfffd0000
-; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
 ; GCN-NEXT: s_mov_b32 s34, [[BP_COPY]]
+; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
 ; GCN-NEXT: s_setpc_b64 s[30:31]
 begin:
   %local_var = alloca i32, align 1024, addrspace(5)
@@ -262,9 +262,9 @@ define void @no_free_regs_spill_bp_to_memory(<32 x i32> %a, i32 %b) #5 {
 
 ; GCN-LABEL: no_free_regs_spill_bp_to_mem
 ; GCN: s_or_saveexec_b64 s[4:5], -1
-; GCN: v_mov_b32_e32 v0, s33
-; GCN: buffer_store_dword v0, off, s[0:3], s32
 ; GCN: v_mov_b32_e32 v0, s34
+; GCN: buffer_store_dword v0, off, s[0:3], s32
+; GCN: v_mov_b32_e32 v0, s33
 ; GCN-DAG: buffer_store_dword v0, off, s[0:3], s32
   %local_val = alloca i32, align 128, addrspace(5)
   store volatile i32 %b, ptr addrspace(5) %local_val, align 128
@@ -298,13 +298,13 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i
 ; GCN-NEXT: s_add_i32 s6, s32, 0x42100
 ; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s6 ; 4-byte Folded Spill
 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-NEXT: v_mov_b32_e32 v0, s33
+; GCN-NEXT: v_mov_b32_e32 v0, s34
 ; GCN-NOT: v_mov_b32_e32 v0, 0x1088
-; GCN-NEXT: s_add_i32 s6, s32, 0x42200
+; GCN-NEXT: s_add_i32 s6, s32, 0x42300
 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
-; GCN-NEXT: v_mov_b32_e32 v0, s34
+; GCN-NEXT: v_mov_b32_e32 v0, s33
 ; GCN-NOT: v_mov_b32_e32 v0, 0x108c
-; GCN-NEXT: s_add_i32 s6, s32, 0x42300
+; GCN-NEXT: s_add_i32 s6, s32, 0x42200
 ; GCN-NEXT: s_mov_b32 s34, s32
 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
   %local_val = alloca i32, align 128, addrspace(5)


        


More information about the llvm-commits mailing list