[llvm] [AMDGPU] [SIFrameLowering] Use LiveRegUnits instead of LivePhysRegs (PR #65962)

Pranav Taneja via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 18 22:51:53 PDT 2023


https://github.com/pranavTanejaAMD updated https://github.com/llvm/llvm-project/pull/65962

>From a69dec01c18e5caa71bb13709628fb46efae92c6 Mon Sep 17 00:00:00 2001
From: Pranav <Pranav.Taneja at amd.com>
Date: Thu, 14 Sep 2023 06:44:24 +0000
Subject: [PATCH 1/2] [SIFrameLowering] Replaced LivePhysRegs with
 LiveRegUnits.

---
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 187 +++++++++++----------
 llvm/lib/Target/AMDGPU/SIFrameLowering.h   |   4 +-
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp  |  18 +-
 llvm/lib/Target/AMDGPU/SIRegisterInfo.h    |   6 +-
 4 files changed, 108 insertions(+), 107 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 865caae240f3470..279e8bab4b46dcf 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -11,7 +11,7 @@
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/Target/TargetMachine.h"
@@ -26,13 +26,14 @@ static cl::opt<bool> EnableSpillVGPRToAGPR(
   cl::ReallyHidden,
   cl::init(true));
 
-// Find a register matching \p RC from \p LiveRegs which is unused and available
-// throughout the function. On failure, returns AMDGPU::NoRegister.
+// Find a register matching \p RC from \p LiveUnits which is unused and
+// available throughout the function. On failure, returns AMDGPU::NoRegister.
 static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
-                                     const LivePhysRegs &LiveRegs,
+                                     const LiveRegUnits &LiveUnits,
                                      const TargetRegisterClass &RC) {
   for (MCRegister Reg : RC) {
-    if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
+    if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
+        !MRI.isReserved(Reg))
       return Reg;
   }
   return MCRegister();
@@ -42,22 +43,21 @@ static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
 // callee-save registers since they may appear to be free when this is called
 // from canUseAsPrologue (during shrink wrapping), but then no longer be free
 // when this is called from emitPrologue.
-static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
-                                                   LivePhysRegs &LiveRegs,
-                                                   const TargetRegisterClass &RC,
-                                                   bool Unused = false) {
+static MCRegister findScratchNonCalleeSaveRegister(
+    MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
+    const TargetRegisterClass &RC, bool Unused = false) {
   // Mark callee saved registers as used so we will not choose them.
   const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
   for (unsigned i = 0; CSRegs[i]; ++i)
-    LiveRegs.addReg(CSRegs[i]);
+    LiveUnits.addReg(CSRegs[i]);
 
   // We are looking for a register that can be used throughout the entire
   // function, so any use is unacceptable.
   if (Unused)
-    return findUnusedRegister(MRI, LiveRegs, RC);
+    return findUnusedRegister(MRI, LiveUnits, RC);
 
   for (MCRegister Reg : RC) {
-    if (LiveRegs.available(MRI, Reg))
+    if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
       return Reg;
   }
 
@@ -65,9 +65,9 @@ static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
 }
 
 /// Query target location for spilling SGPRs
-/// \p IncludeScratchCopy : Also look for free scratch SGPRs 
+/// \p IncludeScratchCopy : Also look for free scratch SGPRs
 static void getVGPRSpillLaneOrTempRegister(
-    MachineFunction &MF, LivePhysRegs &LiveRegs, Register SGPR,
+    MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
     const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
     bool IncludeScratchCopy = true) {
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -81,11 +81,11 @@ static void getVGPRSpillLaneOrTempRegister(
   // We need to save and restore the given SGPR.
 
   Register ScratchSGPR;
-  // 1: Try to save the given register into an unused scratch SGPR. The LiveRegs
-  // should have all the callee saved registers marked as used. For certain
-  // cases we skip copy to scratch SGPR.
+  // 1: Try to save the given register into an unused scratch SGPR. The
+  // LiveUnits should have all the callee saved registers marked as used. For
+  // certain cases we skip copy to scratch SGPR.
   if (IncludeScratchCopy)
-    ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveRegs, RC);
+    ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);
 
   if (!ScratchSGPR) {
     int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
@@ -118,7 +118,7 @@ static void getVGPRSpillLaneOrTempRegister(
     MFI->addToPrologEpilogSGPRSpills(
         SGPR, PrologEpilogSGPRSaveRestoreInfo(
                   SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
-    LiveRegs.addReg(ScratchSGPR);
+    LiveUnits.addReg(ScratchSGPR);
     LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
                       << printReg(ScratchSGPR, TRI) << '\n');
   }
@@ -129,7 +129,7 @@ static void getVGPRSpillLaneOrTempRegister(
 // use.
 static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
                              const SIMachineFunctionInfo &FuncInfo,
-                             LivePhysRegs &LiveRegs, MachineFunction &MF,
+                             LiveRegUnits &LiveUnits, MachineFunction &MF,
                              MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I, const DebugLoc &DL,
                              Register SpillReg, int FI, Register FrameReg,
@@ -142,18 +142,18 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
   MachineMemOperand *MMO = MF.getMachineMemOperand(
       PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
       FrameInfo.getObjectAlign(FI));
-  LiveRegs.addReg(SpillReg);
+  LiveUnits.addReg(SpillReg);
   bool IsKill = !MBB.isLiveIn(SpillReg);
   TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
-                          DwordOff, MMO, nullptr, &LiveRegs);
+                          DwordOff, MMO, nullptr, &LiveUnits);
   if (IsKill)
-    LiveRegs.removeReg(SpillReg);
+    LiveUnits.removeReg(SpillReg);
 }
 
 static void buildEpilogRestore(const GCNSubtarget &ST,
                                const SIRegisterInfo &TRI,
                                const SIMachineFunctionInfo &FuncInfo,
-                               LivePhysRegs &LiveRegs, MachineFunction &MF,
+                               LiveRegUnits &LiveUnits, MachineFunction &MF,
                                MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I,
                                const DebugLoc &DL, Register SpillReg, int FI,
@@ -167,7 +167,7 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
       PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
       FrameInfo.getObjectAlign(FI));
   TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
-                          DwordOff, MMO, nullptr, &LiveRegs);
+                          DwordOff, MMO, nullptr, &LiveUnits);
 }
 
 static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
@@ -195,18 +195,18 @@ static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     .addReg(GitPtrLo);
 }
 
-static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
-                         const SIMachineFunctionInfo *FuncInfo,
-                         MachineFunction &MF, MachineBasicBlock &MBB,
-                         MachineBasicBlock::iterator MBBI, bool IsProlog) {
-  if (LiveRegs.empty()) {
-    LiveRegs.init(TRI);
+static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
+                          const SIMachineFunctionInfo *FuncInfo,
+                          MachineFunction &MF, MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MBBI, bool IsProlog) {
+  if (LiveUnits.empty()) {
+    LiveUnits.init(TRI);
     if (IsProlog) {
-      LiveRegs.addLiveIns(MBB);
+      LiveUnits.addLiveIns(MBB);
     } else {
       // In epilog.
-      LiveRegs.addLiveOuts(MBB);
-      LiveRegs.stepBackward(*MBBI);
+      LiveUnits.addLiveOuts(MBB);
+      LiveUnits.stepBackward(*MBBI);
     }
   }
 }
@@ -228,7 +228,7 @@ class PrologEpilogSGPRSpillBuilder {
   const SIRegisterInfo &TRI;
   Register SuperReg;
   const PrologEpilogSGPRSaveRestoreInfo SI;
-  LivePhysRegs &LiveRegs;
+  LiveRegUnits &LiveUnits;
   const DebugLoc &DL;
   Register FrameReg;
   ArrayRef<int16_t> SplitParts;
@@ -239,10 +239,10 @@ class PrologEpilogSGPRSpillBuilder {
     MachineRegisterInfo &MRI = MF.getRegInfo();
     assert(!MFI.isDeadObjectIndex(FI));
 
-    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
+    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
 
     MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
-        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
     if (!TmpVGPR)
       report_fatal_error("failed to find free scratch register");
 
@@ -253,7 +253,7 @@ class PrologEpilogSGPRSpillBuilder {
       BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
           .addReg(SubReg);
 
-      buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR,
+      buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
                        FI, FrameReg, DwordOff);
       DwordOff += 4;
     }
@@ -287,9 +287,9 @@ class PrologEpilogSGPRSpillBuilder {
   void restoreFromMemory(const int FI) {
     MachineRegisterInfo &MRI = MF.getRegInfo();
 
-    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
+    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
     MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
-        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
     if (!TmpVGPR)
       report_fatal_error("failed to find free scratch register");
 
@@ -298,8 +298,8 @@ class PrologEpilogSGPRSpillBuilder {
                             ? SuperReg
                             : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
 
-      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR,
-                         FI, FrameReg, DwordOff);
+      buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
+                         TmpVGPR, FI, FrameReg, DwordOff);
       BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
           .addReg(TmpVGPR, RegState::Kill);
       DwordOff += 4;
@@ -335,11 +335,12 @@ class PrologEpilogSGPRSpillBuilder {
                                MachineBasicBlock::iterator MI,
                                const DebugLoc &DL, const SIInstrInfo *TII,
                                const SIRegisterInfo &TRI,
-                               LivePhysRegs &LiveRegs, Register FrameReg)
+                               LiveRegUnits &LiveUnits, Register FrameReg)
       : MI(MI), MBB(MBB), MF(*MBB.getParent()),
         ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
         FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
-        SuperReg(Reg), SI(SI), LiveRegs(LiveRegs), DL(DL), FrameReg(FrameReg) {
+        SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
+        FrameReg(FrameReg) {
     const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
     SplitParts = TRI.getRegSplitParts(RC, EltSize);
     NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
@@ -396,9 +397,9 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
 
   if (ST.isAmdPalOS()) {
     // Extract the scratch offset from the descriptor in the GIT
-    LivePhysRegs LiveRegs;
-    LiveRegs.init(*TRI);
-    LiveRegs.addLiveIns(MBB);
+    LiveRegUnits LiveUnits;
+    LiveUnits.init(*TRI);
+    LiveUnits.addLiveIns(MBB);
 
     // Find unused reg to load flat scratch init into
     MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -409,8 +410,8 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
         std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
     Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
     for (MCPhysReg Reg : AllSGPR64s) {
-      if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
-          !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
+      if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
+          MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
         FlatScrInit = Reg;
         break;
       }
@@ -873,7 +874,7 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
 
 // Activate only the inactive lanes when \p EnableInactiveLanes is true.
 // Otherwise, activate all lanes. It returns the saved exec.
-static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
+static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
                                      MachineFunction &MF,
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
@@ -886,14 +887,14 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
 
-  initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
+  initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
 
   ScratchExecCopy = findScratchNonCalleeSaveRegister(
-      MRI, LiveRegs, *TRI.getWaveMaskRegClass());
+      MRI, LiveUnits, *TRI.getWaveMaskRegClass());
   if (!ScratchExecCopy)
     report_fatal_error("failed to find free scratch register");
 
-  LiveRegs.addReg(ScratchExecCopy);
+  LiveUnits.addReg(ScratchExecCopy);
 
   const unsigned SaveExecOpc =
       ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
@@ -909,7 +910,7 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
 
 void SIFrameLowering::emitCSRSpillStores(
     MachineFunction &MF, MachineBasicBlock &MBB,
-    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LivePhysRegs &LiveRegs,
+    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
     Register FrameReg, Register FramePtrRegScratchCopy) const {
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -924,7 +925,7 @@ void SIFrameLowering::emitCSRSpillStores(
   FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
   if (!WWMScratchRegs.empty())
     ScratchExecCopy =
-        buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
+        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                              /*IsProlog*/ true, /*EnableInactiveLanes*/ true);
 
   auto StoreWWMRegisters =
@@ -932,7 +933,7 @@ void SIFrameLowering::emitCSRSpillStores(
         for (const auto &Reg : WWMRegs) {
           Register VGPR = Reg.first;
           int FI = Reg.second;
-          buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+          buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                            VGPR, FI, FrameReg);
         }
       };
@@ -943,7 +944,7 @@ void SIFrameLowering::emitCSRSpillStores(
       unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
       BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
     } else {
-      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
+      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                              /*IsProlog*/ true,
                                              /*EnableInactiveLanes*/ false);
     }
@@ -955,7 +956,7 @@ void SIFrameLowering::emitCSRSpillStores(
     unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
     BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
         .addReg(ScratchExecCopy, RegState::Kill);
-    LiveRegs.addReg(ScratchExecCopy);
+    LiveUnits.addReg(ScratchExecCopy);
   }
 
   Register FramePtrReg = FuncInfo->getFrameOffsetReg();
@@ -971,7 +972,7 @@ void SIFrameLowering::emitCSRSpillStores(
       continue;
 
     PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
-                                    LiveRegs, FrameReg);
+                                    LiveUnits, FrameReg);
     SB.save();
   }
 
@@ -986,16 +987,16 @@ void SIFrameLowering::emitCSRSpillStores(
 
       MBB.sortUniqueLiveIns();
     }
-    if (!LiveRegs.empty()) {
+    if (!LiveUnits.empty()) {
       for (MCPhysReg Reg : ScratchSGPRs)
-        LiveRegs.addReg(Reg);
+        LiveUnits.addReg(Reg);
     }
   }
 }
 
 void SIFrameLowering::emitCSRSpillRestores(
     MachineFunction &MF, MachineBasicBlock &MBB,
-    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LivePhysRegs &LiveRegs,
+    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
     Register FrameReg, Register FramePtrRegScratchCopy) const {
   const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -1015,7 +1016,7 @@ void SIFrameLowering::emitCSRSpillRestores(
       continue;
 
     PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
-                                    LiveRegs, FrameReg);
+                                    LiveUnits, FrameReg);
     SB.restore();
   }
 
@@ -1027,7 +1028,7 @@ void SIFrameLowering::emitCSRSpillRestores(
   FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
   if (!WWMScratchRegs.empty())
     ScratchExecCopy =
-        buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
+        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                              /*IsProlog*/ false, /*EnableInactiveLanes*/ true);
 
   auto RestoreWWMRegisters =
@@ -1035,7 +1036,7 @@ void SIFrameLowering::emitCSRSpillRestores(
         for (const auto &Reg : WWMRegs) {
           Register VGPR = Reg.first;
           int FI = Reg.second;
-          buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+          buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                              VGPR, FI, FrameReg);
         }
       };
@@ -1046,7 +1047,7 @@ void SIFrameLowering::emitCSRSpillRestores(
       unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
       BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
     } else {
-      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
+      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                              /*IsProlog*/ false,
                                              /*EnableInactiveLanes*/ false);
     }
@@ -1079,7 +1080,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   Register FramePtrReg = FuncInfo->getFrameOffsetReg();
   Register BasePtrReg =
       TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
-  LivePhysRegs LiveRegs;
+  LiveRegUnits LiveUnits;
 
   MachineBasicBlock::iterator MBBI = MBB.begin();
   // DebugLoc must be unknown since the first instruction with DebugLoc is used
@@ -1097,14 +1098,14 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   Register FramePtrRegScratchCopy;
   if (!HasFP && !hasFP(MF)) {
     // Emit the CSR spill stores with SP base register.
-    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveRegs, StackPtrReg,
+    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
                        FramePtrRegScratchCopy);
   } else {
     // CSR spill stores will use FP as base register.
     Register SGPRForFPSaveRestoreCopy =
         FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
 
-    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
+    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
     if (SGPRForFPSaveRestoreCopy) {
       // Copy FP to the scratch register now and emit the CFI entry. It avoids
       // the extra FP copy needed in the other two cases when FP is spilled to
@@ -1112,18 +1113,18 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
       PrologEpilogSGPRSpillBuilder SB(
           FramePtrReg,
           FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
-          DL, TII, TRI, LiveRegs, FramePtrReg);
+          DL, TII, TRI, LiveUnits, FramePtrReg);
       SB.save();
-      LiveRegs.addReg(SGPRForFPSaveRestoreCopy);
+      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
     } else {
       // Copy FP into a new scratch register so that its previous value can be
       // spilled after setting up the new frame.
       FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
-          MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass);
+          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
       if (!FramePtrRegScratchCopy)
         report_fatal_error("failed to find free scratch register");
 
-      LiveRegs.addReg(FramePtrRegScratchCopy);
+      LiveUnits.addReg(FramePtrRegScratchCopy);
       BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
           .addReg(FramePtrReg);
     }
@@ -1133,9 +1134,9 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
     const unsigned Alignment = MFI.getMaxAlign().value();
 
     RoundedSize += Alignment;
-    if (LiveRegs.empty()) {
-      LiveRegs.init(TRI);
-      LiveRegs.addLiveIns(MBB);
+    if (LiveUnits.empty()) {
+      LiveUnits.init(TRI);
+      LiveUnits.addLiveIns(MBB);
     }
 
     // s_add_i32 s33, s32, NumBytes
@@ -1158,10 +1159,10 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
 
   // If FP is used, emit the CSR spills with FP base register.
   if (HasFP) {
-    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveRegs, FramePtrReg,
+    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                        FramePtrRegScratchCopy);
     if (FramePtrRegScratchCopy)
-      LiveRegs.removeReg(FramePtrRegScratchCopy);
+      LiveUnits.removeReg(FramePtrRegScratchCopy);
   }
 
   // If we need a base pointer, set it up here. It's whatever the value of
@@ -1210,7 +1211,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  LivePhysRegs LiveRegs;
+  LiveRegUnits LiveUnits;
   // Get the insert location for the epilogue. If there were no terminators in
   // the block, get the last instruction.
   MachineBasicBlock::iterator MBBI = MBB.end();
@@ -1240,19 +1241,19 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
     // SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
     // into a new scratch register and copy to FP later when other registers are
     // restored from the current stack frame.
-    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
+    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
     if (SGPRForFPSaveRestoreCopy) {
-      LiveRegs.addReg(SGPRForFPSaveRestoreCopy);
+      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
     } else {
       FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
-          MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass);
+          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
       if (!FramePtrRegScratchCopy)
         report_fatal_error("failed to find free scratch register");
 
-      LiveRegs.addReg(FramePtrRegScratchCopy);
+      LiveUnits.addReg(FramePtrRegScratchCopy);
     }
 
-    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveRegs, FramePtrReg,
+    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                          FramePtrRegScratchCopy);
   }
 
@@ -1275,7 +1276,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
       MIB.setMIFlag(MachineInstr::FrameDestroy);
   } else {
     // Insert the CSR spill restores with SP as the base register.
-    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveRegs, StackPtrReg,
+    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
                          FramePtrRegScratchCopy);
   }
 }
@@ -1472,30 +1473,30 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
-  LivePhysRegs LiveRegs;
-  LiveRegs.init(*TRI);
+  LiveRegUnits LiveUnits;
+  LiveUnits.init(*TRI);
   // Initially mark callee saved registers as used so we will not choose them
   // while looking for scratch SGPRs.
   const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
   for (unsigned I = 0; CSRegs[I]; ++I)
-    LiveRegs.addReg(CSRegs[I]);
+    LiveUnits.addReg(CSRegs[I]);
 
   const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();
 
   if (NeedExecCopyReservedReg) {
     Register ReservedReg = MFI->getSGPRForEXECCopy();
     assert(ReservedReg && "Should have reserved an SGPR for EXEC copy.");
-    Register UnusedScratchReg = findUnusedRegister(MRI, LiveRegs, RC);
+    Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
     if (UnusedScratchReg) {
       // If found any unused scratch SGPR, reserve the register itself for Exec
       // copy and there is no need for any spills in that case.
       MFI->setSGPRForEXECCopy(UnusedScratchReg);
-      LiveRegs.addReg(UnusedScratchReg);
+      LiveUnits.addReg(UnusedScratchReg);
     } else {
       // Needs spill.
       assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedReg) &&
              "Re-reserving spill slot for EXEC copy register");
-      getVGPRSpillLaneOrTempRegister(MF, LiveRegs, ReservedReg, RC,
+      getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedReg, RC,
                                      /*IncludeScratchCopy=*/false);
     }
   }
@@ -1516,14 +1517,14 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
     Register FramePtrReg = MFI->getFrameOffsetReg();
     assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
            "Re-reserving spill slot for FP");
-    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, FramePtrReg);
+    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
   }
 
   if (TRI->hasBasePointer(MF)) {
     Register BasePtrReg = TRI->getBaseRegister();
     assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
            "Re-reserving spill slot for BP");
-    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, BasePtrReg);
+    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
   }
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index 0060fc0be431b18..b3feb759ed811fb 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -38,11 +38,11 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
                                       bool NeedExecCopyReservedReg) const;
   void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, DebugLoc &DL,
-                          LivePhysRegs &LiveRegs, Register FrameReg,
+                          LiveRegUnits &LiveUnits, Register FrameReg,
                           Register FramePtrRegScratchCopy) const;
   void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI, DebugLoc &DL,
-                            LivePhysRegs &LiveRegs, Register FrameReg,
+                            LiveRegUnits &LiveUnits, Register FrameReg,
                             Register FramePtrRegScratchCopy) const;
   bool
   assignCalleeSavedSpillSlots(MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index b69bea225ac2efb..e065b104db4eb33 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -19,7 +19,7 @@
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
 #include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
@@ -1309,8 +1309,8 @@ void SIRegisterInfo::buildSpillLoadStore(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL,
     unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
     MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
-    RegScavenger *RS, LivePhysRegs *LiveRegs) const {
-  assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
+    RegScavenger *RS, LiveRegUnits *LiveUnits) const {
+  assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");
 
   MachineFunction *MF = MBB.getParent();
   const SIInstrInfo *TII = ST.getInstrInfo();
@@ -1398,7 +1398,7 @@ void SIRegisterInfo::buildSpillLoadStore(
     SOffset = MCRegister();
 
     // We don't have access to the register scavenger if this function is called
-    // during  PEI::scavengeFrameVirtualRegs() so use LiveRegs in this case.
+    // during  PEI::scavengeFrameVirtualRegs() so use LiveUnits in this case.
     // TODO: Clobbering SCC is not necessary for scratch instructions in the
     // entry.
     if (RS) {
@@ -1406,10 +1406,10 @@ void SIRegisterInfo::buildSpillLoadStore(
 
       // Piggy back on the liveness scan we just did see if SCC is dead.
       CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
-    } else if (LiveRegs) {
-      CanClobberSCC = !LiveRegs->contains(AMDGPU::SCC);
+    } else if (LiveUnits) {
+      CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
       for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
-        if (LiveRegs->available(MF->getRegInfo(), Reg)) {
+        if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
           SOffset = Reg;
           break;
         }
@@ -1425,9 +1425,9 @@ void SIRegisterInfo::buildSpillLoadStore(
       if (RS) {
         TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
       } else {
-        assert(LiveRegs);
+        assert(LiveUnits);
         for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
-          if (LiveRegs->available(MF->getRegInfo(), Reg)) {
+          if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
             TmpOffsetVGPR = Reg;
             break;
           }
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 57cdcb29fac4807..e45a0fae5d6c454 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -23,7 +23,7 @@ namespace llvm {
 
 class GCNSubtarget;
 class LiveIntervals;
-class LivePhysRegs;
+class LiveRegUnits;
 class RegisterBank;
 struct SGPRSpillBuilder;
 
@@ -415,14 +415,14 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
   // Insert spill or restore instructions.
   // When lowering spill pseudos, the RegScavenger should be set.
   // For creating spill instructions during frame lowering, where no scavenger
-  // is available, LiveRegs can be used.
+  // is available, LiveUnits can be used.
   void buildSpillLoadStore(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, const DebugLoc &DL,
                            unsigned LoadStoreOp, int Index, Register ValueReg,
                            bool ValueIsKill, MCRegister ScratchOffsetReg,
                            int64_t InstrOffset, MachineMemOperand *MMO,
                            RegScavenger *RS,
-                           LivePhysRegs *LiveRegs = nullptr) const;
+                           LiveRegUnits *LiveUnits = nullptr) const;
 
   // Return alignment in register file of first register in a register tuple.
   unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const {

>From 8ea9e74682e590026860995c658fe4c4b3721ad3 Mon Sep 17 00:00:00 2001
From: Pranav <Pranav.Taneja at amd.com>
Date: Tue, 19 Sep 2023 05:45:36 +0000
Subject: [PATCH 2/2] [SIFrameLowering] Added TODO to findUnusedRegister

We need an 'isRegUnitUsed' function similar to
'isPhysRegUsed' in order to implement the loop
in 'findUnusedRegister' in terms of MCRegUnits.
---
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 279e8bab4b46dcf..8d2a36791970dab 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -28,6 +28,8 @@ static cl::opt<bool> EnableSpillVGPRToAGPR(
 
 // Find a register matching \p RC from \p LiveUnits which is unused and
 // available throughout the function. On failure, returns AMDGPU::NoRegister.
+// TODO: Rewrite the loop here to iterate over MCRegUnits instead of MCRegister.
+// This will reduce the number of iterations and redundant checking.
 static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
                                      const LiveRegUnits &LiveUnits,
                                      const TargetRegisterClass &RC) {



More information about the llvm-commits mailing list