[llvm] [AMDGPU] [SIFrameLowering] Use LiveRegUnits instead of LivePhysRegs (PR #65962)
Pranav Taneja via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 18 22:51:53 PDT 2023
https://github.com/pranavTanejaAMD updated https://github.com/llvm/llvm-project/pull/65962
>From a69dec01c18e5caa71bb13709628fb46efae92c6 Mon Sep 17 00:00:00 2001
From: Pranav <Pranav.Taneja at amd.com>
Date: Thu, 14 Sep 2023 06:44:24 +0000
Subject: [PATCH 1/2] [SIFrameLowering] Replaced LivePhysRegs with
LiveRegUnits.
---
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 187 +++++++++++----------
llvm/lib/Target/AMDGPU/SIFrameLowering.h | 4 +-
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 18 +-
llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 6 +-
4 files changed, 108 insertions(+), 107 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 865caae240f3470..279e8bab4b46dcf 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -11,7 +11,7 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"
@@ -26,13 +26,14 @@ static cl::opt<bool> EnableSpillVGPRToAGPR(
cl::ReallyHidden,
cl::init(true));
-// Find a register matching \p RC from \p LiveRegs which is unused and available
-// throughout the function. On failure, returns AMDGPU::NoRegister.
+// Find a register matching \p RC from \p LiveUnits which is unused and
+// available throughout the function. On failure, returns AMDGPU::NoRegister.
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
- const LivePhysRegs &LiveRegs,
+ const LiveRegUnits &LiveUnits,
const TargetRegisterClass &RC) {
for (MCRegister Reg : RC) {
- if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
+ if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
+ !MRI.isReserved(Reg))
return Reg;
}
return MCRegister();
@@ -42,22 +43,21 @@ static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
// callee-save registers since they may appear to be free when this is called
// from canUseAsPrologue (during shrink wrapping), but then no longer be free
// when this is called from emitPrologue.
-static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
- LivePhysRegs &LiveRegs,
- const TargetRegisterClass &RC,
- bool Unused = false) {
+static MCRegister findScratchNonCalleeSaveRegister(
+ MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
+ const TargetRegisterClass &RC, bool Unused = false) {
// Mark callee saved registers as used so we will not choose them.
const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
for (unsigned i = 0; CSRegs[i]; ++i)
- LiveRegs.addReg(CSRegs[i]);
+ LiveUnits.addReg(CSRegs[i]);
// We are looking for a register that can be used throughout the entire
// function, so any use is unacceptable.
if (Unused)
- return findUnusedRegister(MRI, LiveRegs, RC);
+ return findUnusedRegister(MRI, LiveUnits, RC);
for (MCRegister Reg : RC) {
- if (LiveRegs.available(MRI, Reg))
+ if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
return Reg;
}
@@ -65,9 +65,9 @@ static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
}
/// Query target location for spilling SGPRs
-/// \p IncludeScratchCopy : Also look for free scratch SGPRs
+/// \p IncludeScratchCopy : Also look for free scratch SGPRs
static void getVGPRSpillLaneOrTempRegister(
- MachineFunction &MF, LivePhysRegs &LiveRegs, Register SGPR,
+ MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
bool IncludeScratchCopy = true) {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -81,11 +81,11 @@ static void getVGPRSpillLaneOrTempRegister(
// We need to save and restore the given SGPR.
Register ScratchSGPR;
- // 1: Try to save the given register into an unused scratch SGPR. The LiveRegs
- // should have all the callee saved registers marked as used. For certain
- // cases we skip copy to scratch SGPR.
+ // 1: Try to save the given register into an unused scratch SGPR. The
+ // LiveUnits should have all the callee saved registers marked as used. For
+ // certain cases we skip copy to scratch SGPR.
if (IncludeScratchCopy)
- ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveRegs, RC);
+ ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);
if (!ScratchSGPR) {
int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
@@ -118,7 +118,7 @@ static void getVGPRSpillLaneOrTempRegister(
MFI->addToPrologEpilogSGPRSpills(
SGPR, PrologEpilogSGPRSaveRestoreInfo(
SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
- LiveRegs.addReg(ScratchSGPR);
+ LiveUnits.addReg(ScratchSGPR);
LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
<< printReg(ScratchSGPR, TRI) << '\n');
}
@@ -129,7 +129,7 @@ static void getVGPRSpillLaneOrTempRegister(
// use.
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
- LivePhysRegs &LiveRegs, MachineFunction &MF,
+ LiveRegUnits &LiveUnits, MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
Register SpillReg, int FI, Register FrameReg,
@@ -142,18 +142,18 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
- LiveRegs.addReg(SpillReg);
+ LiveUnits.addReg(SpillReg);
bool IsKill = !MBB.isLiveIn(SpillReg);
TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
- DwordOff, MMO, nullptr, &LiveRegs);
+ DwordOff, MMO, nullptr, &LiveUnits);
if (IsKill)
- LiveRegs.removeReg(SpillReg);
+ LiveUnits.removeReg(SpillReg);
}
static void buildEpilogRestore(const GCNSubtarget &ST,
const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
- LivePhysRegs &LiveRegs, MachineFunction &MF,
+ LiveRegUnits &LiveUnits, MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, Register SpillReg, int FI,
@@ -167,7 +167,7 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
- DwordOff, MMO, nullptr, &LiveRegs);
+ DwordOff, MMO, nullptr, &LiveUnits);
}
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
@@ -195,18 +195,18 @@ static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addReg(GitPtrLo);
}
-static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
- const SIMachineFunctionInfo *FuncInfo,
- MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, bool IsProlog) {
- if (LiveRegs.empty()) {
- LiveRegs.init(TRI);
+static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
+ const SIMachineFunctionInfo *FuncInfo,
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, bool IsProlog) {
+ if (LiveUnits.empty()) {
+ LiveUnits.init(TRI);
if (IsProlog) {
- LiveRegs.addLiveIns(MBB);
+ LiveUnits.addLiveIns(MBB);
} else {
// In epilog.
- LiveRegs.addLiveOuts(MBB);
- LiveRegs.stepBackward(*MBBI);
+ LiveUnits.addLiveOuts(MBB);
+ LiveUnits.stepBackward(*MBBI);
}
}
}
@@ -228,7 +228,7 @@ class PrologEpilogSGPRSpillBuilder {
const SIRegisterInfo &TRI;
Register SuperReg;
const PrologEpilogSGPRSaveRestoreInfo SI;
- LivePhysRegs &LiveRegs;
+ LiveRegUnits &LiveUnits;
const DebugLoc &DL;
Register FrameReg;
ArrayRef<int16_t> SplitParts;
@@ -239,10 +239,10 @@ class PrologEpilogSGPRSpillBuilder {
MachineRegisterInfo &MRI = MF.getRegInfo();
assert(!MFI.isDeadObjectIndex(FI));
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
+ initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+ MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");
@@ -253,7 +253,7 @@ class PrologEpilogSGPRSpillBuilder {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg(SubReg);
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR,
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
FI, FrameReg, DwordOff);
DwordOff += 4;
}
@@ -287,9 +287,9 @@ class PrologEpilogSGPRSpillBuilder {
void restoreFromMemory(const int FI) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
+ initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+ MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");
@@ -298,8 +298,8 @@ class PrologEpilogSGPRSpillBuilder {
? SuperReg
: Register(TRI.getSubReg(SuperReg, SplitParts[I]));
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR,
- FI, FrameReg, DwordOff);
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
+ TmpVGPR, FI, FrameReg, DwordOff);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
.addReg(TmpVGPR, RegState::Kill);
DwordOff += 4;
@@ -335,11 +335,12 @@ class PrologEpilogSGPRSpillBuilder {
MachineBasicBlock::iterator MI,
const DebugLoc &DL, const SIInstrInfo *TII,
const SIRegisterInfo &TRI,
- LivePhysRegs &LiveRegs, Register FrameReg)
+ LiveRegUnits &LiveUnits, Register FrameReg)
: MI(MI), MBB(MBB), MF(*MBB.getParent()),
ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
- SuperReg(Reg), SI(SI), LiveRegs(LiveRegs), DL(DL), FrameReg(FrameReg) {
+ SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
+ FrameReg(FrameReg) {
const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
SplitParts = TRI.getRegSplitParts(RC, EltSize);
NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
@@ -396,9 +397,9 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
if (ST.isAmdPalOS()) {
// Extract the scratch offset from the descriptor in the GIT
- LivePhysRegs LiveRegs;
- LiveRegs.init(*TRI);
- LiveRegs.addLiveIns(MBB);
+ LiveRegUnits LiveUnits;
+ LiveUnits.init(*TRI);
+ LiveUnits.addLiveIns(MBB);
// Find unused reg to load flat scratch init into
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -409,8 +410,8 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
for (MCPhysReg Reg : AllSGPR64s) {
- if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
- !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
+ if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
+ MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
FlatScrInit = Reg;
break;
}
@@ -873,7 +874,7 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
// Activate only the inactive lanes when \p EnableInactiveLanes is true.
// Otherwise, activate all lanes. It returns the saved exec.
-static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
+static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -886,14 +887,14 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
const SIRegisterInfo &TRI = TII->getRegisterInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
+ initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
ScratchExecCopy = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, *TRI.getWaveMaskRegClass());
+ MRI, LiveUnits, *TRI.getWaveMaskRegClass());
if (!ScratchExecCopy)
report_fatal_error("failed to find free scratch register");
- LiveRegs.addReg(ScratchExecCopy);
+ LiveUnits.addReg(ScratchExecCopy);
const unsigned SaveExecOpc =
ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
@@ -909,7 +910,7 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
void SIFrameLowering::emitCSRSpillStores(
MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc &DL, LivePhysRegs &LiveRegs,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
Register FrameReg, Register FramePtrRegScratchCopy) const {
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -924,7 +925,7 @@ void SIFrameLowering::emitCSRSpillStores(
FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
if (!WWMScratchRegs.empty())
ScratchExecCopy =
- buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
+ buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
/*IsProlog*/ true, /*EnableInactiveLanes*/ true);
auto StoreWWMRegisters =
@@ -932,7 +933,7 @@ void SIFrameLowering::emitCSRSpillStores(
for (const auto &Reg : WWMRegs) {
Register VGPR = Reg.first;
int FI = Reg.second;
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
VGPR, FI, FrameReg);
}
};
@@ -943,7 +944,7 @@ void SIFrameLowering::emitCSRSpillStores(
unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
} else {
- ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
+ ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
/*IsProlog*/ true,
/*EnableInactiveLanes*/ false);
}
@@ -955,7 +956,7 @@ void SIFrameLowering::emitCSRSpillStores(
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
.addReg(ScratchExecCopy, RegState::Kill);
- LiveRegs.addReg(ScratchExecCopy);
+ LiveUnits.addReg(ScratchExecCopy);
}
Register FramePtrReg = FuncInfo->getFrameOffsetReg();
@@ -971,7 +972,7 @@ void SIFrameLowering::emitCSRSpillStores(
continue;
PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
- LiveRegs, FrameReg);
+ LiveUnits, FrameReg);
SB.save();
}
@@ -986,16 +987,16 @@ void SIFrameLowering::emitCSRSpillStores(
MBB.sortUniqueLiveIns();
}
- if (!LiveRegs.empty()) {
+ if (!LiveUnits.empty()) {
for (MCPhysReg Reg : ScratchSGPRs)
- LiveRegs.addReg(Reg);
+ LiveUnits.addReg(Reg);
}
}
}
void SIFrameLowering::emitCSRSpillRestores(
MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc &DL, LivePhysRegs &LiveRegs,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
Register FrameReg, Register FramePtrRegScratchCopy) const {
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -1015,7 +1016,7 @@ void SIFrameLowering::emitCSRSpillRestores(
continue;
PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
- LiveRegs, FrameReg);
+ LiveUnits, FrameReg);
SB.restore();
}
@@ -1027,7 +1028,7 @@ void SIFrameLowering::emitCSRSpillRestores(
FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
if (!WWMScratchRegs.empty())
ScratchExecCopy =
- buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
+ buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
/*IsProlog*/ false, /*EnableInactiveLanes*/ true);
auto RestoreWWMRegisters =
@@ -1035,7 +1036,7 @@ void SIFrameLowering::emitCSRSpillRestores(
for (const auto &Reg : WWMRegs) {
Register VGPR = Reg.first;
int FI = Reg.second;
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
VGPR, FI, FrameReg);
}
};
@@ -1046,7 +1047,7 @@ void SIFrameLowering::emitCSRSpillRestores(
unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
} else {
- ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
+ ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
/*IsProlog*/ false,
/*EnableInactiveLanes*/ false);
}
@@ -1079,7 +1080,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
Register FramePtrReg = FuncInfo->getFrameOffsetReg();
Register BasePtrReg =
TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
- LivePhysRegs LiveRegs;
+ LiveRegUnits LiveUnits;
MachineBasicBlock::iterator MBBI = MBB.begin();
// DebugLoc must be unknown since the first instruction with DebugLoc is used
@@ -1097,14 +1098,14 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
Register FramePtrRegScratchCopy;
if (!HasFP && !hasFP(MF)) {
// Emit the CSR spill stores with SP base register.
- emitCSRSpillStores(MF, MBB, MBBI, DL, LiveRegs, StackPtrReg,
+ emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
FramePtrRegScratchCopy);
} else {
// CSR spill stores will use FP as base register.
Register SGPRForFPSaveRestoreCopy =
FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
+ initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
if (SGPRForFPSaveRestoreCopy) {
// Copy FP to the scratch register now and emit the CFI entry. It avoids
// the extra FP copy needed in the other two cases when FP is spilled to
@@ -1112,18 +1113,18 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
PrologEpilogSGPRSpillBuilder SB(
FramePtrReg,
FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
- DL, TII, TRI, LiveRegs, FramePtrReg);
+ DL, TII, TRI, LiveUnits, FramePtrReg);
SB.save();
- LiveRegs.addReg(SGPRForFPSaveRestoreCopy);
+ LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
} else {
// Copy FP into a new scratch register so that its previous value can be
// spilled after setting up the new frame.
FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass);
+ MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
if (!FramePtrRegScratchCopy)
report_fatal_error("failed to find free scratch register");
- LiveRegs.addReg(FramePtrRegScratchCopy);
+ LiveUnits.addReg(FramePtrRegScratchCopy);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
.addReg(FramePtrReg);
}
@@ -1133,9 +1134,9 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
const unsigned Alignment = MFI.getMaxAlign().value();
RoundedSize += Alignment;
- if (LiveRegs.empty()) {
- LiveRegs.init(TRI);
- LiveRegs.addLiveIns(MBB);
+ if (LiveUnits.empty()) {
+ LiveUnits.init(TRI);
+ LiveUnits.addLiveIns(MBB);
}
// s_add_i32 s33, s32, NumBytes
@@ -1158,10 +1159,10 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
// If FP is used, emit the CSR spills with FP base register.
if (HasFP) {
- emitCSRSpillStores(MF, MBB, MBBI, DL, LiveRegs, FramePtrReg,
+ emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
FramePtrRegScratchCopy);
if (FramePtrRegScratchCopy)
- LiveRegs.removeReg(FramePtrRegScratchCopy);
+ LiveUnits.removeReg(FramePtrRegScratchCopy);
}
// If we need a base pointer, set it up here. It's whatever the value of
@@ -1210,7 +1211,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
- LivePhysRegs LiveRegs;
+ LiveRegUnits LiveUnits;
// Get the insert location for the epilogue. If there were no terminators in
// the block, get the last instruction.
MachineBasicBlock::iterator MBBI = MBB.end();
@@ -1240,19 +1241,19 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
// SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
// into a new scratch register and copy to FP later when other registers are
// restored from the current stack frame.
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
+ initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
if (SGPRForFPSaveRestoreCopy) {
- LiveRegs.addReg(SGPRForFPSaveRestoreCopy);
+ LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
} else {
FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass);
+ MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
if (!FramePtrRegScratchCopy)
report_fatal_error("failed to find free scratch register");
- LiveRegs.addReg(FramePtrRegScratchCopy);
+ LiveUnits.addReg(FramePtrRegScratchCopy);
}
- emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveRegs, FramePtrReg,
+ emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
FramePtrRegScratchCopy);
}
@@ -1275,7 +1276,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MIB.setMIFlag(MachineInstr::FrameDestroy);
} else {
// Insert the CSR spill restores with SP as the base register.
- emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveRegs, StackPtrReg,
+ emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
FramePtrRegScratchCopy);
}
}
@@ -1472,30 +1473,30 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
- LivePhysRegs LiveRegs;
- LiveRegs.init(*TRI);
+ LiveRegUnits LiveUnits;
+ LiveUnits.init(*TRI);
// Initially mark callee saved registers as used so we will not choose them
// while looking for scratch SGPRs.
const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
for (unsigned I = 0; CSRegs[I]; ++I)
- LiveRegs.addReg(CSRegs[I]);
+ LiveUnits.addReg(CSRegs[I]);
const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();
if (NeedExecCopyReservedReg) {
Register ReservedReg = MFI->getSGPRForEXECCopy();
assert(ReservedReg && "Should have reserved an SGPR for EXEC copy.");
- Register UnusedScratchReg = findUnusedRegister(MRI, LiveRegs, RC);
+ Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
if (UnusedScratchReg) {
// If found any unused scratch SGPR, reserve the register itself for Exec
// copy and there is no need for any spills in that case.
MFI->setSGPRForEXECCopy(UnusedScratchReg);
- LiveRegs.addReg(UnusedScratchReg);
+ LiveUnits.addReg(UnusedScratchReg);
} else {
// Needs spill.
assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedReg) &&
"Re-reserving spill slot for EXEC copy register");
- getVGPRSpillLaneOrTempRegister(MF, LiveRegs, ReservedReg, RC,
+ getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedReg, RC,
/*IncludeScratchCopy=*/false);
}
}
@@ -1516,14 +1517,14 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
Register FramePtrReg = MFI->getFrameOffsetReg();
assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
"Re-reserving spill slot for FP");
- getVGPRSpillLaneOrTempRegister(MF, LiveRegs, FramePtrReg);
+ getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
}
if (TRI->hasBasePointer(MF)) {
Register BasePtrReg = TRI->getBaseRegister();
assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
"Re-reserving spill slot for BP");
- getVGPRSpillLaneOrTempRegister(MF, LiveRegs, BasePtrReg);
+ getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index 0060fc0be431b18..b3feb759ed811fb 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -38,11 +38,11 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
bool NeedExecCopyReservedReg) const;
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
- LivePhysRegs &LiveRegs, Register FrameReg,
+ LiveRegUnits &LiveUnits, Register FrameReg,
Register FramePtrRegScratchCopy) const;
void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
- LivePhysRegs &LiveRegs, Register FrameReg,
+ LiveRegUnits &LiveUnits, Register FrameReg,
Register FramePtrRegScratchCopy) const;
bool
assignCalleeSavedSpillSlots(MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index b69bea225ac2efb..e065b104db4eb33 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -19,7 +19,7 @@
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
@@ -1309,8 +1309,8 @@ void SIRegisterInfo::buildSpillLoadStore(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL,
unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
- RegScavenger *RS, LivePhysRegs *LiveRegs) const {
- assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
+ RegScavenger *RS, LiveRegUnits *LiveUnits) const {
+ assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");
MachineFunction *MF = MBB.getParent();
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -1398,7 +1398,7 @@ void SIRegisterInfo::buildSpillLoadStore(
SOffset = MCRegister();
// We don't have access to the register scavenger if this function is called
- // during PEI::scavengeFrameVirtualRegs() so use LiveRegs in this case.
+ // during PEI::scavengeFrameVirtualRegs() so use LiveUnits in this case.
// TODO: Clobbering SCC is not necessary for scratch instructions in the
// entry.
if (RS) {
@@ -1406,10 +1406,10 @@ void SIRegisterInfo::buildSpillLoadStore(
// Piggy back on the liveness scan we just did see if SCC is dead.
CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
- } else if (LiveRegs) {
- CanClobberSCC = !LiveRegs->contains(AMDGPU::SCC);
+ } else if (LiveUnits) {
+ CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
- if (LiveRegs->available(MF->getRegInfo(), Reg)) {
+ if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
SOffset = Reg;
break;
}
@@ -1425,9 +1425,9 @@ void SIRegisterInfo::buildSpillLoadStore(
if (RS) {
TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
} else {
- assert(LiveRegs);
+ assert(LiveUnits);
for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
- if (LiveRegs->available(MF->getRegInfo(), Reg)) {
+ if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
TmpOffsetVGPR = Reg;
break;
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 57cdcb29fac4807..e45a0fae5d6c454 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -23,7 +23,7 @@ namespace llvm {
class GCNSubtarget;
class LiveIntervals;
-class LivePhysRegs;
+class LiveRegUnits;
class RegisterBank;
struct SGPRSpillBuilder;
@@ -415,14 +415,14 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
// Insert spill or restore instructions.
// When lowering spill pseudos, the RegScavenger should be set.
// For creating spill instructions during frame lowering, where no scavenger
- // is available, LiveRegs can be used.
+ // is available, LiveUnits can be used.
void buildSpillLoadStore(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, const DebugLoc &DL,
unsigned LoadStoreOp, int Index, Register ValueReg,
bool ValueIsKill, MCRegister ScratchOffsetReg,
int64_t InstrOffset, MachineMemOperand *MMO,
RegScavenger *RS,
- LivePhysRegs *LiveRegs = nullptr) const;
+ LiveRegUnits *LiveUnits = nullptr) const;
// Return alignment in register file of first register in a register tuple.
unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const {
>From 8ea9e74682e590026860995c658fe4c4b3721ad3 Mon Sep 17 00:00:00 2001
From: Pranav <Pranav.Taneja at amd.com>
Date: Tue, 19 Sep 2023 05:45:36 +0000
Subject: [PATCH 2/2] [SIFrameLowering] Added TODO to findUnusedRegister
We need an 'isRegUnitUsed' function similar to
'isPhysRegUsed' in order to implement the loop
in 'findUnusedRegister' in terms of MCRegUnits.
---
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 279e8bab4b46dcf..8d2a36791970dab 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -28,6 +28,8 @@ static cl::opt<bool> EnableSpillVGPRToAGPR(
// Find a register matching \p RC from \p LiveUnits which is unused and
// available throughout the function. On failure, returns AMDGPU::NoRegister.
+// TODO: Rewrite the loop here to iterate over MCRegUnits instead of MCRegister.
+// This will reduce the number of iterations and redundant checking.
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
const LiveRegUnits &LiveUnits,
const TargetRegisterClass &RC) {
More information about the llvm-commits
mailing list