[llvm] [AMDGPU] [SIFrameLowering] Use LiveRegUnits instead of LivePhysRegs (PR #65962)
Pranav Taneja via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 14 04:06:14 PDT 2023
https://github.com/pranavTanejaAMD updated https://github.com/llvm/llvm-project/pull/65962:
From cd00501675c7ec87999b35690376a83e8cc8bf22 Mon Sep 17 00:00:00 2001
From: Pranav <Pranav.Taneja at amd.com>
Date: Thu, 14 Sep 2023 06:44:24 +0000
Subject: [PATCH] [SIFrameLowering] Replaced LivePhysRegs with LiveRegUnits.
---
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 370 ++++---
llvm/lib/Target/AMDGPU/SIFrameLowering.h | 18 +-
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 1124 ++++++++++----------
llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 41 +-
4 files changed, 768 insertions(+), 785 deletions(-)
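For context when reading the converted call sites below: LivePhysRegs::available(MRI, Reg)
rejects reserved registers internally, while LiveRegUnits::available(Reg) only consults the
tracked register units. That is why the patch pairs every converted query with an explicit
!MRI.isReserved(Reg) check, and why !LiveRegs->contains(AMDGPU::SCC) becomes
LiveUnits->available(AMDGPU::SCC). A minimal C++ sketch of the new pattern, illustrative
only and not part of the patch (the helper name is invented):

    #include "llvm/CodeGen/LiveRegUnits.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"

    using namespace llvm;

    // LiveRegUnits tracks liveness per register unit and does not filter out
    // reserved registers, so callers add the MRI.isReserved() test themselves.
    static bool isFreeForScratch(const MachineRegisterInfo &MRI,
                                 const LiveRegUnits &LiveUnits, MCRegister Reg) {
      return LiveUnits.available(Reg) && !MRI.isReserved(Reg);
    }
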
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 865caae240f3470..84d5740e68e08d6 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -11,7 +11,7 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"
@@ -20,19 +20,19 @@ using namespace llvm;
#define DEBUG_TYPE "frame-info"
-static cl::opt<bool> EnableSpillVGPRToAGPR(
- "amdgpu-spill-vgpr-to-agpr",
- cl::desc("Enable spilling VGPRs to AGPRs"),
- cl::ReallyHidden,
- cl::init(true));
+static cl::opt<bool>
+ EnableSpillVGPRToAGPR("amdgpu-spill-vgpr-to-agpr",
+ cl::desc("Enable spilling VGPRs to AGPRs"),
+ cl::ReallyHidden, cl::init(true));
-// Find a register matching \p RC from \p LiveRegs which is unused and available
-// throughout the function. On failure, returns AMDGPU::NoRegister.
+// Find a register matching \p RC from \p LiveUnits which is unused and
+// available throughout the function. On failure, returns AMDGPU::NoRegister.
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
- const LivePhysRegs &LiveRegs,
+ const LiveRegUnits &LiveUnits,
const TargetRegisterClass &RC) {
for (MCRegister Reg : RC) {
- if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
+ if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
+ !MRI.isReserved(Reg))
return Reg;
}
return MCRegister();
@@ -42,22 +42,21 @@ static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
// callee-save registers since they may appear to be free when this is called
// from canUseAsPrologue (during shrink wrapping), but then no longer be free
// when this is called from emitPrologue.
-static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
- LivePhysRegs &LiveRegs,
- const TargetRegisterClass &RC,
- bool Unused = false) {
+static MCRegister findScratchNonCalleeSaveRegister(
+ MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
+ const TargetRegisterClass &RC, bool Unused = false) {
// Mark callee saved registers as used so we will not choose them.
const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
for (unsigned i = 0; CSRegs[i]; ++i)
- LiveRegs.addReg(CSRegs[i]);
+ LiveUnits.addReg(CSRegs[i]);
// We are looking for a register that can be used throughout the entire
// function, so any use is unacceptable.
if (Unused)
- return findUnusedRegister(MRI, LiveRegs, RC);
+ return findUnusedRegister(MRI, LiveUnits, RC);
for (MCRegister Reg : RC) {
- if (LiveRegs.available(MRI, Reg))
+ if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
return Reg;
}
@@ -65,9 +64,9 @@ static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
}
/// Query target location for spilling SGPRs
-/// \p IncludeScratchCopy : Also look for free scratch SGPRs
+/// \p IncludeScratchCopy : Also look for free scratch SGPRs
static void getVGPRSpillLaneOrTempRegister(
- MachineFunction &MF, LivePhysRegs &LiveRegs, Register SGPR,
+ MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
bool IncludeScratchCopy = true) {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -81,11 +80,11 @@ static void getVGPRSpillLaneOrTempRegister(
// We need to save and restore the given SGPR.
Register ScratchSGPR;
- // 1: Try to save the given register into an unused scratch SGPR. The LiveRegs
- // should have all the callee saved registers marked as used. For certain
- // cases we skip copy to scratch SGPR.
+ // 1: Try to save the given register into an unused scratch SGPR. The
+ // LiveUnits should have all the callee saved registers marked as used. For
+ // certain cases we skip copy to scratch SGPR.
if (IncludeScratchCopy)
- ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveRegs, RC);
+ ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);
if (!ScratchSGPR) {
int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
@@ -118,7 +117,7 @@ static void getVGPRSpillLaneOrTempRegister(
MFI->addToPrologEpilogSGPRSpills(
SGPR, PrologEpilogSGPRSaveRestoreInfo(
SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
- LiveRegs.addReg(ScratchSGPR);
+ LiveUnits.addReg(ScratchSGPR);
LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
<< printReg(ScratchSGPR, TRI) << '\n');
}
@@ -129,7 +128,7 @@ static void getVGPRSpillLaneOrTempRegister(
// use.
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
- LivePhysRegs &LiveRegs, MachineFunction &MF,
+ LiveRegUnits &LiveUnits, MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
Register SpillReg, int FI, Register FrameReg,
@@ -142,18 +141,18 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
- LiveRegs.addReg(SpillReg);
+ LiveUnits.addReg(SpillReg);
bool IsKill = !MBB.isLiveIn(SpillReg);
TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
- DwordOff, MMO, nullptr, &LiveRegs);
+ DwordOff, MMO, nullptr, &LiveUnits);
if (IsKill)
- LiveRegs.removeReg(SpillReg);
+ LiveUnits.removeReg(SpillReg);
}
static void buildEpilogRestore(const GCNSubtarget &ST,
const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
- LivePhysRegs &LiveRegs, MachineFunction &MF,
+ LiveRegUnits &LiveUnits, MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, Register SpillReg, int FI,
@@ -167,7 +166,7 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
- DwordOff, MMO, nullptr, &LiveRegs);
+ DwordOff, MMO, nullptr, &LiveUnits);
}
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
@@ -191,22 +190,21 @@ static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
MF->getRegInfo().addLiveIn(GitPtrLo);
MBB.addLiveIn(GitPtrLo);
- BuildMI(MBB, I, DL, SMovB32, TargetLo)
- .addReg(GitPtrLo);
+ BuildMI(MBB, I, DL, SMovB32, TargetLo).addReg(GitPtrLo);
}
-static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
- const SIMachineFunctionInfo *FuncInfo,
- MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, bool IsProlog) {
- if (LiveRegs.empty()) {
- LiveRegs.init(TRI);
+static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
+ const SIMachineFunctionInfo *FuncInfo,
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, bool IsProlog) {
+ if (LiveUnits.empty()) {
+ LiveUnits.init(TRI);
if (IsProlog) {
- LiveRegs.addLiveIns(MBB);
+ LiveUnits.addLiveIns(MBB);
} else {
// In epilog.
- LiveRegs.addLiveOuts(MBB);
- LiveRegs.stepBackward(*MBBI);
+ LiveUnits.addLiveOuts(MBB);
+ LiveUnits.stepBackward(*MBBI);
}
}
}
@@ -228,7 +226,7 @@ class PrologEpilogSGPRSpillBuilder {
const SIRegisterInfo &TRI;
Register SuperReg;
const PrologEpilogSGPRSaveRestoreInfo SI;
- LivePhysRegs &LiveRegs;
+ LiveRegUnits &LiveUnits;
const DebugLoc &DL;
Register FrameReg;
ArrayRef<int16_t> SplitParts;
@@ -239,10 +237,10 @@ class PrologEpilogSGPRSpillBuilder {
MachineRegisterInfo &MRI = MF.getRegInfo();
assert(!MFI.isDeadObjectIndex(FI));
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
+ initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+ MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");
@@ -253,7 +251,7 @@ class PrologEpilogSGPRSpillBuilder {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg(SubReg);
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR,
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
FI, FrameReg, DwordOff);
DwordOff += 4;
}
@@ -287,9 +285,9 @@ class PrologEpilogSGPRSpillBuilder {
void restoreFromMemory(const int FI) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
+ initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+ MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");
@@ -298,8 +296,8 @@ class PrologEpilogSGPRSpillBuilder {
? SuperReg
: Register(TRI.getSubReg(SuperReg, SplitParts[I]));
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR,
- FI, FrameReg, DwordOff);
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
+ TmpVGPR, FI, FrameReg, DwordOff);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
.addReg(TmpVGPR, RegState::Kill);
DwordOff += 4;
@@ -335,11 +333,12 @@ class PrologEpilogSGPRSpillBuilder {
MachineBasicBlock::iterator MI,
const DebugLoc &DL, const SIInstrInfo *TII,
const SIRegisterInfo &TRI,
- LivePhysRegs &LiveRegs, Register FrameReg)
+ LiveRegUnits &LiveUnits, Register FrameReg)
: MI(MI), MBB(MBB), MF(*MBB.getParent()),
ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
- SuperReg(Reg), SI(SI), LiveRegs(LiveRegs), DL(DL), FrameReg(FrameReg) {
+ SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
+ FrameReg(FrameReg) {
const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
SplitParts = TRI.getRegSplitParts(RC, EltSize);
NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
@@ -396,9 +395,9 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
if (ST.isAmdPalOS()) {
// Extract the scratch offset from the descriptor in the GIT
- LivePhysRegs LiveRegs;
- LiveRegs.init(*TRI);
- LiveRegs.addLiveIns(MBB);
+ LiveRegUnits LiveUnits;
+ LiveUnits.init(*TRI);
+ LiveUnits.addLiveIns(MBB);
// Find unused reg to load flat scratch init into
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -409,8 +408,8 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
for (MCPhysReg Reg : AllSGPR64s) {
- if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
- !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
+ if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
+ MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
FlatScrInit = Reg;
break;
}
@@ -444,8 +443,8 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
// Mask the offset in [47:0] of the descriptor
const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
- .addReg(FlatScrInitHi)
- .addImm(0xffff);
+ .addReg(FlatScrInitHi)
+ .addImm(0xffff);
And->getOperand(3).setIsDead(); // Mark SCC as dead.
} else {
Register FlatScratchInitReg =
@@ -464,33 +463,33 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
if (ST.flatScratchIsPointer()) {
if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
- .addReg(FlatScrInitLo)
- .addReg(ScratchWaveOffsetReg);
- auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
- FlatScrInitHi)
- .addReg(FlatScrInitHi)
- .addImm(0);
+ .addReg(FlatScrInitLo)
+ .addReg(ScratchWaveOffsetReg);
+ auto Addc =
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
+ .addReg(FlatScrInitHi)
+ .addImm(0);
Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
- addReg(FlatScrInitLo).
- addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
- (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
- addReg(FlatScrInitHi).
- addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
- (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
+ .addReg(FlatScrInitLo)
+ .addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
+ (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
+ .addReg(FlatScrInitHi)
+ .addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
+ (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
return;
}
// For GFX9.
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
- .addReg(FlatScrInitLo)
- .addReg(ScratchWaveOffsetReg);
- auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
- AMDGPU::FLAT_SCR_HI)
- .addReg(FlatScrInitHi)
- .addImm(0);
+ .addReg(FlatScrInitLo)
+ .addReg(ScratchWaveOffsetReg);
+ auto Addc =
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
+ .addReg(FlatScrInitHi)
+ .addImm(0);
Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
return;
@@ -500,7 +499,7 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
// Copy the size in bytes.
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
- .addReg(FlatScrInitHi, RegState::Kill);
+ .addReg(FlatScrInitHi, RegState::Kill);
// Add wave offset in bytes to private base offset.
// See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
@@ -509,18 +508,18 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
.addReg(ScratchWaveOffsetReg);
// Convert offset to 256-byte units.
- auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
- AMDGPU::FLAT_SCR_HI)
- .addReg(FlatScrInitLo, RegState::Kill)
- .addImm(8);
+ auto LShr =
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
+ .addReg(FlatScrInitLo, RegState::Kill)
+ .addImm(8);
LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
}
// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
- for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
- I != E; ++I) {
+ for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;
+ ++I) {
if (!MFI.isDeadObjectIndex(I))
return false;
}
@@ -561,7 +560,8 @@ Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
- AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
+ AllSGPR128s = AllSGPR128s.slice(
+ std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
// Skip the last N reserved elements because they should have already been
// reserved for VCC etc.
@@ -746,11 +746,11 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
- .addReg(Rsrc01)
- .addImm(EncodedOffset) // offset
- .addImm(0) // cpol
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
- .addMemOperand(MMO);
+ .addReg(Rsrc01)
+ .addImm(EncodedOffset) // offset
+ .addImm(0) // cpol
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
+ .addMemOperand(MMO);
// The driver will always set the SRD for wave 64 (bits 118:117 of
// descriptor / bits 22:21 of third sub-reg will be 0b11)
@@ -761,9 +761,7 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
// TODO: convert to using SCRATCH instructions or multiple SRD buffers
if (ST.isWave32()) {
const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
- BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
- .addImm(21)
- .addReg(Rsrc03);
+ BuildMI(MBB, I, DL, SBitsetB32, Rsrc03).addImm(21).addReg(Rsrc03);
}
} else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
assert(!ST.isAmdHsaOrMesa(Fn));
@@ -782,8 +780,8 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
BuildMI(MBB, I, DL, Mov64, Rsrc01)
- .addReg(MFI->getImplicitBufferPtrUserSGPR())
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ .addReg(MFI->getImplicitBufferPtrUserSGPR())
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
} else {
const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
@@ -794,11 +792,11 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
MachineMemOperand::MODereferenceable,
8, Align(4));
BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
- .addReg(MFI->getImplicitBufferPtrUserSGPR())
- .addImm(0) // offset
- .addImm(0) // cpol
- .addMemOperand(MMO)
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ .addReg(MFI->getImplicitBufferPtrUserSGPR())
+ .addImm(0) // offset
+ .addImm(0) // cpol
+ .addMemOperand(MMO)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
@@ -808,22 +806,21 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
BuildMI(MBB, I, DL, SMovB32, Rsrc0)
- .addExternalSymbol("SCRATCH_RSRC_DWORD0")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
BuildMI(MBB, I, DL, SMovB32, Rsrc1)
- .addExternalSymbol("SCRATCH_RSRC_DWORD1")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
+ .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
}
BuildMI(MBB, I, DL, SMovB32, Rsrc2)
- .addImm(Rsrc23 & 0xffffffff)
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ .addImm(Rsrc23 & 0xffffffff)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
BuildMI(MBB, I, DL, SMovB32, Rsrc3)
- .addImm(Rsrc23 >> 32)
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ .addImm(Rsrc23 >> 32)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
} else if (ST.isAmdHsaOrMesa(Fn)) {
assert(PreloadedScratchRsrcReg);
@@ -852,9 +849,9 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
.addReg(ScratchWaveOffsetReg)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
- .addReg(ScratchRsrcSub1)
- .addImm(0)
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ .addReg(ScratchRsrcSub1)
+ .addImm(0)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
}
@@ -873,7 +870,7 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
// Activate only the inactive lanes when \p EnableInactiveLanes is true.
// Otherwise, activate all lanes. It returns the saved exec.
-static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
+static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -886,14 +883,14 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
const SIRegisterInfo &TRI = TII->getRegisterInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
+ initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
ScratchExecCopy = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, *TRI.getWaveMaskRegClass());
+ MRI, LiveUnits, *TRI.getWaveMaskRegClass());
if (!ScratchExecCopy)
report_fatal_error("failed to find free scratch register");
- LiveRegs.addReg(ScratchExecCopy);
+ LiveUnits.addReg(ScratchExecCopy);
const unsigned SaveExecOpc =
ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
@@ -909,7 +906,7 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
void SIFrameLowering::emitCSRSpillStores(
MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc &DL, LivePhysRegs &LiveRegs,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
Register FrameReg, Register FramePtrRegScratchCopy) const {
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -924,7 +921,7 @@ void SIFrameLowering::emitCSRSpillStores(
FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
if (!WWMScratchRegs.empty())
ScratchExecCopy =
- buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
+ buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
/*IsProlog*/ true, /*EnableInactiveLanes*/ true);
auto StoreWWMRegisters =
@@ -932,7 +929,7 @@ void SIFrameLowering::emitCSRSpillStores(
for (const auto &Reg : WWMRegs) {
Register VGPR = Reg.first;
int FI = Reg.second;
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
VGPR, FI, FrameReg);
}
};
@@ -943,7 +940,7 @@ void SIFrameLowering::emitCSRSpillStores(
unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
} else {
- ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
+ ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
/*IsProlog*/ true,
/*EnableInactiveLanes*/ false);
}
@@ -955,7 +952,7 @@ void SIFrameLowering::emitCSRSpillStores(
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
.addReg(ScratchExecCopy, RegState::Kill);
- LiveRegs.addReg(ScratchExecCopy);
+ LiveUnits.addReg(ScratchExecCopy);
}
Register FramePtrReg = FuncInfo->getFrameOffsetReg();
@@ -971,7 +968,7 @@ void SIFrameLowering::emitCSRSpillStores(
continue;
PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
- LiveRegs, FrameReg);
+ LiveUnits, FrameReg);
SB.save();
}
@@ -986,16 +983,16 @@ void SIFrameLowering::emitCSRSpillStores(
MBB.sortUniqueLiveIns();
}
- if (!LiveRegs.empty()) {
+ if (!LiveUnits.empty()) {
for (MCPhysReg Reg : ScratchSGPRs)
- LiveRegs.addReg(Reg);
+ LiveUnits.addReg(Reg);
}
}
}
void SIFrameLowering::emitCSRSpillRestores(
MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc &DL, LivePhysRegs &LiveRegs,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
Register FrameReg, Register FramePtrRegScratchCopy) const {
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -1015,7 +1012,7 @@ void SIFrameLowering::emitCSRSpillRestores(
continue;
PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
- LiveRegs, FrameReg);
+ LiveUnits, FrameReg);
SB.restore();
}
@@ -1027,7 +1024,7 @@ void SIFrameLowering::emitCSRSpillRestores(
FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
if (!WWMScratchRegs.empty())
ScratchExecCopy =
- buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
+ buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
/*IsProlog*/ false, /*EnableInactiveLanes*/ true);
auto RestoreWWMRegisters =
@@ -1035,7 +1032,7 @@ void SIFrameLowering::emitCSRSpillRestores(
for (const auto &Reg : WWMRegs) {
Register VGPR = Reg.first;
int FI = Reg.second;
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
VGPR, FI, FrameReg);
}
};
@@ -1046,7 +1043,7 @@ void SIFrameLowering::emitCSRSpillRestores(
unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
} else {
- ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
+ ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
/*IsProlog*/ false,
/*EnableInactiveLanes*/ false);
}
@@ -1079,7 +1076,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
Register FramePtrReg = FuncInfo->getFrameOffsetReg();
Register BasePtrReg =
TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
- LivePhysRegs LiveRegs;
+ LiveRegUnits LiveUnits;
MachineBasicBlock::iterator MBBI = MBB.begin();
// DebugLoc must be unknown since the first instruction with DebugLoc is used
@@ -1097,14 +1094,14 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
Register FramePtrRegScratchCopy;
if (!HasFP && !hasFP(MF)) {
// Emit the CSR spill stores with SP base register.
- emitCSRSpillStores(MF, MBB, MBBI, DL, LiveRegs, StackPtrReg,
+ emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
FramePtrRegScratchCopy);
} else {
// CSR spill stores will use FP as base register.
Register SGPRForFPSaveRestoreCopy =
FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
+ initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
if (SGPRForFPSaveRestoreCopy) {
// Copy FP to the scratch register now and emit the CFI entry. It avoids
// the extra FP copy needed in the other two cases when FP is spilled to
@@ -1112,18 +1109,18 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
PrologEpilogSGPRSpillBuilder SB(
FramePtrReg,
FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
- DL, TII, TRI, LiveRegs, FramePtrReg);
+ DL, TII, TRI, LiveUnits, FramePtrReg);
SB.save();
- LiveRegs.addReg(SGPRForFPSaveRestoreCopy);
+ LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
} else {
// Copy FP into a new scratch register so that its previous value can be
// spilled after setting up the new frame.
FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass);
+ MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
if (!FramePtrRegScratchCopy)
report_fatal_error("failed to find free scratch register");
- LiveRegs.addReg(FramePtrRegScratchCopy);
+ LiveUnits.addReg(FramePtrRegScratchCopy);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
.addReg(FramePtrReg);
}
@@ -1133,9 +1130,9 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
const unsigned Alignment = MFI.getMaxAlign().value();
RoundedSize += Alignment;
- if (LiveRegs.empty()) {
- LiveRegs.init(TRI);
- LiveRegs.addLiveIns(MBB);
+ if (LiveUnits.empty()) {
+ LiveUnits.init(TRI);
+ LiveUnits.addLiveIns(MBB);
}
// s_add_i32 s33, s32, NumBytes
@@ -1145,9 +1142,9 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
.addImm((Alignment - 1) * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameSetup);
auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
- .addReg(FramePtrReg, RegState::Kill)
- .addImm(-Alignment * getScratchScaleFactor(ST))
- .setMIFlag(MachineInstr::FrameSetup);
+ .addReg(FramePtrReg, RegState::Kill)
+ .addImm(-Alignment * getScratchScaleFactor(ST))
+ .setMIFlag(MachineInstr::FrameSetup);
And->getOperand(3).setIsDead(); // Mark SCC as dead.
FuncInfo->setIsStackRealigned(true);
} else if ((HasFP = hasFP(MF))) {
@@ -1158,10 +1155,10 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
// If FP is used, emit the CSR spills with FP base register.
if (HasFP) {
- emitCSRSpillStores(MF, MBB, MBBI, DL, LiveRegs, FramePtrReg,
+ emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
FramePtrRegScratchCopy);
if (FramePtrRegScratchCopy)
- LiveRegs.removeReg(FramePtrRegScratchCopy);
+ LiveUnits.removeReg(FramePtrRegScratchCopy);
}
// If we need a base pointer, set it up here. It's whatever the value of
@@ -1176,9 +1173,9 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
if (HasFP && RoundedSize != 0) {
auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
- .addReg(StackPtrReg)
- .addImm(RoundedSize * getScratchScaleFactor(ST))
- .setMIFlag(MachineInstr::FrameSetup);
+ .addReg(StackPtrReg)
+ .addImm(RoundedSize * getScratchScaleFactor(ST))
+ .setMIFlag(MachineInstr::FrameSetup);
Add->getOperand(3).setIsDead(); // Mark SCC as dead.
}
@@ -1210,7 +1207,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
- LivePhysRegs LiveRegs;
+ LiveRegUnits LiveUnits;
// Get the insert location for the epilogue. If there were no terminators in
// the block, get the last instruction.
MachineBasicBlock::iterator MBBI = MBB.end();
@@ -1240,27 +1237,28 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
// SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
// into a new scratch register and copy to FP later when other registers are
// restored from the current stack frame.
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
+ initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
if (SGPRForFPSaveRestoreCopy) {
- LiveRegs.addReg(SGPRForFPSaveRestoreCopy);
+ LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
} else {
FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass);
+ MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
if (!FramePtrRegScratchCopy)
report_fatal_error("failed to find free scratch register");
- LiveRegs.addReg(FramePtrRegScratchCopy);
+ LiveUnits.addReg(FramePtrRegScratchCopy);
}
- emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveRegs, FramePtrReg,
+ emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
FramePtrRegScratchCopy);
}
if (RoundedSize != 0 && hasFP(MF)) {
auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
- .addReg(StackPtrReg)
- .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
- .setMIFlag(MachineInstr::FrameDestroy);
+ .addReg(StackPtrReg)
+ .addImm(-static_cast<int64_t>(RoundedSize *
+ getScratchScaleFactor(ST)))
+ .setMIFlag(MachineInstr::FrameDestroy);
Add->getOperand(3).setIsDead(); // Mark SCC as dead.
}
@@ -1275,7 +1273,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MIB.setMIFlag(MachineInstr::FrameDestroy);
} else {
// Insert the CSR spill restores with SP as the base register.
- emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveRegs, StackPtrReg,
+ emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
FramePtrRegScratchCopy);
}
}
@@ -1284,8 +1282,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
- I != E; ++I) {
+ for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;
+ ++I) {
if (!MFI.isDeadObjectIndex(I) &&
MFI.getStackID(I) == TargetStackID::SGPRSpill &&
!FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) {
@@ -1307,8 +1305,7 @@ StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
}
void SIFrameLowering::processFunctionBeforeFrameFinalized(
- MachineFunction &MF,
- RegScavenger *RS) const {
+ MachineFunction &MF, RegScavenger *RS) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -1326,8 +1323,8 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
}
}
- const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
- && EnableSpillVGPRToAGPR;
+ const bool SpillVGPRToAGPR =
+ ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() && EnableSpillVGPRToAGPR;
if (SpillVGPRToAGPR) {
// To track the spill frame indices handled in this pass.
@@ -1345,11 +1342,11 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
if (TII->isVGPRSpill(MI)) {
// Try to eliminate stack used by VGPR spills before frame
// finalization.
- unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
- AMDGPU::OpName::vaddr);
+ unsigned FIOp =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr);
int FI = MI.getOperand(FIOp).getIndex();
Register VReg =
- TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
+ TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
TRI->isAGPR(MRI, VReg))) {
assert(RS != nullptr);
@@ -1472,30 +1469,30 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
- LivePhysRegs LiveRegs;
- LiveRegs.init(*TRI);
+ LiveRegUnits LiveUnits;
+ LiveUnits.init(*TRI);
// Initially mark callee saved registers as used so we will not choose them
// while looking for scratch SGPRs.
const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
for (unsigned I = 0; CSRegs[I]; ++I)
- LiveRegs.addReg(CSRegs[I]);
+ LiveUnits.addReg(CSRegs[I]);
const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();
if (NeedExecCopyReservedReg) {
Register ReservedReg = MFI->getSGPRForEXECCopy();
assert(ReservedReg && "Should have reserved an SGPR for EXEC copy.");
- Register UnusedScratchReg = findUnusedRegister(MRI, LiveRegs, RC);
+ Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
if (UnusedScratchReg) {
// If found any unused scratch SGPR, reserve the register itself for Exec
// copy and there is no need for any spills in that case.
MFI->setSGPRForEXECCopy(UnusedScratchReg);
- LiveRegs.addReg(UnusedScratchReg);
+ LiveUnits.addReg(UnusedScratchReg);
} else {
// Needs spill.
assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedReg) &&
"Re-reserving spill slot for EXEC copy register");
- getVGPRSpillLaneOrTempRegister(MF, LiveRegs, ReservedReg, RC,
+ getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedReg, RC,
/*IncludeScratchCopy=*/false);
}
}
@@ -1516,14 +1513,14 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
Register FramePtrReg = MFI->getFrameOffsetReg();
assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
"Re-reserving spill slot for FP");
- getVGPRSpillLaneOrTempRegister(MF, LiveRegs, FramePtrReg);
+ getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
}
if (TRI->hasBasePointer(MF)) {
Register BasePtrReg = TRI->getBaseRegister();
assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
"Re-reserving spill slot for BP");
- getVGPRSpillLaneOrTempRegister(MF, LiveRegs, BasePtrReg);
+ getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
}
}
@@ -1563,16 +1560,18 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
// We expect all return to be the same size.
- assert(!ReturnMI ||
- (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
- count_if(ReturnMI->operands(), [](auto Op) { return Op.isReg(); })));
+ assert(!ReturnMI || (count_if(MI.operands(), [](auto Op) {
+ return Op.isReg();
+ }) == count_if(ReturnMI->operands(), [](auto Op) {
+ return Op.isReg();
+ })));
ReturnMI = &MI;
}
}
}
- // Remove any VGPRs used in the return value because these do not need to be saved.
- // This prevents CSR restore from clobbering return VGPRs.
+ // Remove any VGPRs used in the return value because these do not need to be
+ // saved. This prevents CSR restore from clobbering return VGPRs.
if (ReturnMI) {
for (auto &Op : ReturnMI->operands()) {
if (Op.isReg())
@@ -1691,7 +1690,7 @@ bool SIFrameLowering::assignCalleeSavedSpillSlots(
}
bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
- const MachineFunction &MF) const {
+ const MachineFunction &MF) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -1719,9 +1718,8 @@ bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
}
MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
- MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
int64_t Amount = I->getOperand(0).getImm();
if (Amount == 0)
return MBB.erase(I);
@@ -1743,8 +1741,8 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
if (IsDestroy)
Amount = -Amount;
auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
- .addReg(SPReg)
- .addImm(Amount);
+ .addReg(SPReg)
+ .addImm(Amount);
Add->getOperand(3).setIsDead(); // Mark SCC as dead.
} else if (CalleePopAmount != 0) {
llvm_unreachable("is this used?");
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index 0060fc0be431b18..95b72040f2f1415 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -23,10 +23,8 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
void emitEntryFunctionPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const;
- void emitPrologue(MachineFunction &MF,
- MachineBasicBlock &MBB) const override;
- void emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg) const override;
@@ -38,11 +36,11 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
bool NeedExecCopyReservedReg) const;
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
- LivePhysRegs &LiveRegs, Register FrameReg,
+ LiveRegUnits &LiveUnits, Register FrameReg,
Register FramePtrRegScratchCopy) const;
void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
- LivePhysRegs &LiveRegs, Register FrameReg,
+ LiveRegUnits &LiveUnits, Register FrameReg,
Register FramePtrRegScratchCopy) const;
bool
assignCalleeSavedSpillSlots(MachineFunction &MF,
@@ -50,20 +48,18 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
std::vector<CalleeSavedInfo> &CSI) const override;
bool allocateScavengingFrameIndexesNearIncomingSP(
- const MachineFunction &MF) const override;
+ const MachineFunction &MF) const override;
bool isSupportedStackID(TargetStackID::Value ID) const override;
void processFunctionBeforeFrameFinalized(
- MachineFunction &MF,
- RegScavenger *RS = nullptr) const override;
+ MachineFunction &MF, RegScavenger *RS = nullptr) const override;
void processFunctionBeforeFrameIndicesReplaced(
MachineFunction &MF, RegScavenger *RS = nullptr) const override;
MachineBasicBlock::iterator
- eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
private:
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index b69bea225ac2efb..99d686fe18d942e 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -11,15 +11,15 @@
//
//===----------------------------------------------------------------------===//
+#include "SIRegisterInfo.h"
#include "AMDGPU.h"
#include "AMDGPURegisterBankInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
@@ -29,11 +29,10 @@ using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"
-static cl::opt<bool> EnableSpillSGPRToVGPR(
- "amdgpu-spill-sgpr-to-vgpr",
- cl::desc("Enable spilling SGPRs to VGPRs"),
- cl::ReallyHidden,
- cl::init(true));
+static cl::opt<bool>
+ EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr",
+ cl::desc("Enable spilling SGPRs to VGPRs"),
+ cl::ReallyHidden, cl::init(true));
std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
@@ -300,10 +299,12 @@ struct SGPRSpillBuilder {
TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
/*IsKill*/ false);
// Spill inactive lanes
- auto Not0 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ auto Not0 =
+ BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
Not0->getOperand(2).setIsDead(); // Mark SCC as dead.
TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
- auto Not1 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ auto Not1 =
+ BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
Not1->getOperand(2).setIsDead(); // Mark SCC as dead.
}
}
@@ -324,8 +325,9 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
(getSubRegIndexLaneMask(AMDGPU::lo16) |
- getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
- getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
+ getSubRegIndexLaneMask(AMDGPU::hi16))
+ .getAsInteger() ==
+ getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
"getNumCoveredRegs() will not work with generated subreg masks!");
RegPressureIgnoredUnits.resize(getNumRegUnits());
@@ -385,8 +387,8 @@ void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
}
// Forced to be here by one .inc
-const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
- const MachineFunction *MF) const {
+const MCPhysReg *
+SIRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
CallingConv::ID CC = MF->getFunction().getCallingConv();
switch (CC) {
case CallingConv::C:
@@ -543,7 +545,7 @@ SIRegisterInfo::getAlignedHighSGPRForRC(const MachineFunction &MF,
}
MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
- const MachineFunction &MF) const {
+ const MachineFunction &MF) const {
return getAlignedHighSGPRForRC(MF, /*Align=*/4, &AMDGPU::SGPR_128RegClass);
}
@@ -733,7 +735,8 @@ bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
return TargetRegisterInfo::shouldRealignStack(MF);
}
-bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
+bool SIRegisterInfo::requiresRegisterScavenging(
+ const MachineFunction &Fn) const {
const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
if (Info->isEntryFunction()) {
const MachineFrameInfo &MFI = Fn.getFrameInfo();
@@ -745,7 +748,7 @@ bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const
}
bool SIRegisterInfo::requiresFrameIndexScavenging(
- const MachineFunction &MF) const {
+ const MachineFunction &MF) const {
// Do not use frame virtual registers. They used to be used for SGPRs, but
// once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
// scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
@@ -754,13 +757,13 @@ bool SIRegisterInfo::requiresFrameIndexScavenging(
}
bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
- const MachineFunction &MF) const {
+ const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
return MFI.hasStackObjects();
}
bool SIRegisterInfo::requiresVirtualBaseRegisters(
- const MachineFunction &) const {
+ const MachineFunction &) const {
// There are no special dedicated stack or frame pointers.
return true;
}
@@ -768,8 +771,8 @@ bool SIRegisterInfo::requiresVirtualBaseRegisters(
int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {
assert(SIInstrInfo::isMUBUF(*MI) || SIInstrInfo::isFLATScratch(*MI));
- int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::offset);
+ int OffIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::offset);
return MI->getOperand(OffIdx).getImm();
}
@@ -780,8 +783,8 @@ int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::vaddr) ||
- (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::saddr))) &&
+ (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::saddr))) &&
"Should never see frame index on non-address operand");
return getScratchInstrOffset(MI);
@@ -813,31 +816,28 @@ Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
MachineFunction *MF = MBB->getParent();
const SIInstrInfo *TII = ST.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
- : AMDGPU::V_MOV_B32_e32;
+ unsigned MovOpc =
+ ST.enableFlatScratch() ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
Register BaseReg = MRI.createVirtualRegister(
ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
: &AMDGPU::VGPR_32RegClass);
if (Offset == 0) {
- BuildMI(*MBB, Ins, DL, TII->get(MovOpc), BaseReg)
- .addFrameIndex(FrameIdx);
+ BuildMI(*MBB, Ins, DL, TII->get(MovOpc), BaseReg).addFrameIndex(FrameIdx);
return BaseReg;
}
Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- Register FIReg = MRI.createVirtualRegister(
- ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
- : &AMDGPU::VGPR_32RegClass);
+ Register FIReg = MRI.createVirtualRegister(ST.enableFlatScratch()
+ ? &AMDGPU::SReg_32_XM0RegClass
+ : &AMDGPU::VGPR_32RegClass);
- BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
- .addImm(Offset);
- BuildMI(*MBB, Ins, DL, TII->get(MovOpc), FIReg)
- .addFrameIndex(FrameIdx);
+ BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg).addImm(Offset);
+ BuildMI(*MBB, Ins, DL, TII->get(MovOpc), FIReg).addFrameIndex(FrameIdx);
- if (ST.enableFlatScratch() ) {
+ if (ST.enableFlatScratch()) {
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
.addReg(OffsetReg, RegState::Kill)
.addReg(FIReg);
@@ -845,9 +845,9 @@ Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
}
TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
- .addReg(OffsetReg, RegState::Kill)
- .addReg(FIReg)
- .addImm(0); // clamp bit
+ .addReg(OffsetReg, RegState::Kill)
+ .addReg(FIReg)
+ .addImm(0); // clamp bit
return BaseReg;
}
@@ -860,7 +860,7 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
#ifndef NDEBUG
// FIXME: Is it possible to be storing a frame index to itself?
bool SeenFI = false;
- for (const MachineOperand &MO: MI.operands()) {
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isFI()) {
if (SeenFI)
llvm_unreachable("should not see multiple frame indices");
@@ -870,9 +870,8 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
}
#endif
- MachineOperand *FIOp =
- TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
- : AMDGPU::OpName::vaddr);
+ MachineOperand *FIOp = TII->getNamedOperand(
+ MI, IsFlat ? AMDGPU::OpName::saddr : AMDGPU::OpName::vaddr);
MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
int64_t NewOffset = OffsetOp->getImm() + Offset;
@@ -917,8 +916,9 @@ bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
SIInstrFlags::FlatScratch);
}
-const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
- const MachineFunction &MF, unsigned Kind) const {
+const TargetRegisterClass *
+SIRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
// This is inaccurate. It depends on the instruction and address space. The
// only place where we should hit this is for dealing with frame indexes /
// private accesses, so this is correct in that case.
@@ -1068,7 +1068,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
return 1;
- default: llvm_unreachable("Invalid spill opcode");
+ default:
+ llvm_unreachable("Invalid spill opcode");
}
}
@@ -1204,7 +1205,7 @@ static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
bool IsStore = MI->mayStore();
MachineRegisterInfo &MRI = MF->getRegInfo();
- auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
+ auto *TRI = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
unsigned Dst = IsStore ? Reg : ValueReg;
unsigned Src = IsStore ? ValueReg : Reg;
@@ -1233,8 +1234,7 @@ static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
// need to handle the case where an SGPR may need to be spilled while spilling.
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
MachineFrameInfo &MFI,
- MachineBasicBlock::iterator MI,
- int Index,
+ MachineBasicBlock::iterator MI, int Index,
int64_t Offset) {
const SIInstrInfo *TII = ST.getInstrInfo();
MachineBasicBlock *MBB = MI->getParent();
@@ -1242,8 +1242,8 @@ static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
bool IsStore = MI->mayStore();
unsigned Opc = MI->getOpcode();
- int LoadStoreOp = IsStore ?
- getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
+ int LoadStoreOp =
+ IsStore ? getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
if (LoadStoreOp == -1)
return false;
@@ -1261,8 +1261,8 @@ static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
.addImm(0) // swz
.cloneMemRefs(*MI);
- const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
- AMDGPU::OpName::vdata_in);
+ const MachineOperand *VDataIn =
+ TII->getNamedOperand(*MI, AMDGPU::OpName::vdata_in);
if (VDataIn)
NewMI.add(*VDataIn);
return true;
@@ -1309,8 +1309,8 @@ void SIRegisterInfo::buildSpillLoadStore(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL,
unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
- RegScavenger *RS, LivePhysRegs *LiveRegs) const {
- assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
+ RegScavenger *RS, LiveRegUnits *LiveUnits) const {
+ assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");
MachineFunction *MF = MBB.getParent();
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -1373,20 +1373,20 @@ void SIRegisterInfo::buildSpillLoadStore(
if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) >= 2) {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e64), TmpVGPR)
- .addReg(SGPRBase)
- .addImm(VOffset)
- .addImm(0); // clamp
+ .addReg(SGPRBase)
+ .addImm(VOffset)
+ .addImm(0); // clamp
} else {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
- .addReg(SGPRBase);
+ .addReg(SGPRBase);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32), TmpVGPR)
- .addImm(VOffset)
- .addReg(TmpOffsetVGPR);
+ .addImm(VOffset)
+ .addReg(TmpOffsetVGPR);
}
} else {
assert(TmpOffsetVGPR);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
- .addImm(VOffset);
+ .addImm(VOffset);
}
};
@@ -1398,18 +1398,19 @@ void SIRegisterInfo::buildSpillLoadStore(
SOffset = MCRegister();
// We don't have access to the register scavenger if this function is called
- // during PEI::scavengeFrameVirtualRegs() so use LiveRegs in this case.
+ // during PEI::scavengeFrameVirtualRegs() so use LiveUnits in this case.
// TODO: Clobbering SCC is not necessary for scratch instructions in the
// entry.
if (RS) {
- SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI, false, 0, false);
+ SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI,
+ false, 0, false);
// Piggy back on the liveness scan we just did see if SCC is dead.
CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
- } else if (LiveRegs) {
- CanClobberSCC = !LiveRegs->contains(AMDGPU::SCC);
+ } else if (LiveUnits) {
+ CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
- if (LiveRegs->available(MF->getRegInfo(), Reg)) {
+ if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
SOffset = Reg;
break;
}
@@ -1423,11 +1424,12 @@ void SIRegisterInfo::buildSpillLoadStore(
UseVGPROffset = true;
if (RS) {
- TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
+ TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
+ MI, false, 0);
} else {
- assert(LiveRegs);
+ assert(LiveUnits);
for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
- if (LiveRegs->available(MF->getRegInfo(), Reg)) {
+ if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
TmpOffsetVGPR = Reg;
break;
}
@@ -1471,8 +1473,8 @@ void SIRegisterInfo::buildSpillLoadStore(
} else {
assert(Offset != 0);
auto Add = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
- .addReg(ScratchOffsetReg)
- .addImm(Offset);
+ .addReg(ScratchOffsetReg)
+ .addImm(Offset);
Add->getOperand(3).setIsDead(); // Mark SCC as dead.
}
@@ -1480,8 +1482,8 @@ void SIRegisterInfo::buildSpillLoadStore(
}
if (IsFlat && SOffset == AMDGPU::NoRegister) {
- assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0
- && "Unexpected vaddr for flat scratch with a FI operand");
+ assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0 &&
+ "Unexpected vaddr for flat scratch with a FI operand");
if (UseVGPROffset) {
LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
@@ -1518,10 +1520,10 @@ void SIRegisterInfo::buildSpillLoadStore(
}
unsigned NumRegs = EltSize / 4;
- Register SubReg = e == 1
- ? ValueReg
- : Register(getSubReg(ValueReg,
- getSubRegFromChannel(RegOffset / 4, NumRegs)));
+ Register SubReg =
+ e == 1 ? ValueReg
+ : Register(getSubReg(
+ ValueReg, getSubRegFromChannel(RegOffset / 4, NumRegs)));
unsigned SOffsetRegState = 0;
unsigned SrcDstRegState = getDefRegState(!IsStore);
@@ -1552,17 +1554,19 @@ void SIRegisterInfo::buildSpillLoadStore(
LaneE = RegOffset / 4;
Lane >= LaneE; --Lane) {
bool IsSubReg = e > 1 || EltSize > 4;
- Register Sub = IsSubReg
- ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
- : ValueReg;
+ Register Sub =
+ IsSubReg ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
+ : ValueReg;
auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
if (!MIB.getInstr())
break;
- if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
+ if (NeedSuperRegDef ||
+ (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
MIB.addReg(ValueReg, RegState::ImplicitDefine);
NeedSuperRegDef = false;
}
- if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {
+ if ((IsSubReg || NeedSuperRegImpOperand) &&
+ (IsFirstSubReg || IsLastSubReg)) {
NeedSuperRegImpOperand = true;
unsigned State = SrcDstRegState;
if (!IsLastSubReg || (Lane != LaneE))
@@ -1581,8 +1585,8 @@ void SIRegisterInfo::buildSpillLoadStore(
assert(IsFlat && EltSize > 4);
unsigned NumRegs = RemEltSize / 4;
- SubReg = Register(getSubReg(ValueReg,
- getSubRegFromChannel(RegOffset / 4, NumRegs)));
+ SubReg = Register(
+ getSubReg(ValueReg, getSubRegFromChannel(RegOffset / 4, NumRegs)));
unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
Desc = &TII->get(Opc);
}
@@ -1597,10 +1601,10 @@ void SIRegisterInfo::buildSpillLoadStore(
assert(MF->getRegInfo().isReserved(TmpIntermediateVGPR));
}
if (IsStore) {
- auto AccRead = BuildMI(MBB, MI, DL,
- TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
- TmpIntermediateVGPR)
- .addReg(SubReg, getKillRegState(IsKill));
+ auto AccRead =
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
+ TmpIntermediateVGPR)
+ .addReg(SubReg, getKillRegState(IsKill));
if (NeedSuperRegDef)
AccRead.addReg(ValueReg, RegState::ImplicitDefine);
AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
@@ -1645,8 +1649,7 @@ void SIRegisterInfo::buildSpillLoadStore(
} else {
MIB.addReg(SOffset, SOffsetRegState);
}
- MIB.addImm(Offset + RegOffset)
- .addImm(0); // cpol
+ MIB.addImm(Offset + RegOffset).addImm(0); // cpol
if (!IsFlat)
MIB.addImm(0); // swz
MIB.addMemOperand(NewMMO);
@@ -1765,7 +1768,6 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
bool IsLastSubreg = i == SB.NumSubRegs - 1;
bool UseKill = SB.IsKill && IsLastSubreg;
-
// Mark the "old value of vgpr" input undef only if this is the first sgpr
// spill to this specific vgpr in the first basic block.
auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
@@ -2048,8 +2050,8 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
}
bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
MachineFunction *MF = MI->getParent()->getParent();
MachineBasicBlock *MBB = MI->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
@@ -2067,507 +2069,507 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
: getFrameRegister(*MF);
switch (MI->getOpcode()) {
- // SGPR register spill
- case AMDGPU::SI_SPILL_S1024_SAVE:
- case AMDGPU::SI_SPILL_S512_SAVE:
- case AMDGPU::SI_SPILL_S384_SAVE:
- case AMDGPU::SI_SPILL_S352_SAVE:
- case AMDGPU::SI_SPILL_S320_SAVE:
- case AMDGPU::SI_SPILL_S288_SAVE:
- case AMDGPU::SI_SPILL_S256_SAVE:
- case AMDGPU::SI_SPILL_S224_SAVE:
- case AMDGPU::SI_SPILL_S192_SAVE:
- case AMDGPU::SI_SPILL_S160_SAVE:
- case AMDGPU::SI_SPILL_S128_SAVE:
- case AMDGPU::SI_SPILL_S96_SAVE:
- case AMDGPU::SI_SPILL_S64_SAVE:
- case AMDGPU::SI_SPILL_S32_SAVE: {
- return spillSGPR(MI, Index, RS);
- }
+ // SGPR register spill
+ case AMDGPU::SI_SPILL_S1024_SAVE:
+ case AMDGPU::SI_SPILL_S512_SAVE:
+ case AMDGPU::SI_SPILL_S384_SAVE:
+ case AMDGPU::SI_SPILL_S352_SAVE:
+ case AMDGPU::SI_SPILL_S320_SAVE:
+ case AMDGPU::SI_SPILL_S288_SAVE:
+ case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S224_SAVE:
+ case AMDGPU::SI_SPILL_S192_SAVE:
+ case AMDGPU::SI_SPILL_S160_SAVE:
+ case AMDGPU::SI_SPILL_S128_SAVE:
+ case AMDGPU::SI_SPILL_S96_SAVE:
+ case AMDGPU::SI_SPILL_S64_SAVE:
+ case AMDGPU::SI_SPILL_S32_SAVE: {
+ return spillSGPR(MI, Index, RS);
+ }
- // SGPR register restore
- case AMDGPU::SI_SPILL_S1024_RESTORE:
- case AMDGPU::SI_SPILL_S512_RESTORE:
- case AMDGPU::SI_SPILL_S384_RESTORE:
- case AMDGPU::SI_SPILL_S352_RESTORE:
- case AMDGPU::SI_SPILL_S320_RESTORE:
- case AMDGPU::SI_SPILL_S288_RESTORE:
- case AMDGPU::SI_SPILL_S256_RESTORE:
- case AMDGPU::SI_SPILL_S224_RESTORE:
- case AMDGPU::SI_SPILL_S192_RESTORE:
- case AMDGPU::SI_SPILL_S160_RESTORE:
- case AMDGPU::SI_SPILL_S128_RESTORE:
- case AMDGPU::SI_SPILL_S96_RESTORE:
- case AMDGPU::SI_SPILL_S64_RESTORE:
- case AMDGPU::SI_SPILL_S32_RESTORE: {
- return restoreSGPR(MI, Index, RS);
- }
+ // SGPR register restore
+ case AMDGPU::SI_SPILL_S1024_RESTORE:
+ case AMDGPU::SI_SPILL_S512_RESTORE:
+ case AMDGPU::SI_SPILL_S384_RESTORE:
+ case AMDGPU::SI_SPILL_S352_RESTORE:
+ case AMDGPU::SI_SPILL_S320_RESTORE:
+ case AMDGPU::SI_SPILL_S288_RESTORE:
+ case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_S224_RESTORE:
+ case AMDGPU::SI_SPILL_S192_RESTORE:
+ case AMDGPU::SI_SPILL_S160_RESTORE:
+ case AMDGPU::SI_SPILL_S128_RESTORE:
+ case AMDGPU::SI_SPILL_S96_RESTORE:
+ case AMDGPU::SI_SPILL_S64_RESTORE:
+ case AMDGPU::SI_SPILL_S32_RESTORE: {
+ return restoreSGPR(MI, Index, RS);
+ }
- // VGPR register spill
- case AMDGPU::SI_SPILL_V1024_SAVE:
- case AMDGPU::SI_SPILL_V512_SAVE:
- case AMDGPU::SI_SPILL_V384_SAVE:
- case AMDGPU::SI_SPILL_V352_SAVE:
- case AMDGPU::SI_SPILL_V320_SAVE:
- case AMDGPU::SI_SPILL_V288_SAVE:
- case AMDGPU::SI_SPILL_V256_SAVE:
- case AMDGPU::SI_SPILL_V224_SAVE:
- case AMDGPU::SI_SPILL_V192_SAVE:
- case AMDGPU::SI_SPILL_V160_SAVE:
- case AMDGPU::SI_SPILL_V128_SAVE:
- case AMDGPU::SI_SPILL_V96_SAVE:
- case AMDGPU::SI_SPILL_V64_SAVE:
- case AMDGPU::SI_SPILL_V32_SAVE:
- case AMDGPU::SI_SPILL_A1024_SAVE:
- case AMDGPU::SI_SPILL_A512_SAVE:
- case AMDGPU::SI_SPILL_A384_SAVE:
- case AMDGPU::SI_SPILL_A352_SAVE:
- case AMDGPU::SI_SPILL_A320_SAVE:
- case AMDGPU::SI_SPILL_A288_SAVE:
- case AMDGPU::SI_SPILL_A256_SAVE:
- case AMDGPU::SI_SPILL_A224_SAVE:
- case AMDGPU::SI_SPILL_A192_SAVE:
- case AMDGPU::SI_SPILL_A160_SAVE:
- case AMDGPU::SI_SPILL_A128_SAVE:
- case AMDGPU::SI_SPILL_A96_SAVE:
- case AMDGPU::SI_SPILL_A64_SAVE:
- case AMDGPU::SI_SPILL_A32_SAVE:
- case AMDGPU::SI_SPILL_AV1024_SAVE:
- case AMDGPU::SI_SPILL_AV512_SAVE:
- case AMDGPU::SI_SPILL_AV384_SAVE:
- case AMDGPU::SI_SPILL_AV352_SAVE:
- case AMDGPU::SI_SPILL_AV320_SAVE:
- case AMDGPU::SI_SPILL_AV288_SAVE:
- case AMDGPU::SI_SPILL_AV256_SAVE:
- case AMDGPU::SI_SPILL_AV224_SAVE:
- case AMDGPU::SI_SPILL_AV192_SAVE:
- case AMDGPU::SI_SPILL_AV160_SAVE:
- case AMDGPU::SI_SPILL_AV128_SAVE:
- case AMDGPU::SI_SPILL_AV96_SAVE:
- case AMDGPU::SI_SPILL_AV64_SAVE:
- case AMDGPU::SI_SPILL_AV32_SAVE:
- case AMDGPU::SI_SPILL_WWM_V32_SAVE:
- case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
- const MachineOperand *VData = TII->getNamedOperand(*MI,
- AMDGPU::OpName::vdata);
- assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
- MFI->getStackPtrOffsetReg());
-
- unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
- : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
- auto *MBB = MI->getParent();
- bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
- if (IsWWMRegSpill) {
- TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
- RS->isRegUsed(AMDGPU::SCC));
- }
- buildSpillLoadStore(
- *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
- TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
- *MI->memoperands_begin(), RS);
- MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
- if (IsWWMRegSpill)
- TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
-
- MI->eraseFromParent();
- return true;
+ // VGPR register spill
+ case AMDGPU::SI_SPILL_V1024_SAVE:
+ case AMDGPU::SI_SPILL_V512_SAVE:
+ case AMDGPU::SI_SPILL_V384_SAVE:
+ case AMDGPU::SI_SPILL_V352_SAVE:
+ case AMDGPU::SI_SPILL_V320_SAVE:
+ case AMDGPU::SI_SPILL_V288_SAVE:
+ case AMDGPU::SI_SPILL_V256_SAVE:
+ case AMDGPU::SI_SPILL_V224_SAVE:
+ case AMDGPU::SI_SPILL_V192_SAVE:
+ case AMDGPU::SI_SPILL_V160_SAVE:
+ case AMDGPU::SI_SPILL_V128_SAVE:
+ case AMDGPU::SI_SPILL_V96_SAVE:
+ case AMDGPU::SI_SPILL_V64_SAVE:
+ case AMDGPU::SI_SPILL_V32_SAVE:
+ case AMDGPU::SI_SPILL_A1024_SAVE:
+ case AMDGPU::SI_SPILL_A512_SAVE:
+ case AMDGPU::SI_SPILL_A384_SAVE:
+ case AMDGPU::SI_SPILL_A352_SAVE:
+ case AMDGPU::SI_SPILL_A320_SAVE:
+ case AMDGPU::SI_SPILL_A288_SAVE:
+ case AMDGPU::SI_SPILL_A256_SAVE:
+ case AMDGPU::SI_SPILL_A224_SAVE:
+ case AMDGPU::SI_SPILL_A192_SAVE:
+ case AMDGPU::SI_SPILL_A160_SAVE:
+ case AMDGPU::SI_SPILL_A128_SAVE:
+ case AMDGPU::SI_SPILL_A96_SAVE:
+ case AMDGPU::SI_SPILL_A64_SAVE:
+ case AMDGPU::SI_SPILL_A32_SAVE:
+ case AMDGPU::SI_SPILL_AV1024_SAVE:
+ case AMDGPU::SI_SPILL_AV512_SAVE:
+ case AMDGPU::SI_SPILL_AV384_SAVE:
+ case AMDGPU::SI_SPILL_AV352_SAVE:
+ case AMDGPU::SI_SPILL_AV320_SAVE:
+ case AMDGPU::SI_SPILL_AV288_SAVE:
+ case AMDGPU::SI_SPILL_AV256_SAVE:
+ case AMDGPU::SI_SPILL_AV224_SAVE:
+ case AMDGPU::SI_SPILL_AV192_SAVE:
+ case AMDGPU::SI_SPILL_AV160_SAVE:
+ case AMDGPU::SI_SPILL_AV128_SAVE:
+ case AMDGPU::SI_SPILL_AV96_SAVE:
+ case AMDGPU::SI_SPILL_AV64_SAVE:
+ case AMDGPU::SI_SPILL_AV32_SAVE:
+ case AMDGPU::SI_SPILL_WWM_V32_SAVE:
+ case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
+ const MachineOperand *VData =
+ TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
+ assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
+ MFI->getStackPtrOffsetReg());
+
+ unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
+ : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
+ auto *MBB = MI->getParent();
+ bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
+ if (IsWWMRegSpill) {
+ TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
+ RS->isRegUsed(AMDGPU::SCC));
}
- case AMDGPU::SI_SPILL_V32_RESTORE:
- case AMDGPU::SI_SPILL_V64_RESTORE:
- case AMDGPU::SI_SPILL_V96_RESTORE:
- case AMDGPU::SI_SPILL_V128_RESTORE:
- case AMDGPU::SI_SPILL_V160_RESTORE:
- case AMDGPU::SI_SPILL_V192_RESTORE:
- case AMDGPU::SI_SPILL_V224_RESTORE:
- case AMDGPU::SI_SPILL_V256_RESTORE:
- case AMDGPU::SI_SPILL_V288_RESTORE:
- case AMDGPU::SI_SPILL_V320_RESTORE:
- case AMDGPU::SI_SPILL_V352_RESTORE:
- case AMDGPU::SI_SPILL_V384_RESTORE:
- case AMDGPU::SI_SPILL_V512_RESTORE:
- case AMDGPU::SI_SPILL_V1024_RESTORE:
- case AMDGPU::SI_SPILL_A32_RESTORE:
- case AMDGPU::SI_SPILL_A64_RESTORE:
- case AMDGPU::SI_SPILL_A96_RESTORE:
- case AMDGPU::SI_SPILL_A128_RESTORE:
- case AMDGPU::SI_SPILL_A160_RESTORE:
- case AMDGPU::SI_SPILL_A192_RESTORE:
- case AMDGPU::SI_SPILL_A224_RESTORE:
- case AMDGPU::SI_SPILL_A256_RESTORE:
- case AMDGPU::SI_SPILL_A288_RESTORE:
- case AMDGPU::SI_SPILL_A320_RESTORE:
- case AMDGPU::SI_SPILL_A352_RESTORE:
- case AMDGPU::SI_SPILL_A384_RESTORE:
- case AMDGPU::SI_SPILL_A512_RESTORE:
- case AMDGPU::SI_SPILL_A1024_RESTORE:
- case AMDGPU::SI_SPILL_AV32_RESTORE:
- case AMDGPU::SI_SPILL_AV64_RESTORE:
- case AMDGPU::SI_SPILL_AV96_RESTORE:
- case AMDGPU::SI_SPILL_AV128_RESTORE:
- case AMDGPU::SI_SPILL_AV160_RESTORE:
- case AMDGPU::SI_SPILL_AV192_RESTORE:
- case AMDGPU::SI_SPILL_AV224_RESTORE:
- case AMDGPU::SI_SPILL_AV256_RESTORE:
- case AMDGPU::SI_SPILL_AV288_RESTORE:
- case AMDGPU::SI_SPILL_AV320_RESTORE:
- case AMDGPU::SI_SPILL_AV352_RESTORE:
- case AMDGPU::SI_SPILL_AV384_RESTORE:
- case AMDGPU::SI_SPILL_AV512_RESTORE:
- case AMDGPU::SI_SPILL_AV1024_RESTORE:
- case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
- case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
- const MachineOperand *VData = TII->getNamedOperand(*MI,
- AMDGPU::OpName::vdata);
- assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
- MFI->getStackPtrOffsetReg());
-
- unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
- : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
- auto *MBB = MI->getParent();
- bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
- if (IsWWMRegSpill) {
- TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
- RS->isRegUsed(AMDGPU::SCC));
- }
- buildSpillLoadStore(
- *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
- TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
- *MI->memoperands_begin(), RS);
-
- if (IsWWMRegSpill)
- TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
-
- MI->eraseFromParent();
- return true;
+ buildSpillLoadStore(
+ *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
+ TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
+ *MI->memoperands_begin(), RS);
+ MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
+ if (IsWWMRegSpill)
+ TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
+
+ MI->eraseFromParent();
+ return true;
+ }
+ case AMDGPU::SI_SPILL_V32_RESTORE:
+ case AMDGPU::SI_SPILL_V64_RESTORE:
+ case AMDGPU::SI_SPILL_V96_RESTORE:
+ case AMDGPU::SI_SPILL_V128_RESTORE:
+ case AMDGPU::SI_SPILL_V160_RESTORE:
+ case AMDGPU::SI_SPILL_V192_RESTORE:
+ case AMDGPU::SI_SPILL_V224_RESTORE:
+ case AMDGPU::SI_SPILL_V256_RESTORE:
+ case AMDGPU::SI_SPILL_V288_RESTORE:
+ case AMDGPU::SI_SPILL_V320_RESTORE:
+ case AMDGPU::SI_SPILL_V352_RESTORE:
+ case AMDGPU::SI_SPILL_V384_RESTORE:
+ case AMDGPU::SI_SPILL_V512_RESTORE:
+ case AMDGPU::SI_SPILL_V1024_RESTORE:
+ case AMDGPU::SI_SPILL_A32_RESTORE:
+ case AMDGPU::SI_SPILL_A64_RESTORE:
+ case AMDGPU::SI_SPILL_A96_RESTORE:
+ case AMDGPU::SI_SPILL_A128_RESTORE:
+ case AMDGPU::SI_SPILL_A160_RESTORE:
+ case AMDGPU::SI_SPILL_A192_RESTORE:
+ case AMDGPU::SI_SPILL_A224_RESTORE:
+ case AMDGPU::SI_SPILL_A256_RESTORE:
+ case AMDGPU::SI_SPILL_A288_RESTORE:
+ case AMDGPU::SI_SPILL_A320_RESTORE:
+ case AMDGPU::SI_SPILL_A352_RESTORE:
+ case AMDGPU::SI_SPILL_A384_RESTORE:
+ case AMDGPU::SI_SPILL_A512_RESTORE:
+ case AMDGPU::SI_SPILL_A1024_RESTORE:
+ case AMDGPU::SI_SPILL_AV32_RESTORE:
+ case AMDGPU::SI_SPILL_AV64_RESTORE:
+ case AMDGPU::SI_SPILL_AV96_RESTORE:
+ case AMDGPU::SI_SPILL_AV128_RESTORE:
+ case AMDGPU::SI_SPILL_AV160_RESTORE:
+ case AMDGPU::SI_SPILL_AV192_RESTORE:
+ case AMDGPU::SI_SPILL_AV224_RESTORE:
+ case AMDGPU::SI_SPILL_AV256_RESTORE:
+ case AMDGPU::SI_SPILL_AV288_RESTORE:
+ case AMDGPU::SI_SPILL_AV320_RESTORE:
+ case AMDGPU::SI_SPILL_AV352_RESTORE:
+ case AMDGPU::SI_SPILL_AV384_RESTORE:
+ case AMDGPU::SI_SPILL_AV512_RESTORE:
+ case AMDGPU::SI_SPILL_AV1024_RESTORE:
+ case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
+ case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
+ const MachineOperand *VData =
+ TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
+ assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
+ MFI->getStackPtrOffsetReg());
+
+ unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
+ : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
+ auto *MBB = MI->getParent();
+ bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
+ if (IsWWMRegSpill) {
+ TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
+ RS->isRegUsed(AMDGPU::SCC));
}
+ buildSpillLoadStore(
+ *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
+ TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
+ *MI->memoperands_begin(), RS);
- default: {
- // Other access to frame index
- const DebugLoc &DL = MI->getDebugLoc();
+ if (IsWWMRegSpill)
+ TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
- int64_t Offset = FrameInfo.getObjectOffset(Index);
- if (ST.enableFlatScratch()) {
- if (TII->isFLATScratch(*MI)) {
- assert((int16_t)FIOperandNum ==
- AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::saddr));
+ MI->eraseFromParent();
+ return true;
+ }
- // The offset is always swizzled, just replace it
- if (FrameReg)
- FIOp.ChangeToRegister(FrameReg, false);
+ default: {
+ // Other access to frame index
+ const DebugLoc &DL = MI->getDebugLoc();
- if (!Offset)
- return false;
+ int64_t Offset = FrameInfo.getObjectOffset(Index);
+ if (ST.enableFlatScratch()) {
+ if (TII->isFLATScratch(*MI)) {
+ assert(
+ (int16_t)FIOperandNum ==
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::saddr));
+
+ // The offset is always swizzled, just replace it
+ if (FrameReg)
+ FIOp.ChangeToRegister(FrameReg, false);
+
+ if (!Offset)
+ return false;
- MachineOperand *OffsetOp =
+ MachineOperand *OffsetOp =
TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
- int64_t NewOffset = Offset + OffsetOp->getImm();
- if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
- SIInstrFlags::FlatScratch)) {
- OffsetOp->setImm(NewOffset);
- if (FrameReg)
- return false;
- Offset = 0;
- }
+ int64_t NewOffset = Offset + OffsetOp->getImm();
+ if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
+ SIInstrFlags::FlatScratch)) {
+ OffsetOp->setImm(NewOffset);
+ if (FrameReg)
+ return false;
+ Offset = 0;
+ }
- if (!Offset) {
- unsigned Opc = MI->getOpcode();
- int NewOpc = -1;
- if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
- NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
- } else if (ST.hasFlatScratchSTMode()) {
- // On GFX10 we have ST mode to use no registers for an address.
- // Otherwise we need to materialize 0 into an SGPR.
- NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
- }
+ if (!Offset) {
+ unsigned Opc = MI->getOpcode();
+ int NewOpc = -1;
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
+ NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
+ } else if (ST.hasFlatScratchSTMode()) {
+ // On GFX10 we have ST mode to use no registers for an address.
+ // Otherwise we need to materialize 0 into an SGPR.
+ NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
+ }
- if (NewOpc != -1) {
- // removeOperand doesn't fixup tied operand indexes as it goes, so
- // it asserts. Untie vdst_in for now and retie them afterwards.
- int VDstIn = AMDGPU::getNamedOperandIdx(Opc,
- AMDGPU::OpName::vdst_in);
- bool TiedVDst = VDstIn != -1 &&
- MI->getOperand(VDstIn).isReg() &&
- MI->getOperand(VDstIn).isTied();
- if (TiedVDst)
- MI->untieRegOperand(VDstIn);
-
- MI->removeOperand(
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
-
- if (TiedVDst) {
- int NewVDst =
- AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
- int NewVDstIn =
- AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
- assert (NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
- MI->tieOperands(NewVDst, NewVDstIn);
- }
- MI->setDesc(TII->get(NewOpc));
- return false;
+ if (NewOpc != -1) {
+ // removeOperand doesn't fixup tied operand indexes as it goes, so
+ // it asserts. Untie vdst_in for now and retie them afterwards.
+ int VDstIn =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
+ bool TiedVDst = VDstIn != -1 && MI->getOperand(VDstIn).isReg() &&
+ MI->getOperand(VDstIn).isTied();
+ if (TiedVDst)
+ MI->untieRegOperand(VDstIn);
+
+ MI->removeOperand(
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
+
+ if (TiedVDst) {
+ int NewVDst =
+ AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
+ int NewVDstIn =
+ AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
+ assert(NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
+ MI->tieOperands(NewVDst, NewVDstIn);
}
+ MI->setDesc(TII->get(NewOpc));
+ return false;
}
}
+ }
- if (!FrameReg) {
- FIOp.ChangeToImmediate(Offset);
- if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
- return false;
- }
+ if (!FrameReg) {
+ FIOp.ChangeToImmediate(Offset);
+ if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
+ return false;
+ }
- // We need to use register here. Check if we can use an SGPR or need
- // a VGPR.
- FIOp.ChangeToRegister(AMDGPU::M0, false);
- bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);
+ // We need to use register here. Check if we can use an SGPR or need
+ // a VGPR.
+ FIOp.ChangeToRegister(AMDGPU::M0, false);
+ bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);
- if (!Offset && FrameReg && UseSGPR) {
- FIOp.setReg(FrameReg);
- return false;
- }
+ if (!Offset && FrameReg && UseSGPR) {
+ FIOp.setReg(FrameReg);
+ return false;
+ }
- const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
- : &AMDGPU::VGPR_32RegClass;
+ const TargetRegisterClass *RC =
+ UseSGPR ? &AMDGPU::SReg_32_XM0RegClass : &AMDGPU::VGPR_32RegClass;
- Register TmpReg =
- RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
- FIOp.setReg(TmpReg);
- FIOp.setIsKill();
+ Register TmpReg =
+ RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
+ FIOp.setReg(TmpReg);
+ FIOp.setIsKill();
- if ((!FrameReg || !Offset) && TmpReg) {
- unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
- auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
- if (FrameReg)
- MIB.addReg(FrameReg);
- else
- MIB.addImm(Offset);
+ if ((!FrameReg || !Offset) && TmpReg) {
+ unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
+ auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
+ if (FrameReg)
+ MIB.addReg(FrameReg);
+ else
+ MIB.addImm(Offset);
- return false;
- }
+ return false;
+ }
- bool NeedSaveSCC =
- RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
+ bool NeedSaveSCC =
+ RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
- Register TmpSReg =
- UseSGPR ? TmpReg
- : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
- MI, false, 0, !UseSGPR);
+ Register TmpSReg =
+ UseSGPR ? TmpReg
+ : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
+ MI, false, 0, !UseSGPR);
- // TODO: for flat scratch another attempt can be made with a VGPR index
- // if no SGPRs can be scavenged.
- if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
- report_fatal_error("Cannot scavenge register in FI elimination!");
+ // TODO: for flat scratch another attempt can be made with a VGPR index
+ // if no SGPRs can be scavenged.
+ if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
+ report_fatal_error("Cannot scavenge register in FI elimination!");
- if (!TmpSReg) {
- // Use frame register and restore it after.
- TmpSReg = FrameReg;
- FIOp.setReg(FrameReg);
- FIOp.setIsKill(false);
- }
+ if (!TmpSReg) {
+ // Use frame register and restore it after.
+ TmpSReg = FrameReg;
+ FIOp.setReg(FrameReg);
+ FIOp.setIsKill(false);
+ }
- if (NeedSaveSCC) {
- assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
- .addReg(FrameReg)
- .addImm(Offset);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
- .addReg(TmpSReg)
- .addImm(0);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
+ if (NeedSaveSCC) {
+ assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
+ .addReg(TmpSReg)
+ .addImm(0);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
+ .addImm(0)
+ .addReg(TmpSReg);
+ } else {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+ }
+
+ if (!UseSGPR)
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+ .addReg(TmpSReg, RegState::Kill);
+
+ if (TmpSReg == FrameReg) {
+ // Undo frame register modification.
+ if (NeedSaveSCC && !MI->registerDefIsDead(AMDGPU::SCC)) {
+ MachineBasicBlock::iterator I =
+ BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
+ TmpSReg)
+ .addReg(FrameReg)
+ .addImm(-Offset);
+ I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
+ .addReg(TmpSReg)
+ .addImm(0);
+ BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
+ TmpSReg)
.addImm(0)
.addReg(TmpSReg);
} else {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
+ BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
+ FrameReg)
.addReg(FrameReg)
- .addImm(Offset);
+ .addImm(-Offset);
}
+ }
- if (!UseSGPR)
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
- .addReg(TmpSReg, RegState::Kill);
-
- if (TmpSReg == FrameReg) {
- // Undo frame register modification.
- if (NeedSaveSCC && !MI->registerDefIsDead(AMDGPU::SCC)) {
- MachineBasicBlock::iterator I =
- BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
- TmpSReg)
- .addReg(FrameReg)
- .addImm(-Offset);
- I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
- .addReg(TmpSReg)
- .addImm(0);
- BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
- TmpSReg)
- .addImm(0)
- .addReg(TmpSReg);
- } else {
- BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
- FrameReg)
- .addReg(FrameReg)
- .addImm(-Offset);
- }
- }
+ return false;
+ }
- return false;
- }
+ bool IsMUBUF = TII->isMUBUF(*MI);
+
+ if (!IsMUBUF && !MFI->isEntryFunction()) {
+ // Convert to a swizzled stack address by scaling by the wave size.
+ // In an entry function/kernel the offset is already swizzled.
+ bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
+ bool LiveSCC =
+ RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
+ const TargetRegisterClass *RC = IsSALU && !LiveSCC
+ ? &AMDGPU::SReg_32RegClass
+ : &AMDGPU::VGPR_32RegClass;
+ bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+ MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
+ Register ResultReg =
+ IsCopy ? MI->getOperand(0).getReg()
+ : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
- bool IsMUBUF = TII->isMUBUF(*MI);
-
- if (!IsMUBUF && !MFI->isEntryFunction()) {
- // Convert to a swizzled stack address by scaling by the wave size.
- // In an entry function/kernel the offset is already swizzled.
- bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
- bool LiveSCC =
- RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
- const TargetRegisterClass *RC = IsSALU && !LiveSCC
- ? &AMDGPU::SReg_32RegClass
- : &AMDGPU::VGPR_32RegClass;
- bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
- MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
- Register ResultReg =
- IsCopy ? MI->getOperand(0).getReg()
- : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
-
- int64_t Offset = FrameInfo.getObjectOffset(Index);
- if (Offset == 0) {
- unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
- : AMDGPU::V_LSHRREV_B32_e64;
- // XXX - This never happens because of emergency scavenging slot at 0?
- auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg)
- .addImm(ST.getWavefrontSizeLog2())
- .addReg(FrameReg);
- if (IsSALU && !LiveSCC)
- Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
- if (IsSALU && LiveSCC) {
- Register NewDest = RS->scavengeRegisterBackwards(
- AMDGPU::SReg_32RegClass, Shift, false, 0);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- NewDest)
- .addReg(ResultReg);
- ResultReg = NewDest;
- }
- } else {
- MachineInstrBuilder MIB;
- if (!IsSALU) {
- if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
- nullptr) {
- // Reuse ResultReg in intermediate step.
- Register ScaledReg = ResultReg;
-
- BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
- ScaledReg)
+ int64_t Offset = FrameInfo.getObjectOffset(Index);
+ if (Offset == 0) {
+ unsigned OpCode =
+ IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32 : AMDGPU::V_LSHRREV_B32_e64;
+ // XXX - This never happens because of emergency scavenging slot at 0?
+ auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg)
+ .addImm(ST.getWavefrontSizeLog2())
+ .addReg(FrameReg);
+ if (IsSALU && !LiveSCC)
+ Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
+ if (IsSALU && LiveSCC) {
+ Register NewDest = RS->scavengeRegisterBackwards(
+ AMDGPU::SReg_32RegClass, Shift, false, 0);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), NewDest)
+ .addReg(ResultReg);
+ ResultReg = NewDest;
+ }
+ } else {
+ MachineInstrBuilder MIB;
+ if (!IsSALU) {
+ if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
+ nullptr) {
+ // Reuse ResultReg in intermediate step.
+ Register ScaledReg = ResultReg;
+
+ BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
+ ScaledReg)
.addImm(ST.getWavefrontSizeLog2())
.addReg(FrameReg);
- const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
-
- // TODO: Fold if use instruction is another add of a constant.
- if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
- // FIXME: This can fail
- MIB.addImm(Offset);
- MIB.addReg(ScaledReg, RegState::Kill);
- if (!IsVOP2)
- MIB.addImm(0); // clamp bit
- } else {
- assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
- "Need to reuse carry out register");
-
- // Use scavenged unused carry out as offset register.
- Register ConstOffsetReg;
- if (!isWave32)
- ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
- else
- ConstOffsetReg = MIB.getReg(1);
-
- BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
- .addImm(Offset);
- MIB.addReg(ConstOffsetReg, RegState::Kill);
- MIB.addReg(ScaledReg, RegState::Kill);
+ const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
+
+ // TODO: Fold if use instruction is another add of a constant.
+ if (IsVOP2 ||
+ AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
+ // FIXME: This can fail
+ MIB.addImm(Offset);
+ MIB.addReg(ScaledReg, RegState::Kill);
+ if (!IsVOP2)
MIB.addImm(0); // clamp bit
- }
+ } else {
+ assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
+ "Need to reuse carry out register");
+
+ // Use scavenged unused carry out as offset register.
+ Register ConstOffsetReg;
+ if (!isWave32)
+ ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
+ else
+ ConstOffsetReg = MIB.getReg(1);
+
+ BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32),
+ ConstOffsetReg)
+ .addImm(Offset);
+ MIB.addReg(ConstOffsetReg, RegState::Kill);
+ MIB.addReg(ScaledReg, RegState::Kill);
+ MIB.addImm(0); // clamp bit
}
}
- if (!MIB || IsSALU) {
- // We have to produce a carry out, and there isn't a free SGPR pair
- // for it. We can keep the whole computation on the SALU to avoid
- // clobbering an additional register at the cost of an extra mov.
-
- // We may have 1 free scratch SGPR even though a carry out is
- // unavailable. Only one additional mov is needed.
- Register TmpScaledReg = RS->scavengeRegisterBackwards(
- AMDGPU::SReg_32_XM0RegClass, MI, false, 0, false);
- Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
-
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
+ }
+ if (!MIB || IsSALU) {
+ // We have to produce a carry out, and there isn't a free SGPR pair
+ // for it. We can keep the whole computation on the SALU to avoid
+ // clobbering an additional register at the cost of an extra mov.
+
+ // We may have 1 free scratch SGPR even though a carry out is
+ // unavailable. Only one additional mov is needed.
+ Register TmpScaledReg = RS->scavengeRegisterBackwards(
+ AMDGPU::SReg_32_XM0RegClass, MI, false, 0, false);
+ Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
+
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
.addReg(FrameReg)
.addImm(ST.getWavefrontSizeLog2());
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
+ .addReg(ScaledReg, RegState::Kill)
+ .addImm(Offset);
+ if (!IsSALU)
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
+ .addReg(ScaledReg, RegState::Kill);
+ else
+ ResultReg = ScaledReg;
+
+ // If there were truly no free SGPRs, we need to undo everything.
+ if (!TmpScaledReg.isValid()) {
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
- .addReg(ScaledReg, RegState::Kill)
- .addImm(Offset);
- if (!IsSALU)
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
- .addReg(ScaledReg, RegState::Kill);
- else
- ResultReg = ScaledReg;
-
- // If there were truly no free SGPRs, we need to undo everything.
- if (!TmpScaledReg.isValid()) {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
.addReg(ScaledReg, RegState::Kill)
.addImm(-Offset);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
.addReg(FrameReg)
.addImm(ST.getWavefrontSizeLog2());
- }
}
}
-
- // Don't introduce an extra copy if we're just materializing in a mov.
- if (IsCopy) {
- MI->eraseFromParent();
- return true;
- }
- FIOp.ChangeToRegister(ResultReg, false, false, true);
- return false;
}
- if (IsMUBUF) {
- // Disable offen so we don't need a 0 vgpr base.
- assert(static_cast<int>(FIOperandNum) ==
- AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::vaddr));
+ // Don't introduce an extra copy if we're just materializing in a mov.
+ if (IsCopy) {
+ MI->eraseFromParent();
+ return true;
+ }
+ FIOp.ChangeToRegister(ResultReg, false, false, true);
+ return false;
+ }
- auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
- assert((SOffset.isImm() && SOffset.getImm() == 0));
+ if (IsMUBUF) {
+ // Disable offen so we don't need a 0 vgpr base.
+ assert(
+ static_cast<int>(FIOperandNum) ==
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr));
- if (FrameReg != AMDGPU::NoRegister)
- SOffset.ChangeToRegister(FrameReg, false);
+ auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
+ assert((SOffset.isImm() && SOffset.getImm() == 0));
- int64_t Offset = FrameInfo.getObjectOffset(Index);
- int64_t OldImm
- = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
- int64_t NewOffset = OldImm + Offset;
+ if (FrameReg != AMDGPU::NoRegister)
+ SOffset.ChangeToRegister(FrameReg, false);
- if (SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
- buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
- MI->eraseFromParent();
- return true;
- }
+ int64_t Offset = FrameInfo.getObjectOffset(Index);
+ int64_t OldImm =
+ TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
+ int64_t NewOffset = OldImm + Offset;
+
+ if (SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
+ buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
+ MI->eraseFromParent();
+ return true;
}
+ }
- // If the offset is simply too big, don't convert to a scratch wave offset
- // relative index.
+ // If the offset is simply too big, don't convert to a scratch wave offset
+ // relative index.
- FIOp.ChangeToImmediate(Offset);
- if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
- Register TmpReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
- MI, false, 0);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+ FIOp.ChangeToImmediate(Offset);
+ if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
+ Register TmpReg =
+ RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
.addImm(Offset);
- FIOp.ChangeToRegister(TmpReg, false, false, true);
- }
+ FIOp.ChangeToRegister(TmpReg, false, false, true);
}
}
+ }
return false;
}
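
For readers not steeped in the AMDGPU calling convention, here is a quick plain-integer model of the swizzling arithmetic in the non-entry-function path above (the shift by getWavefrontSizeLog2() via V_LSHRREV_B32/S_LSHR_B32 followed by an add of the frame-index object offset); the concrete numbers are made up for illustration and are not taken from the patch:

#include <cassert>
#include <cstdint>

// Models the shift-then-add sequence emitted above: the unswizzled frame
// register is scaled down by the wave size to get a per-lane offset, then
// the object offset is added.
int64_t swizzledOffset(int64_t UnswizzledFrameReg, int64_t ObjectOffset,
                       unsigned WavefrontSizeLog2) {
  return (UnswizzledFrameReg >> WavefrontSizeLog2) + ObjectOffset;
}

int main() {
  // Hypothetical wave64 subtarget (log2 == 6), frame register at 4096 bytes,
  // frame-index object at +16: 4096/64 + 16 == 80.
  assert(swizzledOffset(4096, 16, 6) == 80);
}
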
@@ -2895,11 +2897,10 @@ bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
OpType <= AMDGPU::OPERAND_SRC_LAST;
}
-bool SIRegisterInfo::shouldRewriteCopySrc(
- const TargetRegisterClass *DefRC,
- unsigned DefSubReg,
- const TargetRegisterClass *SrcRC,
- unsigned SrcSubReg) const {
+bool SIRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) const {
// We want to prefer the smallest register class possible, so we don't want to
// stop and rewrite on anything that looks like a subregister
// extract. Operations mostly don't care about the super register class, so we
@@ -2954,8 +2955,9 @@ bool SIRegisterInfo::isUniformReg(const MachineRegisterInfo &MRI,
return !RBI.isDivergentRegBank(RB);
}
-ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
- unsigned EltSize) const {
+ArrayRef<int16_t>
+SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
+ unsigned EltSize) const {
const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC);
assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
@@ -2969,7 +2971,7 @@ ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC
return ArrayRef(Parts.data(), NumParts);
}
-const TargetRegisterClass*
+const TargetRegisterClass *
SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
Register Reg) const {
return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
@@ -2997,13 +2999,10 @@ bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
return RC && isAGPRClass(RC);
}
-bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
- const TargetRegisterClass *SrcRC,
- unsigned SubReg,
- const TargetRegisterClass *DstRC,
- unsigned DstSubReg,
- const TargetRegisterClass *NewRC,
- LiveIntervals &LIS) const {
+bool SIRegisterInfo::shouldCoalesce(
+ MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg,
+ const TargetRegisterClass *DstRC, unsigned DstSubReg,
+ const TargetRegisterClass *NewRC, LiveIntervals &LIS) const {
unsigned SrcSize = getRegSizeInBits(*SrcRC);
unsigned DstSize = getRegSizeInBits(*DstRC);
unsigned NewSize = getRegSizeInBits(*NewRC);
@@ -3022,8 +3021,8 @@ unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
- MF.getFunction());
+ unsigned Occupancy =
+ ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());
switch (RC->getID()) {
default:
return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
@@ -3052,7 +3051,7 @@ unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
}
const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
- static const int Empty[] = { -1 };
+ static const int Empty[] = {-1};
if (RegPressureIgnoredUnits[RegUnit])
return Empty;
@@ -3060,7 +3059,8 @@ const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
}
-MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
+MCRegister
+SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
// Not a callee saved register.
return AMDGPU::SGPR30_SGPR31;
}
@@ -3084,11 +3084,10 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
}
}
-const TargetRegisterClass *
-SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
- const MachineRegisterInfo &MRI) const {
+const TargetRegisterClass *SIRegisterInfo::getConstrainedRegClassForOperand(
+ const MachineOperand &MO, const MachineRegisterInfo &MRI) const {
const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
- if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
+ if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank *>())
return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB);
if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
@@ -3111,14 +3110,13 @@ const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
: &AMDGPU::VReg_64RegClass;
}
-const TargetRegisterClass *
-SIRegisterInfo::getRegClass(unsigned RCID) const {
+const TargetRegisterClass *SIRegisterInfo::getRegClass(unsigned RCID) const {
switch ((int)RCID) {
case AMDGPU::SReg_1RegClassID:
return getBoolRC();
case AMDGPU::SReg_1_XEXECRegClassID:
return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
- : &AMDGPU::SReg_64_XEXECRegClass;
+ : &AMDGPU::SReg_64_XEXECRegClass;
case -1:
return nullptr;
default:
@@ -3183,15 +3181,15 @@ MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
- for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
- AMDGPU::SReg_32RegClass,
- AMDGPU::AGPR_32RegClass } ) {
+ for (const TargetRegisterClass &RC :
+ {AMDGPU::VGPR_32RegClass, AMDGPU::SReg_32RegClass,
+ AMDGPU::AGPR_32RegClass}) {
if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
return Super;
}
- if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
- &AMDGPU::VGPR_32RegClass)) {
- return Super;
+ if (MCPhysReg Super =
+ getMatchingSuperReg(Reg, AMDGPU::hi16, &AMDGPU::VGPR_32RegClass)) {
+ return Super;
}
return AMDGPU::NoRegister;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 57cdcb29fac4807..0226f66957d991b 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -23,7 +23,7 @@ namespace llvm {
class GCNSubtarget;
class LiveIntervals;
-class LivePhysRegs;
+class LiveRegUnits;
class RegisterBank;
struct SGPRSpillBuilder;
@@ -66,9 +66,7 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
- bool spillSGPRToVGPR() const {
- return SpillSGPRToVGPR;
- }
+ bool spillSGPRToVGPR() const { return SpillSGPRToVGPR; }
/// Return the largest available SGPR aligned to \p Align for the register
/// class \p RC.
@@ -92,9 +90,7 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
// Stack access is very expensive. CSRs are also the high registers, and we
// want to minimize the number of used registers.
- unsigned getCSRFirstUseCost() const override {
- return 100;
- }
+ unsigned getCSRFirstUseCost() const override { return 100; }
const TargetRegisterClass *
getLargestLegalSuperClass(const TargetRegisterClass *RC,
@@ -110,7 +106,7 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
bool requiresFrameIndexReplacementScavenging(
- const MachineFunction &MF) const override;
+ const MachineFunction &MF) const override;
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override;
int64_t getScratchInstrOffset(const MachineInstr *MI) const;
@@ -129,8 +125,9 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
int64_t Offset) const override;
- const TargetRegisterClass *getPointerRegClass(
- const MachineFunction &MF, unsigned Kind = 0) const override;
+ const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind = 0) const override;
/// Returns a legal register class to copy a register in the specified class
/// to or from. If it is possible to copy the register directly without using
@@ -305,12 +302,9 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
unsigned EltSize) const;
- bool shouldCoalesce(MachineInstr *MI,
- const TargetRegisterClass *SrcRC,
- unsigned SubReg,
- const TargetRegisterClass *DstRC,
- unsigned DstSubReg,
- const TargetRegisterClass *NewRC,
+ bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC,
+ unsigned SubReg, const TargetRegisterClass *DstRC,
+ unsigned DstSubReg, const TargetRegisterClass *NewRC,
LiveIntervals &LIS) const override;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
@@ -331,13 +325,11 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
return getRegClassForSizeOnBank(Ty.getSizeInBits(), Bank);
}
- const TargetRegisterClass *
- getConstrainedRegClassForOperand(const MachineOperand &MO,
- const MachineRegisterInfo &MRI) const override;
+ const TargetRegisterClass *getConstrainedRegClassForOperand(
+ const MachineOperand &MO, const MachineRegisterInfo &MRI) const override;
const TargetRegisterClass *getBoolRC() const {
- return isWave32 ? &AMDGPU::SReg_32RegClass
- : &AMDGPU::SReg_64RegClass;
+ return isWave32 ? &AMDGPU::SReg_32RegClass : &AMDGPU::SReg_64RegClass;
}
const TargetRegisterClass *getWaveMaskRegClass() const {
@@ -357,8 +349,7 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
// Find reaching register definition
MachineInstr *findReachingDef(Register Reg, unsigned SubReg,
- MachineInstr &Use,
- MachineRegisterInfo &MRI,
+ MachineInstr &Use, MachineRegisterInfo &MRI,
LiveIntervals *LIS) const;
const uint32_t *getAllVGPRRegMask() const;
@@ -415,14 +406,14 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
// Insert spill or restore instructions.
// When lowering spill pseudos, the RegScavenger should be set.
// For creating spill instructions during frame lowering, where no scavenger
- // is available, LiveRegs can be used.
+ // is available, LiveUnits can be used.
void buildSpillLoadStore(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, const DebugLoc &DL,
unsigned LoadStoreOp, int Index, Register ValueReg,
bool ValueIsKill, MCRegister ScratchOffsetReg,
int64_t InstrOffset, MachineMemOperand *MMO,
RegScavenger *RS,
- LivePhysRegs *LiveRegs = nullptr) const;
+ LiveRegUnits *LiveUnits = nullptr) const;
// Return alignment in register file of first register in a register tuple.
unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const {
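
The header change above is the externally visible part of the patch: buildSpillLoadStore now takes an optional LiveRegUnits* instead of a LivePhysRegs* for callers in frame lowering that have no RegScavenger. A hypothetical call site under that signature (the surrounding variable names and operand values are assumptions for illustration, not taken from the patch):

// Frame lowering has no scavenger yet, so RS is nullptr and the tracked
// live register units are passed instead.
TRI->buildSpillLoadStore(MBB, MBBI, DL, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
                         FrameIndex, SpillVGPR, /*ValueIsKill=*/true,
                         StackPtrReg, /*InstrOffset=*/0, MMO,
                         /*RS=*/nullptr, &LiveUnits);
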