[llvm] 9569d5b - [AMDGPU] Allow buildSpillLoadStore in empty bb
Sebastian Neubauer via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 29 03:54:11 PDT 2021
Author: Sebastian Neubauer
Date: 2021-04-29T12:53:20+02:00
New Revision: 9569d5ba02697f64dda86591cb202f8a4390f710
URL: https://github.com/llvm/llvm-project/commit/9569d5ba02697f64dda86591cb202f8a4390f710
DIFF: https://github.com/llvm/llvm-project/commit/9569d5ba02697f64dda86591cb202f8a4390f710.diff
LOG: [AMDGPU] Allow buildSpillLoadStore in empty bb
This allows calling buildSpillLoadStore for an empty basic block, where
MI is the block's end iterator rather than pointing at an instruction.
This only happens with downstream CFI changes, so I was not able to
create a testcase that works with upstream LLVM.
Differential Revision: https://reviews.llvm.org/D101356
Added:
Modified:
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index c31bc10155846..c8ac34dc15231 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -118,6 +118,7 @@ static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
LivePhysRegs &LiveRegs, MachineFunction &MF,
+ MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, Register SpillReg,
int FI) {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
@@ -129,7 +130,7 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
LiveRegs.addReg(SpillReg);
- TRI.buildSpillLoadStore(I, Opc, FI, SpillReg, true,
+ TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, true,
FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
&LiveRegs);
LiveRegs.removeReg(SpillReg);
@@ -139,6 +140,7 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
LivePhysRegs &LiveRegs, MachineFunction &MF,
+ MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, Register SpillReg,
int FI) {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
@@ -149,7 +151,7 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
- TRI.buildSpillLoadStore(I, Opc, FI, SpillReg, false,
+ TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, false,
FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
&LiveRegs);
}
@@ -745,7 +747,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
/*IsProlog*/ true);
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, Reg.VGPR, *Reg.FI);
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
+ *Reg.FI);
}
// VGPRs used for Whole Wave Mode
@@ -759,7 +762,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, VGPR, *FI);
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
}
if (ScratchExecCopy) {
@@ -785,7 +788,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg(FramePtrReg);
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
FramePtrFI);
}
@@ -803,7 +806,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg(BasePtrReg);
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
BasePtrFI);
}
@@ -996,7 +999,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
FramePtrFI);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
.addReg(TmpVGPR, RegState::Kill);
@@ -1022,7 +1025,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
BasePtrFI);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
.addReg(TmpVGPR, RegState::Kill);
@@ -1048,7 +1051,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, Reg.VGPR,
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
*Reg.FI);
}
@@ -1062,7 +1065,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, VGPR, *FI);
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
}
if (ScratchExecCopy) {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 2db5180e04bf9..583cd45707e0f 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -915,13 +915,11 @@ static int getOffsetMUBUFLoad(unsigned Opc) {
}
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
+ MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- int Index,
- unsigned Lane,
- unsigned ValueReg,
- bool IsKill) {
- MachineBasicBlock *MBB = MI->getParent();
- MachineFunction *MF = MI->getParent()->getParent();
+ int Index, unsigned Lane,
+ unsigned ValueReg, bool IsKill) {
+ MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -939,8 +937,8 @@ static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
: AMDGPU::V_ACCVGPR_READ_B32_e64;
- auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
- .addReg(Src, getKillRegState(IsKill));
+ auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
+ .addReg(Src, getKillRegState(IsKill));
MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
return MIB;
}
@@ -964,7 +962,7 @@ static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
return false;
const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
- if (spillVGPRtoAGPR(ST, MI, Index, 0, Reg->getReg(), false).getInstr())
+ if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr())
return true;
MachineInstrBuilder NewMI =
@@ -1021,20 +1019,19 @@ static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
}
void SIRegisterInfo::buildSpillLoadStore(
- MachineBasicBlock::iterator MI, unsigned LoadStoreOp, int Index,
- Register ValueReg, bool IsKill, MCRegister ScratchOffsetReg,
- int64_t InstOffset, MachineMemOperand *MMO, RegScavenger *RS,
- LivePhysRegs *LiveRegs) const {
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
+ MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
+ RegScavenger *RS, LivePhysRegs *LiveRegs) const {
assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
- MachineBasicBlock *MBB = MI->getParent();
- MachineFunction *MF = MI->getParent()->getParent();
+ MachineFunction *MF = MBB.getParent();
const SIInstrInfo *TII = ST.getInstrInfo();
const MachineFrameInfo &MFI = MF->getFrameInfo();
const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
- const DebugLoc &DL = MI->getDebugLoc();
+ const DebugLoc &DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
bool IsStore = Desc->mayStore();
bool IsFlat = TII->isFLATScratch(LoadStoreOp);
@@ -1114,10 +1111,9 @@ void SIRegisterInfo::buildSpillLoadStore(
report_fatal_error("could not scavenge SGPR to spill in entry function");
if (ScratchOffsetReg == AMDGPU::NoRegister) {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset)
- .addImm(Offset);
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
} else {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
.addReg(ScratchOffsetReg)
.addImm(Offset);
}
@@ -1170,7 +1166,7 @@ void SIRegisterInfo::buildSpillLoadStore(
Register Sub = IsSubReg
? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
: ValueReg;
- auto MIB = spillVGPRtoAGPR(ST, MI, Index, Lane, Sub, IsKill);
+ auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
if (!MIB.getInstr())
break;
if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == 0)) {
@@ -1216,9 +1212,9 @@ void SIRegisterInfo::buildSpillLoadStore(
RS->setRegUsed(TmpReg);
}
if (IsStore) {
- auto AccRead = BuildMI(*MBB, MI, DL,
- TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpReg)
- .addReg(SubReg, getKillRegState(IsKill));
+ auto AccRead = BuildMI(MBB, MI, DL,
+ TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpReg)
+ .addReg(SubReg, getKillRegState(IsKill));
if (NeedSuperRegDef)
AccRead.addReg(ValueReg, RegState::ImplicitDefine);
AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
@@ -1231,9 +1227,9 @@ void SIRegisterInfo::buildSpillLoadStore(
MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
commonAlignment(Alignment, RemRegOffset));
- auto MIB = BuildMI(*MBB, MI, DL, *Desc)
- .addReg(SubReg,
- getDefRegState(!IsStore) | getKillRegState(IsKill));
+ auto MIB =
+ BuildMI(MBB, MI, DL, *Desc)
+ .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
if (!IsFlat)
MIB.addReg(FuncInfo->getScratchRSrcReg());
@@ -1254,9 +1250,9 @@ void SIRegisterInfo::buildSpillLoadStore(
MIB.addReg(ValueReg, RegState::ImplicitDefine);
if (!IsStore && TmpReg != AMDGPU::NoRegister) {
- MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
+ MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
FinalReg)
- .addReg(TmpReg, RegState::Kill);
+ .addReg(TmpReg, RegState::Kill);
MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
}
@@ -1266,7 +1262,7 @@ void SIRegisterInfo::buildSpillLoadStore(
if (ScratchOffsetRegDelta != 0) {
// Subtract the offset we added to the ScratchOffset register.
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), SOffset)
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), SOffset)
.addReg(SOffset)
.addImm(ScratchOffsetRegDelta);
}
@@ -1293,12 +1289,12 @@ void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
if (IsLoad) {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
- buildSpillLoadStore(SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
+ buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
Offset * SB.EltSize, MMO, SB.RS);
} else {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
- buildSpillLoadStore(SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
+ buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
Offset * SB.EltSize, MMO, SB.RS);
// This only ever adds one VGPR spill
SB.MFI.addToSpilledVGPRs(1);
@@ -1573,13 +1569,11 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
- buildSpillLoadStore(MI, Opc,
- Index,
- VData->getReg(), VData->isKill(),
- FrameReg,
- TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
- *MI->memoperands_begin(),
- RS);
+ auto *MBB = MI->getParent();
+ buildSpillLoadStore(
+ *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
+ TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
+ *MI->memoperands_begin(), RS);
MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
MI->eraseFromParent();
break;
@@ -1609,13 +1603,11 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
- buildSpillLoadStore(MI, Opc,
- Index,
- VData->getReg(), VData->isKill(),
- FrameReg,
- TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
- *MI->memoperands_begin(),
- RS);
+ auto *MBB = MI->getParent();
+ buildSpillLoadStore(
+ *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
+ TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
+ *MI->memoperands_begin(), RS);
MI->eraseFromParent();
break;
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index fff25f04e680c..424e3d0af804b 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -349,7 +349,8 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
// When lowering spill pseudos, the RegScavenger should be set.
// For creating spill instructions during frame lowering, where no scavenger
// is available, LiveRegs can be used.
- void buildSpillLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp,
+ void buildSpillLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, unsigned LoadStoreOp,
int Index, Register ValueReg, bool ValueIsKill,
MCRegister ScratchOffsetReg, int64_t InstrOffset,
MachineMemOperand *MMO, RegScavenger *RS,
More information about the llvm-commits
mailing list