[llvm] [AMDGPU] Support block load/store for CSR (PR #130013)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 11 22:08:20 PDT 2025
================
@@ -1763,6 +1866,101 @@ bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
return true;
}
+/// Spill the callee-saved VGPRs that this target marked as handled by
+/// emitting scratch block stores (SI_BLOCK_SPILL_V1024_SAVE). Registers not
+/// marked isHandledByTarget() are left to the generic CSR spilling code.
+///
+/// Always returns false so PrologEpilogInserter still runs the default
+/// insertCSRSaves for the remaining (non-target-handled) registers.
+bool SIFrameLowering::spillCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+  MachineFunction *MF = MBB.getParent();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+  if (!ST.useVGPRBlockOpsForCSR())
+    return false;
+
+  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
+
+  for (const CalleeSavedInfo &CS : CSI) {
+    // Only registers explicitly claimed by the target are spilled as blocks;
+    // everything else goes through the generic spilling path.
+    if (!CS.isHandledByTarget())
+      continue;
+
+    // Build a scratch block store.
+    Register Reg = CS.getReg();
+    uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
+    int FrameIndex = CS.getFrameIdx();
+    MachinePointerInfo PtrInfo =
+        MachinePointerInfo::getFixedStack(*MF, FrameIndex);
+    MachineMemOperand *MMO =
+        MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+                                 FrameInfo.getObjectSize(FrameIndex),
+                                 FrameInfo.getObjectAlign(FrameIndex));
+
+    BuildMI(MBB, MI, MI->getDebugLoc(),
+            TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
+        .addReg(Reg, getKillRegState(false))
+        .addFrameIndex(FrameIndex)
+        .addReg(FuncInfo->getStackPtrOffsetReg())
+        .addImm(0)
+        .addImm(Mask)
+        .addMemOperand(MMO);
+
+    FuncInfo->setHasSpilledVGPRs();
+
+    // Add the register to the liveins. This is necessary because if any of the
+    // VGPRs in the register block is reserved (e.g. if it's a WWM register),
+    // then the whole block will be marked as reserved and `updateLiveness` will
+    // skip it.
+    MBB.addLiveIn(Reg);
+  }
+
+  return false;
+}
+
+bool SIFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+ MachineFunction *MF = MBB.getParent();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ if (!ST.useVGPRBlockOpsForCSR())
+ return false;
+
+ SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
+ for (const CalleeSavedInfo &CS : reverse(CSI)) {
+ if (!CS.isHandledByTarget())
+ continue;
+
+ // Build a scratch block load.
+ Register Reg = CS.getReg();
+ uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
+ int FrameIndex = CS.getFrameIdx();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(*MF, FrameIndex);
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlign(FrameIndex));
+
+ auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE), Reg)
+ .addFrameIndex(FrameIndex)
+ .addReg(FuncInfo->getStackPtrOffsetReg())
+ .addImm(0)
+ .addImm(Mask)
+ .addMemOperand(MMO);
+ SITRI->addImplicitUsesForBlockCSRLoad(MIB, Reg);
+
+ // Add the register to the liveins. This is necessary because if any of the
+ // VGPRs in the register block is reserved (e.g. if it's a WWM register),
+ // then the whole block will be marked as reserved and `updateLiveness` will
+ // skip it.
+ if (!MBB.isLiveIn(Reg))
+ MBB.addLiveIn(Reg);
----------------
arsenm wrote:
I believe the intended way to do this is to just add the register unconditionally and call `sortUniqueLiveIns` once afterwards, instead of performing the `isLiveIn` check on every iteration.
https://github.com/llvm/llvm-project/pull/130013
More information about the llvm-commits
mailing list