[llvm] [AMDGPU] Support block load/store for CSR (PR #130013)
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 10 00:53:07 PDT 2025
================
@@ -1694,6 +1694,110 @@ void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
}
}
+static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
+ const GCNSubtarget &ST,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI,
+ unsigned &MinCSFrameIndex,
+ unsigned &MaxCSFrameIndex) {
+ SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *MRI = ST.getRegisterInfo();
+
+ assert(std::is_sorted(CSI.begin(), CSI.end(),
+ [](const CalleeSavedInfo &A, const CalleeSavedInfo &B) {
+ return A.getReg() < B.getReg();
+ }) &&
+ "Callee saved registers not sorted");
+
+ auto CanUseBlockOps = [&](const CalleeSavedInfo &CSI) {
+ return !CSI.isSpilledToReg() &&
+ MRI->isVGPR(MF.getRegInfo(), CSI.getReg()) &&
+ !FuncInfo->isWWMReservedRegister(CSI.getReg());
+ };
+
+ auto CSEnd = CSI.end();
+ for (auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) {
+ Register Reg = CSIt->getReg();
+ if (!CanUseBlockOps(*CSIt))
+ continue;
+
+ // Find all the regs that will fit in a 32-bit block starting at the current
+ // reg and build the mask. It should have 1 for every register that's
+ // included, with the current register as the least significant bit.
+ uint32_t Mask = 1;
+ CSEnd = std::remove_if(
+ CSIt + 1, CSEnd, [&](const CalleeSavedInfo &CSI) -> bool {
+ if (CanUseBlockOps(CSI) && CSI.getReg() < Reg + 32) {
+ Mask |= 1 << (CSI.getReg() - Reg);
+ return true;
+ } else {
+ return false;
+ }
+ });
+
+ const TargetRegisterClass *BlockRegClass =
----------------
perlfu wrote:
I might have missed something here, but you use a helper to retrieve the register class, while the code below seems to assume a 1024-bit register?
https://github.com/llvm/llvm-project/pull/130013
More information about the llvm-commits mailing list