[llvm] Revert "[AMDGPU] Support block load/store for CSR" (PR #136846)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 23 04:05:46 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Diana Picus (rovka)
<details>
<summary>Changes</summary>
Reverts llvm/llvm-project#130013 due to failures when expensive checks are enabled.
---
Patch is 82.13 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/136846.diff
19 Files Affected:
- (modified) llvm/include/llvm/CodeGen/MachineFrameInfo.h (-1)
- (modified) llvm/include/llvm/CodeGen/TargetFrameLowering.h (-17)
- (modified) llvm/lib/CodeGen/PrologEpilogInserter.cpp (+29-6)
- (modified) llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp (-35)
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (-8)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp (-37)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (-3)
- (modified) llvm/lib/Target/AMDGPU/SIFrameLowering.cpp (-204)
- (modified) llvm/lib/Target/AMDGPU/SIFrameLowering.h (-17)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (-14)
- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+5-16)
- (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (-32)
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp (+8-65)
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.h (-16)
- (removed) llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir (-294)
- (removed) llvm/test/CodeGen/AMDGPU/spill-vgpr-block.ll (-93)
- (removed) llvm/test/CodeGen/AMDGPU/vgpr-blocks-funcinfo.mir (-47)
- (modified) llvm/unittests/Target/AMDGPU/CMakeLists.txt (-1)
- (removed) llvm/unittests/Target/AMDGPU/LiveRegUnits.cpp (-160)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 9d1b536d23331..172c3e8c9a847 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -61,7 +61,6 @@ class CalleeSavedInfo {
MCRegister getReg() const { return Reg; }
int getFrameIdx() const { return FrameIdx; }
MCRegister getDstReg() const { return DstReg; }
- void setReg(MCRegister R) { Reg = R; }
void setFrameIdx(int FI) {
FrameIdx = FI;
SpilledToReg = false;
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 58b63f1769003..cdbefb36c00c7 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -270,14 +270,6 @@ class TargetFrameLowering {
return false;
}
- /// spillCalleeSavedRegister - Default implementation for spilling a single
- /// callee saved register.
- void spillCalleeSavedRegister(MachineBasicBlock &SaveBlock,
- MachineBasicBlock::iterator MI,
- const CalleeSavedInfo &CS,
- const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI) const;
-
/// restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee
/// saved registers and returns true if it isn't possible / profitable to do
/// so by issuing a series of load instructions via loadRegToStackSlot().
@@ -292,15 +284,6 @@ class TargetFrameLowering {
return false;
}
- // restoreCalleeSavedRegister - Default implementation for restoring a single
- // callee saved register. Should be called in reverse order. Can insert
- // multiple instructions.
- void restoreCalleeSavedRegister(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const CalleeSavedInfo &CS,
- const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI) const;
-
/// hasFP - Return true if the specified function should have a dedicated
/// frame pointer register. For most targets this is true only if the function
/// has variable sized allocas or if frame pointer elimination is disabled.
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 0cd25c4feb8b9..9b852c0fd49cf 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -476,8 +476,8 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
// Now that we know which registers need to be saved and restored, allocate
// stack slots for them.
for (auto &CS : CSI) {
- // If the target has spilled this register to another register or already
- // handled it , we don't need to allocate a stack slot.
+ // If the target has spilled this register to another register, we don't
+ // need to allocate a stack slot.
if (CS.isSpilledToReg())
continue;
@@ -597,14 +597,25 @@ static void updateLiveness(MachineFunction &MF) {
static void insertCSRSaves(MachineBasicBlock &SaveBlock,
ArrayRef<CalleeSavedInfo> CSI) {
MachineFunction &MF = *SaveBlock.getParent();
- const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
MachineBasicBlock::iterator I = SaveBlock.begin();
if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
for (const CalleeSavedInfo &CS : CSI) {
- TFI->spillCalleeSavedRegister(SaveBlock, I, CS, TII, TRI);
+ // Insert the spill to the stack frame.
+ MCRegister Reg = CS.getReg();
+
+ if (CS.isSpilledToReg()) {
+ BuildMI(SaveBlock, I, DebugLoc(),
+ TII.get(TargetOpcode::COPY), CS.getDstReg())
+ .addReg(Reg, getKillRegState(true));
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
+ TRI, Register());
+ }
}
}
}
@@ -613,7 +624,7 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
std::vector<CalleeSavedInfo> &CSI) {
MachineFunction &MF = *RestoreBlock.getParent();
- const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
@@ -623,7 +634,19 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
for (const CalleeSavedInfo &CI : reverse(CSI)) {
- TFI->restoreCalleeSavedRegister(RestoreBlock, I, CI, TII, TRI);
+ MCRegister Reg = CI.getReg();
+ if (CI.isSpilledToReg()) {
+ BuildMI(RestoreBlock, I, DebugLoc(), TII.get(TargetOpcode::COPY), Reg)
+ .addReg(CI.getDstReg(), getKillRegState(true));
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC,
+ TRI, Register());
+ assert(I != RestoreBlock.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ }
}
}
}
diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 70c3b2cbae9a6..be73b73c93989 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -15,7 +15,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
@@ -183,37 +182,3 @@ TargetFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
return DwarfFrameBase{DwarfFrameBase::Register, {RI->getFrameRegister(MF).id()}};
}
-
-void TargetFrameLowering::spillCalleeSavedRegister(
- MachineBasicBlock &SaveBlock, MachineBasicBlock::iterator MI,
- const CalleeSavedInfo &CS, const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI) const {
- // Insert the spill to the stack frame.
- MCRegister Reg = CS.getReg();
-
- if (CS.isSpilledToReg()) {
- BuildMI(SaveBlock, MI, DebugLoc(), TII->get(TargetOpcode::COPY),
- CS.getDstReg())
- .addReg(Reg, getKillRegState(true));
- } else {
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII->storeRegToStackSlot(SaveBlock, MI, Reg, true, CS.getFrameIdx(), RC,
- TRI, Register());
- }
-}
-
-void TargetFrameLowering::restoreCalleeSavedRegister(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const CalleeSavedInfo &CS, const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI) const {
- MCRegister Reg = CS.getReg();
- if (CS.isSpilledToReg()) {
- BuildMI(MBB, MI, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
- .addReg(CS.getDstReg(), getKillRegState(true));
- } else {
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII->loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI,
- Register());
- assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
- }
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index d896589825fc7..b2098b41acb7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1275,14 +1275,6 @@ def FeatureDynamicVGPRBlockSize32 : SubtargetFeature<"dynamic-vgpr-block-size-32
"Use a block size of 32 for dynamic VGPR allocation (default is 16)"
>;
-// Enable the use of SCRATCH_STORE/LOAD_BLOCK instructions for saving and
-// restoring the callee-saved registers.
-def FeatureUseBlockVGPROpsForCSR : SubtargetFeature<"block-vgpr-csr",
- "UseBlockVGPROpsForCSR",
- "true",
- "Use block load/store for VGPR callee saved registers"
->;
-
def FeatureLshlAddU64Inst
: SubtargetFeature<"lshl-add-u64-inst", "HasLshlAddU64Inst", "true",
"Has v_lshl_add_u64 instruction">;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 2dec16de940d1..3d6b974d1f027 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -19,7 +19,6 @@
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
@@ -244,36 +243,6 @@ const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV,
return AsmPrinter::lowerConstant(CV, BaseCV, Offset);
}
-static void emitVGPRBlockComment(const MachineInstr *MI, const SIInstrInfo *TII,
- const TargetRegisterInfo *TRI,
- const SIMachineFunctionInfo *MFI,
- MCStreamer &OS) {
- // The instruction will only transfer a subset of the registers in the block,
- // based on the mask that is stored in m0. We could search for the instruction
- // that sets m0, but most of the time we'll already have the mask stored in
- // the machine function info. Try to use that. This assumes that we only use
- // block loads/stores for CSR spills.
- Register RegBlock =
- TII->getNamedOperand(*MI, MI->mayLoad() ? AMDGPU::OpName::vdst
- : AMDGPU::OpName::vdata)
- ->getReg();
- Register FirstRegInBlock = TRI->getSubReg(RegBlock, AMDGPU::sub0);
- uint32_t Mask = MFI->getMaskForVGPRBlockOps(RegBlock);
-
- if (!Mask)
- return; // Nothing to report
-
- SmallString<512> TransferredRegs;
- for (unsigned I = 0; I < sizeof(Mask) * 8; ++I) {
- if (Mask & (1 << I)) {
- (llvm::Twine(" ") + TRI->getRegAsmName(FirstRegInBlock + I))
- .toVector(TransferredRegs);
- }
- }
-
- OS.emitRawComment(" transferring at most " + TransferredRegs);
-}
-
void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
// FIXME: Enable feature predicate checks once all the test pass.
// AMDGPU_MC::verifyInstructionPredicates(MI->getOpcode(),
@@ -362,12 +331,6 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
}
- if (isVerbose())
- if (STI.getInstrInfo()->isBlockLoadStore(MI->getOpcode()))
- emitVGPRBlockComment(MI, STI.getInstrInfo(), STI.getRegisterInfo(),
- MF->getInfo<SIMachineFunctionInfo>(),
- *OutStreamer);
-
MCInst TmpInst;
MCInstLowering.lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index fea17baa17722..7dd91c0775a48 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -262,7 +262,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasPointSampleAccel = false;
bool RequiresCOV6 = false;
- bool UseBlockVGPROpsForCSR = false;
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
@@ -1278,8 +1277,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool requiresCodeObjectV6() const { return RequiresCOV6; }
- bool useVGPRBlockOpsForCSR() const { return UseBlockVGPROpsForCSR; }
-
bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
bool hasVALUReadSGPRHazard() const { return getGeneration() == GFX12; }
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 0c1cd9ceddb02..9c737b4f3e378 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1739,105 +1739,6 @@ void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
}
}
-static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
- const GCNSubtarget &ST,
- std::vector<CalleeSavedInfo> &CSI,
- unsigned &MinCSFrameIndex,
- unsigned &MaxCSFrameIndex) {
- SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- MachineFrameInfo &MFI = MF.getFrameInfo();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
-
- assert(std::is_sorted(CSI.begin(), CSI.end(),
- [](const CalleeSavedInfo &A, const CalleeSavedInfo &B) {
- return A.getReg() < B.getReg();
- }) &&
- "Callee saved registers not sorted");
-
- auto CanUseBlockOps = [&](const CalleeSavedInfo &CSI) {
- return !CSI.isSpilledToReg() &&
- TRI->getPhysRegBaseClass(CSI.getReg()) == &AMDGPU::VGPR_32RegClass &&
- !FuncInfo->isWWMReservedRegister(CSI.getReg());
- };
-
- auto CSEnd = CSI.end();
- for (auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) {
- Register Reg = CSIt->getReg();
- if (!CanUseBlockOps(*CSIt))
- continue;
-
- // Find all the regs that will fit in a 32-bit mask starting at the current
- // reg and build said mask. It should have 1 for every register that's
- // included, with the current register as the least significant bit.
- uint32_t Mask = 1;
- CSEnd = std::remove_if(
- CSIt + 1, CSEnd, [&](const CalleeSavedInfo &CSI) -> bool {
- if (CanUseBlockOps(CSI) && CSI.getReg() < Reg + 32) {
- Mask |= 1 << (CSI.getReg() - Reg);
- return true;
- } else {
- return false;
- }
- });
-
- const TargetRegisterClass *BlockRegClass = TRI->getRegClassForBlockOp(MF);
- Register RegBlock =
- TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass);
- if (!RegBlock) {
- // We couldn't find a super register for the block. This can happen if
- // the register we started with is too high (e.g. v232 if the maximum is
- // v255). We therefore try to get the last register block and figure out
- // the mask from there.
- Register LastBlockStart =
- AMDGPU::VGPR0 + alignDown(Reg - AMDGPU::VGPR0, 32);
- RegBlock =
- TRI->getMatchingSuperReg(LastBlockStart, AMDGPU::sub0, BlockRegClass);
- assert(RegBlock && TRI->isSubRegister(RegBlock, Reg) &&
- "Couldn't find super register");
- int RegDelta = Reg - LastBlockStart;
- assert(RegDelta > 0 && llvm::countl_zero(Mask) >= RegDelta &&
- "Bad shift amount");
- Mask <<= RegDelta;
- }
-
- FuncInfo->setMaskForVGPRBlockOps(RegBlock, Mask);
-
- // The stack objects can be a bit smaller than the register block if we know
- // some of the high bits of Mask are 0. This may happen often with calling
- // conventions where the caller and callee-saved VGPRs are interleaved at
- // a small boundary (e.g. 8 or 16).
- int UnusedBits = llvm::countl_zero(Mask);
- unsigned BlockSize = TRI->getSpillSize(*BlockRegClass) - UnusedBits * 4;
- int FrameIdx =
- MFI.CreateStackObject(BlockSize, TRI->getSpillAlign(*BlockRegClass),
- /*isSpillSlot=*/true);
- if ((unsigned)FrameIdx < MinCSFrameIndex)
- MinCSFrameIndex = FrameIdx;
- if ((unsigned)FrameIdx > MaxCSFrameIndex)
- MaxCSFrameIndex = FrameIdx;
-
- CSIt->setFrameIdx(FrameIdx);
- CSIt->setReg(RegBlock);
- }
- CSI.erase(CSEnd, CSI.end());
-}
-
-bool SIFrameLowering::assignCalleeSavedSpillSlots(
- MachineFunction &MF, const TargetRegisterInfo *TRI,
- std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
- unsigned &MaxCSFrameIndex) const {
- if (CSI.empty())
- return true; // Early exit if no callee saved registers are modified!
-
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();
-
- if (UseVGPRBlocks)
- assignSlotsUsingVGPRBlocks(MF, ST, CSI, MinCSFrameIndex, MaxCSFrameIndex);
-
- return assignCalleeSavedSpillSlots(MF, TRI, CSI) || UseVGPRBlocks;
-}
-
bool SIFrameLowering::assignCalleeSavedSpillSlots(
MachineFunction &MF, const TargetRegisterInfo *TRI,
std::vector<CalleeSavedInfo> &CSI) const {
@@ -1907,111 +1808,6 @@ bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
return true;
}
-bool SIFrameLowering::spillCalleeSavedRegisters(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
- MachineFunction *MF = MBB.getParent();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
- if (!ST.useVGPRBlockOpsForCSR())
- return false;
-
- MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- const SIInstrInfo *TII = ST.getInstrInfo();
- SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
-
- const TargetRegisterClass *BlockRegClass =
- static_cast<const SIRegisterInfo *>(TRI)->getRegClassForBlockOp(*MF);
- for (const CalleeSavedInfo &CS : CSI) {
- Register Reg = CS.getReg();
- if (!BlockRegClass->contains(Reg) ||
- !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
- spillCalleeSavedRegister(MBB, MI, CS, TII, TRI);
- continue;
- }
-
- // Build a scratch block store.
- uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
- int FrameIndex = CS.getFrameIdx();
- MachinePointerInfo PtrInfo =
- MachinePointerInfo::getFixedStack(*MF, FrameIndex);
- MachineMemOperand *MMO =
- MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
- FrameInfo.getObjectSize(FrameIndex),
- FrameInfo.getObjectAlign(FrameIndex));
-
- BuildMI(MBB, MI, MI->getDebugLoc(),
- TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
- .addReg(Reg, getKillRegState(false))
- .addFrameIndex(FrameIndex)
- .addReg(MFI->getStackPtrOffsetReg())
- .addImm(0)
- .addImm(Mask)
- .addMemOperand(MMO);
-
- FuncInfo->setHasSpilledVGPRs();
-
- // Add the register to the liveins. This is necessary because if any of the
- // VGPRs in the register block is reserved (e.g. if it's a WWM register),
- // then the whole block will be marked as reserved and `updateLiveness` will
- // skip it.
- MBB.addLiveIn(Reg);
- }
- MBB.sortUniqueLiveIns();
-
- return true;
-}
-
-bool SIFrameLowering::restoreCalleeSavedRegisters(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
- MachineFunction *MF = MBB.getParent();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
- if (!ST.useVGPRBlockOpsForCSR())
- return false;
-
- SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
- MachineFrameInfo &MFI = MF->getFrameInfo();
- const SIInstrInfo *TII = ST.getInstrInfo();
- const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
- const TargetRegisterClass *BlockRegClass = SITRI->getRegClassForBlockOp(*MF);
- for (const CalleeSavedInfo &CS : reverse(CSI)) {
- Register Reg = CS.getReg();
- if (!BlockRegClass->contains(Reg) ||
- !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
- restoreCalleeSavedRegister(MBB, MI, CS, TII, TRI);
- contin...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/136846
More information about the llvm-commits mailing list