[llvm-branch-commits] [llvm] [AArch64] Prepare for split ZPR and PPR area allocation (NFCI) (PR #142391)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jun 2 06:59:34 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Benjamin Maxwell (MacDue)
<details>
<summary>Changes</summary>
This patch attempts to refactor AArch64FrameLowering to allow the size of the ZPR and PPR areas to be calculated separately. This will be used by a subsequent patch to support allocating ZPRs and PPRs to separate areas. This patch should be an NFC and is split out to make later functional changes easier to spot.
---
Patch is 34.11 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142391.diff
4 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+209-96)
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.h (+8-4)
- (modified) llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h (+26-21)
- (modified) llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp (+4-3)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 643778c742497..e5592a921e192 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -326,7 +326,10 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
static bool produceCompactUnwindFrame(MachineFunction &MF);
static bool needsWinCFI(const MachineFunction &MF);
+static StackOffset getZPRStackSize(const MachineFunction &MF);
+static StackOffset getPPRStackSize(const MachineFunction &MF);
static StackOffset getSVEStackSize(const MachineFunction &MF);
+static bool hasSVEStackSize(const MachineFunction &MF);
static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
/// Returns true if a homogeneous prolog or epilog code can be emitted
@@ -345,7 +348,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
if (needsWinCFI(MF))
return false;
// TODO: SVE is not supported yet.
- if (getSVEStackSize(MF))
+ if (hasSVEStackSize(MF))
return false;
// Bail on stack adjustment needed on return for simplicity.
@@ -445,10 +448,36 @@ static unsigned getFixedObjectSize(const MachineFunction &MF,
}
}
-/// Returns the size of the entire SVE stackframe (calleesaves + spills).
+static unsigned getStackHazardSize(const MachineFunction &MF) {
+ return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
+}
+
+/// Returns the size of the entire ZPR stackframe (calleesaves + spills).
+static StackOffset getZPRStackSize(const MachineFunction &MF) {
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return StackOffset::getScalable(AFI->getStackSizeZPR());
+}
+
+/// Returns the size of the entire PPR stackframe (calleesaves + spills).
+static StackOffset getPPRStackSize(const MachineFunction &MF) {
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return StackOffset::getScalable(AFI->getStackSizePPR());
+}
+
+/// Returns the size of the entire SVE stackframe (PPRs + ZPRs).
static StackOffset getSVEStackSize(const MachineFunction &MF) {
+ return getZPRStackSize(MF) + getPPRStackSize(MF);
+}
+
+static bool hasSVEStackSize(const MachineFunction &MF) {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
+ return AFI->getStackSizeZPR() > 0 || AFI->getStackSizePPR() > 0;
+}
+
+/// Returns true if PPRs are spilled as ZPRs.
+static bool arePPRsSpilledAsZPR(const MachineFunction &MF) {
+ return MF.getSubtarget().getRegisterInfo()->getSpillSize(
+ AArch64::PPRRegClass) == 16;
}
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
@@ -476,7 +505,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
!Subtarget.hasSVE();
return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
- getSVEStackSize(MF) || LowerQRegCopyThroughMem);
+ hasSVEStackSize(MF) || LowerQRegCopyThroughMem);
}
/// hasFPImpl - Return true if the specified function should have a dedicated
@@ -1144,7 +1173,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
// When there is an SVE area on the stack, always allocate the
// callee-saves and spills/locals separately.
- if (getSVEStackSize(MF))
+ if (hasSVEStackSize(MF))
return false;
return true;
@@ -1570,30 +1599,40 @@ static bool isTargetWindows(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
}
-static unsigned getStackHazardSize(const MachineFunction &MF) {
- return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
-}
-
// Convenience function to determine whether I is an SVE callee save.
-static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
+static bool IsZPRCalleeSave(MachineBasicBlock::iterator I) {
switch (I->getOpcode()) {
default:
return false;
- case AArch64::PTRUE_C_B:
case AArch64::LD1B_2Z_IMM:
case AArch64::ST1B_2Z_IMM:
case AArch64::STR_ZXI:
- case AArch64::STR_PXI:
case AArch64::LDR_ZXI:
- case AArch64::LDR_PXI:
- case AArch64::PTRUE_B:
case AArch64::CPY_ZPzI_B:
case AArch64::CMPNE_PPzZI_B:
+ case AArch64::PTRUE_C_B:
+ case AArch64::PTRUE_B:
+ return I->getFlag(MachineInstr::FrameSetup) ||
+ I->getFlag(MachineInstr::FrameDestroy);
+ }
+}
+
+// Convenience function to determine whether I is an SVE predicate callee save.
+static bool IsPPRCalleeSave(MachineBasicBlock::iterator I) {
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case AArch64::STR_PXI:
+ case AArch64::LDR_PXI:
return I->getFlag(MachineInstr::FrameSetup) ||
I->getFlag(MachineInstr::FrameDestroy);
}
}
+static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
+ return IsZPRCalleeSave(I) || IsPPRCalleeSave(I);
+}
+
static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
MachineFunction &MF,
MachineBasicBlock &MBB,
@@ -1825,8 +1864,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
else
AFI->setTaggedBasePointerOffset(MFI.getStackSize());
- const StackOffset &SVEStackSize = getSVEStackSize(MF);
-
// getStackSize() includes all the locals in its size calculation. We don't
// include these locals when computing the stack size of a funclet, as they
// are allocated in the parent's stack frame and accessed via the frame
@@ -1837,7 +1874,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
assert(!HasFP && "unexpected function without stack frame but with FP");
- assert(!SVEStackSize &&
+ assert(!hasSVEStackSize(MF) &&
"unexpected function without stack frame but with SVE objects");
// All of the stack allocation is for locals.
AFI->setLocalStackSize(NumBytes);
@@ -1879,7 +1916,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
bool HomPrologEpilog = homogeneousPrologEpilog(MF);
if (CombineSPBump) {
- assert(!SVEStackSize && "Cannot combine SP bump with SVE");
+ assert(!hasSVEStackSize(MF) && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
@@ -2105,34 +2142,63 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
}
- StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
- MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
+ StackOffset PPRCalleeSavesSize =
+ StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
+ StackOffset ZPRCalleeSavesSize =
+ StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
+ StackOffset PPRLocalsSize = getPPRStackSize(MF);
+ StackOffset ZPRLocalsSize = getZPRStackSize(MF);
+
+ MachineBasicBlock::iterator ZPRCalleeSavesBegin = MBBI,
+ ZPRCalleeSavesEnd = MBBI;
+ MachineBasicBlock::iterator PPRCalleeSavesBegin = MBBI,
+ PPRCalleeSavesEnd = MBBI;
// Process the SVE callee-saves to determine what space needs to be
// allocated.
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
- LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
+
+ if (int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize()) {
+ LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = " << PPRCalleeSavedSize
+ << "\n");
+
+ PPRCalleeSavesBegin = MBBI;
+ assert(IsPPRCalleeSave(PPRCalleeSavesBegin) && "Unexpected instruction");
+ while (IsPPRCalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
+ ++MBBI;
+ PPRCalleeSavesEnd = MBBI;
+
+ PPRLocalsSize -= StackOffset::getScalable(PPRCalleeSavedSize);
+ }
+
+ if (int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize()) {
+ LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = " << ZPRCalleeSavedSize
<< "\n");
// Find callee save instructions in frame.
- CalleeSavesBegin = MBBI;
- assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
- while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
+ ZPRCalleeSavesBegin = MBBI;
+ assert(IsZPRCalleeSave(ZPRCalleeSavesBegin) && "Unexpected instruction");
+ while (IsZPRCalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
++MBBI;
- CalleeSavesEnd = MBBI;
+ ZPRCalleeSavesEnd = MBBI;
- SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
- SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
+ ZPRLocalsSize -= StackOffset::getScalable(ZPRCalleeSavedSize);
}
// Allocate space for the callee saves (if any).
StackOffset CFAOffset =
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
- StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
+ StackOffset LocalsSize =
+ PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes);
+ StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize;
+ MachineBasicBlock::iterator CalleeSavesBegin =
+ AFI->getPPRCalleeSavedStackSize() ? PPRCalleeSavesBegin
+ : ZPRCalleeSavesBegin;
allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
MFI.hasVarSizedObjects() || LocalsSize);
CFAOffset += SVECalleeSavesSize;
+ MachineBasicBlock::iterator CalleeSavesEnd =
+ AFI->getZPRCalleeSavedStackSize() ? ZPRCalleeSavesEnd : PPRCalleeSavesEnd;
if (EmitAsyncCFI)
emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
@@ -2144,6 +2210,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
+ StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
SVELocalsSize + StackOffset::getFixed(NumBytes),
NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
@@ -2193,7 +2260,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
emitDefineCFAWithFP(MF, MBB, MBBI, FixedObject);
} else {
StackOffset TotalSize =
- SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
+ getSVEStackSize(MF) +
+ StackOffset::getFixed((int64_t)MFI.getStackSize());
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
CFIBuilder.insertCFIInst(
createDefCFA(*RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
@@ -2388,7 +2456,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
- const StackOffset &SVEStackSize = getSVEStackSize(MF);
+ StackOffset SVEStackSize = getSVEStackSize(MF);
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
@@ -2413,7 +2481,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// deallocated.
StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
+ int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize();
+ int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize();
+ int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
+
+ if (SVECalleeSavedSize) {
RestoreBegin = std::prev(RestoreEnd);
while (RestoreBegin != MBB.begin() &&
IsSVECalleeSave(std::prev(RestoreBegin)))
@@ -2423,7 +2495,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
StackOffset CalleeSavedSizeAsOffset =
- StackOffset::getScalable(CalleeSavedSize);
+ StackOffset::getScalable(SVECalleeSavedSize);
DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
DeallocateAfter = CalleeSavedSizeAsOffset;
}
@@ -2434,16 +2506,16 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// restore the stack pointer from the frame pointer prior to SVE CSR
// restoration.
if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
+ if (SVECalleeSavedSize) {
// Set SP to start of SVE callee-save area from which they can
// be reloaded. The code below will deallocate the stack space
// space by moving FP -> SP.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
- StackOffset::getScalable(-CalleeSavedSize), TII,
+ StackOffset::getScalable(-SVECalleeSavedSize), TII,
MachineInstr::FrameDestroy);
}
} else {
- if (AFI->getSVECalleeSavedStackSize()) {
+ if (SVECalleeSavedSize) {
// Deallocate the non-SVE locals first before we can deallocate (and
// restore callee saves) from the SVE area.
emitFrameOffset(
@@ -2572,7 +2644,9 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
- StackOffset SVEStackSize = getSVEStackSize(MF);
+ StackOffset ZPRStackSize = getZPRStackSize(MF);
+ StackOffset PPRStackSize = getPPRStackSize(MF);
+ StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
// For VLA-area objects, just emit an offset at the end of the stack frame.
// Whilst not quite correct, these objects do live at the end of the frame and
@@ -2663,7 +2737,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
bool isCSR =
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
- const StackOffset &SVEStackSize = getSVEStackSize(MF);
+ const StackOffset SVEStackSize = getSVEStackSize(MF);
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
@@ -2800,7 +2874,9 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
!(Subtarget.getTargetLowering()->supportSwiftError() &&
Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
MF.getFunction().getCallingConv() != CallingConv::SwiftTail &&
- !requiresSaveVG(MF) && AFI->getSVECalleeSavedStackSize() == 0;
+ !requiresSaveVG(MF) &&
+ (AFI->getZPRCalleeSavedStackSize() +
+ AFI->getPPRCalleeSavedStackSize()) == 0;
}
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
@@ -2932,9 +3008,13 @@ static void computeCalleeSaveRegisterPairs(
RegInc = -1;
FirstReg = Count - 1;
}
- int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
+
+ int ScalableByteOffset =
+ AFI->getZPRCalleeSavedStackSize() + AFI->getPPRCalleeSavedStackSize();
+
bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
Register LastReg = 0;
+ bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex();
// When iterating backwards, the loop condition relies on unsigned wraparound.
for (unsigned i = FirstReg; i < Count; i += RegInc) {
@@ -2964,7 +3044,7 @@ static void computeCalleeSaveRegisterPairs(
}
// Add the stack hazard size as we transition from GPR->FPR CSRs.
- if (AFI->hasStackHazardSlotIndex() &&
+ if (HasCSHazardPadding &&
(!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
AArch64InstrInfo::isFpOrNEON(RPI.Reg1))
ByteOffset += StackFillDir * StackHazardSize;
@@ -2972,7 +3052,7 @@ static void computeCalleeSaveRegisterPairs(
int Scale = TRI->getSpillSize(*RPI.RC);
// Add the next reg to the pair if it is in the same register class.
- if (unsigned(i + RegInc) < Count && !AFI->hasStackHazardSlotIndex()) {
+ if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) {
MCRegister NextReg = CSI[i + RegInc].getReg();
bool IsFirst = i == FirstReg;
switch (RPI.Type) {
@@ -3541,8 +3621,9 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
void AArch64FrameLowering::determineStackHazardSlot(
MachineFunction &MF, BitVector &SavedRegs) const {
unsigned StackHazardSize = getStackHazardSize(MF);
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||
- MF.getInfo<AArch64FunctionInfo>()->hasStackHazardSlotIndex())
+ AFI->hasStackHazardSlotIndex())
return;
// Stack hazards are only needed in streaming functions.
@@ -3594,10 +3675,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned UnspilledCSGPR = AArch64::NoRegister;
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
@@ -3718,19 +3800,29 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// Calculates the callee saved stack size.
unsigned CSStackSize = 0;
- unsigned SVECSStackSize = 0;
+ unsigned ZPRCSStackSize = 0;
+ unsigned PPRCSStackSize = 0;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
for (unsigned Reg : SavedRegs.set_bits()) {
auto *RC = TRI->getMinimalPhysRegClass(Reg);
assert(RC && "expected register class!");
auto SpillSize = TRI->getSpillSize(*RC);
- if (AArch64::PPRRegClass.contains(Reg) ||
- AArch64::ZPRRegClass.contains(Reg))
- SVECSStackSize += SpillSize;
+ bool IsZPR = AArch64::ZPRRegClass.contains(Reg);
+ bool IsPPR = !IsZPR && AArch64::PPRRegClass.contains(Reg);
+ if (IsZPR || (IsPPR && arePPRsSpilledAsZPR(MF)))
+ ZPRCSStackSize += SpillSize;
+ else if (IsPPR)
+ PPRCSStackSize += SpillSize;
else
CSStackSize += SpillSize;
}
+ // Determine if a Hazard slot should be used, and increase the CSStackSize by
+ // StackHazardSize if so.
+ determineStackHazardSlot(MF, SavedRegs);
+ if (AFI->hasStackHazardSlotIndex())
+ CSStackSize += getStackHazardSize(MF);
+
// Increase the callee-saved stack size if the function has streaming mode
// changes, as we will need to spill the value of the VG register.
// For locally streaming functions, we spill both the streaming and
@@ -3744,12 +3836,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
CSStackSize += 8;
}
- // Determine if a Hazard slot should be used, and increase the CSStackSize by
- // StackHazardSize if so.
- determineStackHazardSlot(MF, SavedRegs);
- if (AFI->hasStackHazardSlotIndex())
- CSStackSize += getStackHazardSize(MF);
-
// Save number of saved regs, so we can easily update CSStackSize later.
unsigned NumSavedRegs = SavedRegs.count();
@@ -3769,8 +3855,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
});
// If any callee-saved registers are used, the frame cannot be eliminated.
+ auto [ZPRLocalStackSize, PPRLocalStackSize] =
+ estimateSVEStackObjectOffsets(MF);
+ int64_t SVELocals = ZPRLocalStackSize + PPRLocalStackSize;
int64_t SVEStackSize =
- alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
+ alignTo(ZPRCSStackSize + PPRCSStackSize + SVELocals, 16);
bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/142391
More information about the llvm-branch-commits
mailing list