[llvm] [AArch64] Break up `AArch64FrameLowering::emitPrologue` (NFCI) (PR #157485)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 8 08:02:18 PDT 2025
https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/157485
`emitPrologue` was almost 1k SLOC, with a large portion of it devoted to niche cases rather than the code paths taken by the vast majority of prologues.
This patch creates a new class `AArch64PrologueEmitter` for emitting the prologue, which keeps common state and target classes as members. This makes it easy to add methods that handle niche cases, and allows methods to be marked `const` when they don't redefine flags/state.
With this change, the core `emitPrologue` is around 275 LOC, with cases like Windows stack probes or Swift frame pointers split into their own routines. This makes the logic much easier to follow.
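Concretely, after this patch `AArch64FrameLowering::emitPrologue` reduces to constructing the emitter and delegating to it (this mirrors the change in the diff below):

  void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                          MachineBasicBlock &MBB) const {
    AArch64PrologueEmitter PrologueEmitter(MF, MBB, *this);
    PrologueEmitter.emitPrologue();
  }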
From 61d73a63699dfc0257312bb95c9ce274da820c9f Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 8 Sep 2025 13:04:54 +0000
Subject: [PATCH] [AArch64] Break up `AArch64FrameLowering::emitPrologue`
(NFCI)
`emitPrologue` was almost 1k SLOC, with a large portion of that not
actually related to emitting the vast majority of prologues.
This patch creates a new class `AArch64PrologueEmitter` for emitting
the prologue, which keeps common state/target classes as members. This
makes adding methods that handle niche cases easy, and allows methods to
be marked "const" when they don't redefine flags/state.
With this change, the core `emitPrologue` is around 275 LOC, with cases
like Windows stack probes or Swift frame pointers split off into their
own routines.
---
.../Target/AArch64/AArch64FrameLowering.cpp | 883 ++----------------
.../lib/Target/AArch64/AArch64FrameLowering.h | 70 +-
.../AArch64/AArch64PrologueEpilogue.cpp | 794 ++++++++++++++++
.../Target/AArch64/AArch64PrologueEpilogue.h | 111 +++
llvm/lib/Target/AArch64/CMakeLists.txt | 1 +
5 files changed, 1037 insertions(+), 822 deletions(-)
create mode 100644 llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
create mode 100644 llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 87a09b72933db..175b5e04d82ff 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -211,6 +211,7 @@
#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64PrologueEpilogue.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
@@ -218,7 +219,6 @@
#include "Utils/AArch64SMEAttributes.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CFIInstBuilder.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -293,8 +293,6 @@ static cl::opt<bool> DisableMultiVectorSpillFill(
cl::desc("Disable use of LD/ST pairs for SME2 or SVE2p1"), cl::init(false),
cl::Hidden);
-STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
-
/// Returns how much of the incoming argument stack area (in bytes) we should
/// clean up in an epilogue. For the C calling convention this will be 0, for
/// guaranteed tail call conventions it can be positive (a normal return or a
@@ -328,23 +326,20 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
return ArgumentPopSize;
}
-static bool produceCompactUnwindFrame(MachineFunction &MF);
-static bool needsWinCFI(const MachineFunction &MF);
-static StackOffset getSVEStackSize(const MachineFunction &MF);
-static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
- bool HasCall = false);
-static bool requiresSaveVG(const MachineFunction &MF);
+static bool produceCompactUnwindFrame(const AArch64FrameLowering &,
+ MachineFunction &MF);
// Conservatively, returns true if the function is likely to have SVE vectors
// on the stack. This function is safe to call before callee-saves or
// object offsets have been determined.
-static bool isLikelyToHaveSVEStack(const MachineFunction &MF) {
+static bool isLikelyToHaveSVEStack(const AArch64FrameLowering &AFL,
+ const MachineFunction &MF) {
auto *AFI = MF.getInfo<AArch64FunctionInfo>();
if (AFI->isSVECC())
return true;
if (AFI->hasCalculatedStackSizeSVE())
- return bool(getSVEStackSize(MF));
+ return bool(AFL.getSVEStackSize(MF));
const MachineFrameInfo &MFI = MF.getFrameInfo();
for (int FI = MFI.getObjectIndexBegin(); FI < MFI.getObjectIndexEnd(); FI++) {
@@ -372,7 +367,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
return false;
// TODO: SVE is not supported yet.
- if (isLikelyToHaveSVEStack(MF))
+ if (isLikelyToHaveSVEStack(*this, MF))
return false;
// Bail on stack adjustment needed on return for simplicity.
@@ -409,7 +404,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
/// Returns true if CSRs should be paired.
bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
- return produceCompactUnwindFrame(MF) || homogeneousPrologEpilog(MF);
+ return produceCompactUnwindFrame(*this, MF) || homogeneousPrologEpilog(MF);
}
/// This is the biggest offset to the stack pointer we can encode in aarch64
@@ -451,11 +446,10 @@ AArch64FrameLowering::getStackIDForScalableVectors() const {
return TargetStackID::ScalableVector;
}
-/// Returns the size of the fixed object area (allocated next to sp on entry)
-/// On Win64 this may include a var args area and an UnwindHelp object for EH.
-static unsigned getFixedObjectSize(const MachineFunction &MF,
- const AArch64FunctionInfo *AFI, bool IsWin64,
- bool IsFunclet) {
+unsigned
+AArch64FrameLowering::getFixedObjectSize(const MachineFunction &MF,
+ const AArch64FunctionInfo *AFI,
+ bool IsWin64, bool IsFunclet) const {
assert(AFI->getTailCallReservedStack() % 16 == 0 &&
"Tail call reserved stack must be aligned to 16 bytes");
if (!IsWin64 || IsFunclet) {
@@ -494,7 +488,8 @@ static unsigned getFixedObjectSize(const MachineFunction &MF,
}
/// Returns the size of the entire SVE stackframe (calleesaves + spills).
-static StackOffset getSVEStackSize(const MachineFunction &MF) {
+StackOffset
+AArch64FrameLowering::getSVEStackSize(const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
}
@@ -683,70 +678,6 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
return MBB.erase(I);
}
-void AArch64FrameLowering::emitCalleeSavedGPRLocations(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = MF.getFrameInfo();
-
- const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- if (CSI.empty())
- return;
-
- CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
- for (const auto &Info : CSI) {
- unsigned FrameIdx = Info.getFrameIdx();
- if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector)
- continue;
-
- assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
- int64_t Offset = MFI.getObjectOffset(FrameIdx) - getOffsetOfLocalArea();
- CFIBuilder.buildOffset(Info.getReg(), Offset);
- }
-}
-
-void AArch64FrameLowering::emitCalleeSavedSVELocations(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = MF.getFrameInfo();
-
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- if (CSI.empty())
- return;
-
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
- AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
- CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
-
- std::optional<int64_t> IncomingVGOffsetFromDefCFA;
- if (requiresSaveVG(MF)) {
- auto IncomingVG = *find_if(
- reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
- IncomingVGOffsetFromDefCFA =
- MFI.getObjectOffset(IncomingVG.getFrameIdx()) - getOffsetOfLocalArea();
- }
-
- for (const auto &Info : CSI) {
- if (MFI.getStackID(Info.getFrameIdx()) != TargetStackID::ScalableVector)
- continue;
-
- // Not all unwinders may know about SVE registers, so assume the lowest
- // common denominator.
- assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
- MCRegister Reg = Info.getReg();
- if (!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
- continue;
-
- StackOffset Offset =
- StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
- StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI));
-
- CFIBuilder.insertCFIInst(
- createCFAOffset(TRI, Reg, Offset, IncomingVGOffsetFromDefCFA));
- }
-}
-
void AArch64FrameLowering::resetCFIToInitialState(
MachineBasicBlock &MBB) const {
@@ -1088,8 +1019,8 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
}
}
-static bool windowsRequiresStackProbe(const MachineFunction &MF,
- uint64_t StackSizeInBytes) {
+bool AArch64FrameLowering::windowsRequiresStackProbe(
+ const MachineFunction &MF, uint64_t StackSizeInBytes) const {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64FunctionInfo &MFI = *MF.getInfo<AArch64FunctionInfo>();
// TODO: When implementing stack protectors, take that into account
@@ -1108,19 +1039,9 @@ static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
LiveRegs.addReg(CSRegs[i]);
}
-// Find a scratch register that we can use at the start of the prologue to
-// re-align the stack pointer. We avoid using callee-save registers since they
-// may appear to be free when this is called from canUseAsPrologue (during
-// shrink wrapping), but then no longer be free when this is called from
-// emitPrologue.
-//
-// FIXME: This is a bit conservative, since in the above case we could use one
-// of the callee-save registers as a scratch temp to re-align the stack pointer,
-// but we would then have to make sure that we were in fact saving at least one
-// callee-save register in the prologue, which is additional complexity that
-// doesn't seem worth the benefit.
-static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
- bool HasCall) {
+Register
+AArch64FrameLowering::findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+ bool HasCall) const {
MachineFunction *MF = MBB->getParent();
// If MBB is an entry block, use X9 as the scratch register
@@ -1193,13 +1114,14 @@ bool AArch64FrameLowering::canUseAsPrologue(
return true;
}
-static bool needsWinCFI(const MachineFunction &MF) {
+bool AArch64FrameLowering::needsWinCFI(const MachineFunction &MF) const {
const Function &F = MF.getFunction();
return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
F.needsUnwindTableEntry();
}
-static bool shouldSignReturnAddressEverywhere(const MachineFunction &MF) {
+bool AArch64FrameLowering::shouldSignReturnAddressEverywhere(
+ const MachineFunction &MF) const {
// FIXME: With WinCFI, extra care should be taken to place SEH_PACSignLR
// and SEH_EpilogEnd instructions in the correct order.
if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
@@ -1475,13 +1397,13 @@ static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
}
-bool requiresGetVGCall(MachineFunction &MF) {
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+bool AArch64FrameLowering::requiresGetVGCall(const MachineFunction &MF) const {
+ auto *AFI = MF.getInfo<AArch64FunctionInfo>();
return AFI->hasStreamingModeChanges() &&
!MF.getSubtarget<AArch64Subtarget>().hasSVE();
}
-static bool requiresSaveVG(const MachineFunction &MF) {
+bool AArch64FrameLowering::requiresSaveVG(const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
if (!AFI->needsDwarfUnwindInfo(MF) || !AFI->hasStreamingModeChanges())
return false;
@@ -1499,8 +1421,8 @@ static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
}
-bool isVGInstruction(MachineBasicBlock::iterator MBBI,
- const TargetLowering &TLI) {
+bool AArch64FrameLowering::isVGInstruction(MachineBasicBlock::iterator MBBI,
+ const TargetLowering &TLI) const {
unsigned Opc = MBBI->getOpcode();
if (Opc == AArch64::CNTD_XPiI)
return true;
@@ -1514,15 +1436,12 @@ bool isVGInstruction(MachineBasicBlock::iterator MBBI,
return Opc == TargetOpcode::COPY;
}
-// Convert callee-save register save/restore instruction to do stack pointer
-// decrement/increment to allocate/deallocate the callee-save stack area by
-// converting store/load to use pre/post increment version.
-static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
+MachineBasicBlock::iterator
+AArch64FrameLowering::convertCalleeSaveRestoreToSPPrePostIncDec(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
- MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
- int CFAOffset = 0) {
+ MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
unsigned NewOpc;
// If the function contains streaming mode changes, we expect instructions
@@ -1643,12 +1562,9 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
return std::prev(MBB.erase(MBBI));
}
-// Fixup callee-save register save/restore instructions to take into account
-// combined SP bump by adding the local stack size to the stack offsets.
-static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
- uint64_t LocalStackSize,
- bool NeedsWinCFI,
- bool *HasWinCFI) {
+void AArch64FrameLowering::fixupCalleeSaveRestoreStackOffset(
+ MachineInstr &MI, uint64_t LocalStackSize, bool NeedsWinCFI,
+ bool *HasWinCFI) const {
if (AArch64InstrInfo::isSEHInstruction(MI))
return;
@@ -1703,7 +1619,8 @@ static unsigned getStackHazardSize(const MachineFunction &MF) {
}
// Convenience function to determine whether I is an SVE callee save.
-static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
+bool AArch64FrameLowering::isSVECalleeSave(
+ MachineBasicBlock::iterator I) const {
switch (I->getOpcode()) {
default:
return false;
@@ -1725,42 +1642,6 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
}
}
-static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
- MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, bool NeedsWinCFI,
- bool NeedsUnwindInfo) {
- // Shadow call stack prolog: str x30, [x18], #8
- BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXpost))
- .addReg(AArch64::X18, RegState::Define)
- .addReg(AArch64::LR)
- .addReg(AArch64::X18)
- .addImm(8)
- .setMIFlag(MachineInstr::FrameSetup);
-
- // This instruction also makes x18 live-in to the entry block.
- MBB.addLiveIn(AArch64::X18);
-
- if (NeedsWinCFI)
- BuildMI(MBB, MBBI, DL, TII.get(AArch64::SEH_Nop))
- .setMIFlag(MachineInstr::FrameSetup);
-
- if (NeedsUnwindInfo) {
- // Emit a CFI instruction that causes 8 to be subtracted from the value of
- // x18 when unwinding past this frame.
- static const char CFIInst[] = {
- dwarf::DW_CFA_val_expression,
- 18, // register
- 2, // length
- static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
- static_cast<char>(-8) & 0x7f, // addend (sleb128)
- };
- CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
- .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
- }
-}
-
static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII,
MachineFunction &MF,
MachineBasicBlock &MBB,
@@ -1783,36 +1664,6 @@ static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII,
.buildRestore(AArch64::X18);
}
-// Define the current CFA rule to use the provided FP.
-static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned FixedObject) {
- const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
- const AArch64RegisterInfo *TRI = STI.getRegisterInfo();
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-
- const int OffsetToFirstCalleeSaveFromFP =
- AFI->getCalleeSaveBaseToFrameRecordOffset() -
- AFI->getCalleeSavedStackSize();
- Register FramePtr = TRI->getFrameRegister(MF);
- CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
- .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
-}
-
-#ifndef NDEBUG
-/// Collect live registers from the end of \p MI's parent up to (including) \p
-/// MI in \p LiveRegs.
-static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
- LivePhysRegs &LiveRegs) {
-
- MachineBasicBlock &MBB = *MI.getParent();
- LiveRegs.addLiveOuts(MBB);
- for (const MachineInstr &MI :
- reverse(make_range(MI.getIterator(), MBB.instr_end())))
- LiveRegs.stepBackward(MI);
-}
-#endif
-
void AArch64FrameLowering::emitPacRetPlusLeafHardening(
MachineFunction &MF) const {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
@@ -1848,616 +1699,8 @@ void AArch64FrameLowering::emitPacRetPlusLeafHardening(
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.begin();
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- const Function &F = MF.getFunction();
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
-
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- bool EmitCFI = AFI->needsDwarfUnwindInfo(MF);
- bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
- bool HasFP = hasFP(MF);
- bool NeedsWinCFI = needsWinCFI(MF);
- bool HasWinCFI = false;
- auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });
-
- MachineBasicBlock::iterator End = MBB.end();
-#ifndef NDEBUG
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- // Collect live register from the end of MBB up to the start of the existing
- // frame setup instructions.
- MachineBasicBlock::iterator NonFrameStart = MBB.begin();
- while (NonFrameStart != End &&
- NonFrameStart->getFlag(MachineInstr::FrameSetup))
- ++NonFrameStart;
-
- LivePhysRegs LiveRegs(*TRI);
- if (NonFrameStart != MBB.end()) {
- getLivePhysRegsUpTo(*NonFrameStart, *TRI, LiveRegs);
- // Ignore registers used for stack management for now.
- LiveRegs.removeReg(AArch64::SP);
- LiveRegs.removeReg(AArch64::X19);
- LiveRegs.removeReg(AArch64::FP);
- LiveRegs.removeReg(AArch64::LR);
-
- // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
- // This is necessary to spill VG if required where SVE is unavailable, but
- // X0 is preserved around this call.
- if (requiresGetVGCall(MF))
- LiveRegs.removeReg(AArch64::X0);
- }
-
- auto VerifyClobberOnExit = make_scope_exit([&]() {
- if (NonFrameStart == MBB.end())
- return;
- // Check if any of the newly instructions clobber any of the live registers.
- for (MachineInstr &MI :
- make_range(MBB.instr_begin(), NonFrameStart->getIterator())) {
- for (auto &Op : MI.operands())
- if (Op.isReg() && Op.isDef())
- assert(!LiveRegs.contains(Op.getReg()) &&
- "live register clobbered by inserted prologue instructions");
- }
- });
-#endif
-
- bool IsFunclet = MBB.isEHFuncletEntry();
-
- // At this point, we're going to decide whether or not the function uses a
- // redzone. In most cases, the function doesn't have a redzone so let's
- // assume that's false and set it to true in the case that there's a redzone.
- AFI->setHasRedZone(false);
-
- // Debug location must be unknown since the first debug location is used
- // to determine the end of the prologue.
- DebugLoc DL;
-
- const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
- if (MFnI.shouldSignReturnAddress(MF)) {
- // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
- // are inserted by emitPacRetPlusLeafHardening().
- if (!shouldSignReturnAddressEverywhere(MF)) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
- .setMIFlag(MachineInstr::FrameSetup);
- }
- // AArch64PointerAuth pass will insert SEH_PACSignLR
- HasWinCFI |= NeedsWinCFI;
- }
-
- if (MFnI.needsShadowCallStackPrologueEpilogue(MF)) {
- emitShadowCallStackPrologue(*TII, MF, MBB, MBBI, DL, NeedsWinCFI,
- MFnI.needsDwarfUnwindInfo(MF));
- HasWinCFI |= NeedsWinCFI;
- }
-
- if (EmitCFI && MFnI.isMTETagged()) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITMTETAGGED))
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- // We signal the presence of a Swift extended frame to external tools by
- // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
- // ORR is sufficient, it is assumed a Swift kernel would initialize the TBI
- // bits so that is still true.
- if (HasFP && AFI->hasSwiftAsyncContext()) {
- switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
- case SwiftAsyncFramePointerMode::DeploymentBased:
- if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
- // The special symbol below is absolute and has a *value* that can be
- // combined with the frame pointer to signal an extended frame.
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
- .addExternalSymbol("swift_async_extendedFramePointerFlags",
- AArch64II::MO_GOT);
- if (NeedsWinCFI) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlags(MachineInstr::FrameSetup);
- HasWinCFI = true;
- }
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
- .addUse(AArch64::FP)
- .addUse(AArch64::X16)
- .addImm(Subtarget.isTargetILP32() ? 32 : 0);
- if (NeedsWinCFI) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlags(MachineInstr::FrameSetup);
- HasWinCFI = true;
- }
- break;
- }
- [[fallthrough]];
-
- case SwiftAsyncFramePointerMode::Always:
- // ORR x29, x29, #0x1000_0000_0000_0000
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
- .addUse(AArch64::FP)
- .addImm(0x1100)
- .setMIFlag(MachineInstr::FrameSetup);
- if (NeedsWinCFI) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlags(MachineInstr::FrameSetup);
- HasWinCFI = true;
- }
- break;
-
- case SwiftAsyncFramePointerMode::Never:
- break;
- }
- }
-
- // All calls are tail calls in GHC calling conv, and functions have no
- // prologue/epilogue.
- if (MF.getFunction().getCallingConv() == CallingConv::GHC)
- return;
-
- // Set tagged base pointer to the requested stack slot.
- // Ideally it should match SP value after prologue.
- std::optional<int> TBPI = AFI->getTaggedBasePointerIndex();
- if (TBPI)
- AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
- else
- AFI->setTaggedBasePointerOffset(MFI.getStackSize());
-
- const StackOffset &SVEStackSize = getSVEStackSize(MF);
-
- // getStackSize() includes all the locals in its size calculation. We don't
- // include these locals when computing the stack size of a funclet, as they
- // are allocated in the parent's stack frame and accessed via the frame
- // pointer from the funclet. We only save the callee saved registers in the
- // funclet, which are really the callee saved registers of the parent
- // function, including the funclet.
- int64_t NumBytes =
- IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
- if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
- assert(!HasFP && "unexpected function without stack frame but with FP");
- assert(!SVEStackSize &&
- "unexpected function without stack frame but with SVE objects");
- // All of the stack allocation is for locals.
- AFI->setLocalStackSize(NumBytes);
- if (!NumBytes) {
- if (NeedsWinCFI && HasWinCFI) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
- .setMIFlag(MachineInstr::FrameSetup);
- }
- return;
- }
- // REDZONE: If the stack size is less than 128 bytes, we don't need
- // to actually allocate.
- if (canUseRedZone(MF)) {
- AFI->setHasRedZone(true);
- ++NumRedZoneFunctions;
- } else {
- emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(-NumBytes), TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
- if (EmitCFI) {
- // Label used to tie together the PROLOG_LABEL and the MachineMoves.
- MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
- // Encode the stack size of the leaf function.
- CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
- .buildDefCFAOffset(NumBytes, FrameLabel);
- }
- }
-
- if (NeedsWinCFI) {
- HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- return;
- }
-
- bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
- unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
-
- // Windows unwind can't represent the required stack adjustments if we have
- // both SVE callee-saves and dynamic stack allocations, and the frame
- // pointer is before the SVE spills. The allocation of the frame pointer
- // must be the last instruction in the prologue so the unwinder can restore
- // the stack pointer correctly. (And there isn't any unwind opcode for
- // `addvl sp, x29, -17`.)
- //
- // Because of this, we do spills in the opposite order on Windows: first SVE,
- // then GPRs. The main side-effect of this is that it makes accessing
- // parameters passed on the stack more expensive.
- //
- // We could consider rearranging the spills for simpler cases.
- bool FPAfterSVECalleeSaves =
- Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
-
- if (FPAfterSVECalleeSaves && AFI->hasStackHazardSlotIndex())
- reportFatalUsageError("SME hazard padding is not supported on Windows");
-
- auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
- // All of the remaining stack allocations are for locals.
- AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
- bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
- bool HomPrologEpilog = homogeneousPrologEpilog(MF);
- if (FPAfterSVECalleeSaves) {
- // If we're doing SVE saves first, we need to immediately allocate space
- // for fixed objects, then space for the SVE callee saves.
- //
- // Windows unwind requires that the scalable size is a multiple of 16;
- // that's handled when the callee-saved size is computed.
- auto SaveSize =
- StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
- StackOffset::getFixed(FixedObject);
- allocateStackSpace(MBB, MBBI, 0, SaveSize, NeedsWinCFI, &HasWinCFI,
- /*EmitCFI=*/false, StackOffset{},
- /*FollowupAllocs=*/true);
- NumBytes -= FixedObject;
-
- // Now allocate space for the GPR callee saves.
- while (MBBI != End && IsSVECalleeSave(MBBI))
- ++MBBI;
- MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
- MBB, MBBI, DL, TII, -AFI->getCalleeSavedStackSize(), NeedsWinCFI,
- &HasWinCFI, EmitAsyncCFI);
- NumBytes -= AFI->getCalleeSavedStackSize();
- } else if (CombineSPBump) {
- assert(!SVEStackSize && "Cannot combine SP bump with SVE");
- emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(-NumBytes), TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
- EmitAsyncCFI);
- NumBytes = 0;
- } else if (HomPrologEpilog) {
- // Stack has been already adjusted.
- NumBytes -= PrologueSaveSize;
- } else if (PrologueSaveSize != 0) {
- MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
- MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI,
- EmitAsyncCFI);
- NumBytes -= PrologueSaveSize;
- }
- assert(NumBytes >= 0 && "Negative stack allocation size!?");
-
- // Move past the saves of the callee-saved registers, fixing up the offsets
- // and pre-inc if we decided to combine the callee-save and local stack
- // pointer bump above.
- auto &TLI = *MF.getSubtarget().getTargetLowering();
- while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
- !IsSVECalleeSave(MBBI)) {
- if (CombineSPBump &&
- // Only fix-up frame-setup load/store instructions.
- (!requiresSaveVG(MF) || !isVGInstruction(MBBI, TLI)))
- fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
- NeedsWinCFI, &HasWinCFI);
- ++MBBI;
- }
-
- // For funclets the FP belongs to the containing function.
- if (!IsFunclet && HasFP) {
- // Only set up FP if we actually need to.
- int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
-
- if (CombineSPBump)
- FPOffset += AFI->getLocalStackSize();
-
- if (AFI->hasSwiftAsyncContext()) {
- // Before we update the live FP we have to ensure there's a valid (or
- // null) asynchronous context in its slot just before FP in the frame
- // record, so store it now.
- const auto &Attrs = MF.getFunction().getAttributes();
- bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
- if (HaveInitialContext)
- MBB.addLiveIn(AArch64::X22);
- Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
- .addUse(Reg)
- .addUse(AArch64::SP)
- .addImm(FPOffset - 8)
- .setMIFlags(MachineInstr::FrameSetup);
- if (NeedsWinCFI) {
- // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
- // to multiple instructions, should be mutually-exclusive.
- assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlags(MachineInstr::FrameSetup);
- HasWinCFI = true;
- }
- }
-
- if (HomPrologEpilog) {
- auto Prolog = MBBI;
- --Prolog;
- assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
- Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
- } else {
- // Issue sub fp, sp, FPOffset or
- // mov fp,sp when FPOffset is zero.
- // Note: All stores of callee-saved registers are marked as "FrameSetup".
- // This code marks the instruction(s) that set the FP also.
- emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
- StackOffset::getFixed(FPOffset), TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
- if (NeedsWinCFI && HasWinCFI) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
- .setMIFlag(MachineInstr::FrameSetup);
- // After setting up the FP, the rest of the prolog doesn't need to be
- // included in the SEH unwind info.
- NeedsWinCFI = false;
- }
- }
- if (EmitAsyncCFI)
- emitDefineCFAWithFP(MF, MBB, MBBI, FixedObject);
- }
-
- // Now emit the moves for whatever callee saved regs we have (including FP,
- // LR if those are saved). Frame instructions for SVE register are emitted
- // later, after the instruction which actually save SVE regs.
- if (EmitAsyncCFI)
- emitCalleeSavedGPRLocations(MBB, MBBI);
-
- // Alignment is required for the parent frame, not the funclet
- const bool NeedsRealignment =
- NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF);
- const int64_t RealignmentPadding =
- (NeedsRealignment && MFI.getMaxAlign() > Align(16))
- ? MFI.getMaxAlign().value() - 16
- : 0;
-
- if (windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) {
- if (AFI->getSVECalleeSavedStackSize())
- report_fatal_error(
- "SVE callee saves not yet supported with stack probing");
-
- // Find an available register to spill the value of X15 to, if X15 is being
- // used already for nest.
- unsigned X15Scratch = AArch64::NoRegister;
- const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
- if (llvm::any_of(MBB.liveins(),
- [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
- return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
- AArch64::X15, LiveIn.PhysReg);
- })) {
- X15Scratch = findScratchNonCalleeSaveRegister(&MBB, true);
- assert(X15Scratch != AArch64::NoRegister &&
- (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
-#ifndef NDEBUG
- LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
-#endif
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
- .addReg(AArch64::XZR)
- .addReg(AArch64::X15, RegState::Undef)
- .addReg(AArch64::X15, RegState::Implicit)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
- if (NeedsWinCFI) {
- HasWinCFI = true;
- // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
- // exceed this amount. We need to move at most 2^24 - 1 into x15.
- // This is at most two instructions, MOVZ followed by MOVK.
- // TODO: Fix to use multiple stack alloc unwind codes for stacks
- // exceeding 256MB in size.
- if (NumBytes >= (1 << 28))
- report_fatal_error("Stack size cannot exceed 256MB for stack "
- "unwinding purposes");
-
- uint32_t LowNumWords = NumWords & 0xFFFF;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
- .addImm(LowNumWords)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
- .setMIFlag(MachineInstr::FrameSetup);
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlag(MachineInstr::FrameSetup);
- if ((NumWords & 0xFFFF0000) != 0) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
- .addReg(AArch64::X15)
- .addImm((NumWords & 0xFFFF0000) >> 16) // High half
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
- .setMIFlag(MachineInstr::FrameSetup);
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlag(MachineInstr::FrameSetup);
- }
- } else {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
- .addImm(NumWords)
- .setMIFlags(MachineInstr::FrameSetup);
- }
-
- const char *ChkStk = Subtarget.getChkStkName();
- switch (MF.getTarget().getCodeModel()) {
- case CodeModel::Tiny:
- case CodeModel::Small:
- case CodeModel::Medium:
- case CodeModel::Kernel:
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
- .addExternalSymbol(ChkStk)
- .addReg(AArch64::X15, RegState::Implicit)
- .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
- .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
- .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
- .setMIFlags(MachineInstr::FrameSetup);
- if (NeedsWinCFI) {
- HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlag(MachineInstr::FrameSetup);
- }
- break;
- case CodeModel::Large:
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
- .addReg(AArch64::X16, RegState::Define)
- .addExternalSymbol(ChkStk)
- .addExternalSymbol(ChkStk)
- .setMIFlags(MachineInstr::FrameSetup);
- if (NeedsWinCFI) {
- HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
- .addReg(AArch64::X16, RegState::Kill)
- .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
- .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
- .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
- .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
- .setMIFlags(MachineInstr::FrameSetup);
- if (NeedsWinCFI) {
- HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlag(MachineInstr::FrameSetup);
- }
- break;
- }
-
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
- .addReg(AArch64::SP, RegState::Kill)
- .addReg(AArch64::X15, RegState::Kill)
- .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
- .setMIFlags(MachineInstr::FrameSetup);
- if (NeedsWinCFI) {
- HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
- .addImm(NumBytes)
- .setMIFlag(MachineInstr::FrameSetup);
- }
- NumBytes = 0;
-
- if (RealignmentPadding > 0) {
- if (RealignmentPadding >= 4096) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
- .addReg(AArch64::X16, RegState::Define)
- .addImm(RealignmentPadding)
- .setMIFlags(MachineInstr::FrameSetup);
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
- .addReg(AArch64::SP)
- .addReg(AArch64::X16, RegState::Kill)
- .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
- .setMIFlag(MachineInstr::FrameSetup);
- } else {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
- .addReg(AArch64::SP)
- .addImm(RealignmentPadding)
- .addImm(0)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
- .addReg(AArch64::X15, RegState::Kill)
- .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
- AFI->setStackRealigned(true);
-
- // No need for SEH instructions here; if we're realigning the stack,
- // we've set a frame pointer and already finished the SEH prologue.
- assert(!NeedsWinCFI);
- }
- if (X15Scratch != AArch64::NoRegister) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
- .addReg(AArch64::XZR)
- .addReg(X15Scratch, RegState::Undef)
- .addReg(X15Scratch, RegState::Implicit)
- .setMIFlag(MachineInstr::FrameSetup);
- }
- }
-
- StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
- MachineBasicBlock::iterator CalleeSavesEnd = MBBI;
-
- StackOffset CFAOffset =
- StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
-
- // Process the SVE callee-saves to determine what space needs to be
- // allocated.
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
- LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
- << "\n");
- SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
- SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
- // Find callee save instructions in frame.
- // Note: With FPAfterSVECalleeSaves the callee saves have already been
- // allocated.
- if (!FPAfterSVECalleeSaves) {
- MachineBasicBlock::iterator CalleeSavesBegin = MBBI;
- assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
- while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
- ++MBBI;
- CalleeSavesEnd = MBBI;
-
- StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
- // Allocate space for the callee saves (if any).
- allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
- nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
- MFI.hasVarSizedObjects() || LocalsSize);
- }
- }
- CFAOffset += SVECalleeSavesSize;
-
- if (EmitAsyncCFI)
- emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
-
- // Allocate space for the rest of the frame including SVE locals. Align the
- // stack as necessary.
- assert(!(canUseRedZone(MF) && NeedsRealignment) &&
- "Cannot use redzone with stack realignment");
- if (!canUseRedZone(MF)) {
- // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
- // the correct value here, as NumBytes also includes padding bytes,
- // which shouldn't be counted here.
- allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
- SVELocalsSize + StackOffset::getFixed(NumBytes),
- NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
- CFAOffset, MFI.hasVarSizedObjects());
- }
-
- // If we need a base pointer, set it up here. It's whatever the value of the
- // stack pointer is at this point. Any variable size objects will be allocated
- // after this, so we can still use the base pointer to reference locals.
- //
- // FIXME: Clarify FrameSetup flags here.
- // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
- // needed.
- // For funclets the BP belongs to the containing function.
- if (!IsFunclet && RegInfo->hasBasePointer(MF)) {
- TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
- false);
- if (NeedsWinCFI) {
- HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlag(MachineInstr::FrameSetup);
- }
- }
-
- // The very last FrameSetup instruction indicates the end of prologue. Emit a
- // SEH opcode indicating the prologue end.
- if (NeedsWinCFI && HasWinCFI) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- // SEH funclets are passed the frame pointer in X1. If the parent
- // function uses the base register, then the base register is used
- // directly, and is not retrieved from X1.
- if (IsFunclet && F.hasPersonalityFn()) {
- EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
- if (isAsynchronousEHPersonality(Per)) {
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
- .addReg(AArch64::X1)
- .setMIFlag(MachineInstr::FrameSetup);
- MBB.addLiveIn(AArch64::X1);
- }
- }
-
- if (EmitCFI && !EmitAsyncCFI) {
- if (HasFP) {
- emitDefineCFAWithFP(MF, MBB, MBBI, FixedObject);
- } else {
- StackOffset TotalSize =
- SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
- CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
- CFIBuilder.insertCFIInst(
- createDefCFA(*RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
- TotalSize, /*LastAdjustmentWasScalable=*/false));
- }
- emitCalleeSavedGPRLocations(MBB, MBBI);
- emitCalleeSavedSVELocations(MBB, MBBI);
- }
+ AArch64PrologueEmitter PrologueEmitter(MF, MBB, *this);
+ PrologueEmitter.emitPrologue();
}
static bool isFuncletReturnInstr(const MachineInstr &MI) {
@@ -2607,7 +1850,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
while (FirstGPRRestoreI != Begin) {
--FirstGPRRestoreI;
if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
- (!FPAfterSVECalleeSaves && IsSVECalleeSave(FirstGPRRestoreI))) {
+ (!FPAfterSVECalleeSaves && isSVECalleeSave(FirstGPRRestoreI))) {
++FirstGPRRestoreI;
break;
} else if (CombineSPBump)
@@ -2689,11 +1932,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
RestoreBegin = std::prev(RestoreEnd);
while (RestoreBegin != MBB.begin() &&
- IsSVECalleeSave(std::prev(RestoreBegin)))
+ isSVECalleeSave(std::prev(RestoreBegin)))
--RestoreBegin;
- assert(IsSVECalleeSave(RestoreBegin) &&
- IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
+ assert(isSVECalleeSave(RestoreBegin) &&
+ isSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
StackOffset CalleeSavedSizeAsOffset =
StackOffset::getScalable(CalleeSavedSize);
@@ -2927,8 +2170,8 @@ AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF,
return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI));
}
-static StackOffset getFPOffset(const MachineFunction &MF,
- int64_t ObjectOffset) {
+StackOffset AArch64FrameLowering::getFPOffset(const MachineFunction &MF,
+ int64_t ObjectOffset) const {
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const Function &F = MF.getFunction();
@@ -2941,8 +2184,8 @@ static StackOffset getFPOffset(const MachineFunction &MF,
return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust);
}
-static StackOffset getStackOffset(const MachineFunction &MF,
- int64_t ObjectOffset) {
+StackOffset AArch64FrameLowering::getStackOffset(const MachineFunction &MF,
+ int64_t ObjectOffset) const {
const auto &MFI = MF.getFrameInfo();
return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize());
}
@@ -3140,7 +2383,8 @@ static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
return getKillRegState(!IsLiveIn);
}
-static bool produceCompactUnwindFrame(MachineFunction &MF) {
+static bool produceCompactUnwindFrame(const AArch64FrameLowering &AFL,
+ MachineFunction &MF) {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
AttributeList Attrs = MF.getFunction().getAttributes();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -3148,7 +2392,7 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
!(Subtarget.getTargetLowering()->supportSwiftError() &&
Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
MF.getFunction().getCallingConv() != CallingConv::SwiftTail &&
- !requiresSaveVG(MF) && !AFI->isSVECC();
+ !AFL.requiresSaveVG(MF) && !AFI->isSVECC();
}
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
@@ -3245,16 +2489,18 @@ bool enableMultiVectorSpillFill(const AArch64Subtarget &Subtarget,
(!IsLocallyStreaming && Subtarget.isStreaming()));
}
-static void computeCalleeSaveRegisterPairs(
- MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
- const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
- bool NeedsFrameRecord) {
+void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
+ MachineFunction &MF,
+ ArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI,
+ SmallVectorImpl<RegPairInfo> &RegPairs,
+ bool NeedsFrameRecord) {
if (CSI.empty())
return;
bool IsWindows = isTargetWindows(MF);
- bool NeedsWinCFI = needsWinCFI(MF);
+ bool NeedsWinCFI = AFL.needsWinCFI(MF);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned StackHazardSize = getStackHazardSize(MF);
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -3263,9 +2509,10 @@ static void computeCalleeSaveRegisterPairs(
(void)CC;
// MachO's compact unwind format relies on all registers being stored in
// pairs.
- assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
- CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS ||
- CC == CallingConv::Win64 || (Count & 1) == 0) &&
+ assert((!produceCompactUnwindFrame(AFL, MF) ||
+ CC == CallingConv::PreserveMost || CC == CallingConv::PreserveAll ||
+ CC == CallingConv::CXX_FAST_TLS || CC == CallingConv::Win64 ||
+ (Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
int ByteOffset = AFI->getCalleeSavedStackSize();
int StackFillDir = -1;
@@ -3381,9 +2628,9 @@ static void computeCalleeSaveRegisterPairs(
// MachO's compact unwind format relies on all registers being stored in
// adjacent register pairs.
- assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
- CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS ||
- CC == CallingConv::Win64 ||
+ assert((!produceCompactUnwindFrame(AFL, MF) ||
+ CC == CallingConv::PreserveMost || CC == CallingConv::PreserveAll ||
+ CC == CallingConv::CXX_FAST_TLS || CC == CallingConv::Win64 ||
(RPI.isPaired() &&
((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
RPI.Reg1 + 1 == RPI.Reg2))) &&
@@ -3496,7 +2743,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
- computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
+ computeCalleeSaveRegisterPairs(*this, MF, CSI, TRI, RegPairs, hasFP(MF));
MachineRegisterInfo &MRI = MF.getRegInfo();
// Refresh the reserved regs in case there are any potential changes since the
@@ -3708,7 +2955,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
- computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
+ computeCalleeSaveRegisterPairs(*this, MF, CSI, TRI, RegPairs, hasFP(MF));
if (homogeneousPrologEpilog(MF, &MBB)) {
auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog))
.setMIFlag(MachineInstr::FrameDestroy);
@@ -4142,7 +3389,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
if (producePairRegisters(MF)) {
if (UnspilledCSGPRPaired == AArch64::NoRegister) {
// Failed to make a pair for compact unwind format, revert spilling.
- if (produceCompactUnwindFrame(MF)) {
+ if (produceCompactUnwindFrame(*this, MF)) {
SavedRegs.reset(UnspilledCSGPR);
ExtraCSSpill = AArch64::NoRegister;
}
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 555a93359c274..a9d65441a4e30 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -19,6 +19,10 @@
namespace llvm {
+class TargetLowering;
+class AArch64FunctionInfo;
+class AArch64PrologueEmitter;
+
class AArch64FrameLowering : public TargetFrameLowering {
public:
explicit AArch64FrameLowering()
@@ -130,12 +134,19 @@ class AArch64FrameLowering : public TargetFrameLowering {
return StackId != TargetStackID::ScalableVector;
}
+ friend class AArch64PrologueEmitter;
void
orderFrameObjects(const MachineFunction &MF,
SmallVectorImpl<int> &ObjectsToAllocate) const override;
bool isFPReserved(const MachineFunction &MF) const;
+ bool needsWinCFI(const MachineFunction &MF) const;
+
+ bool requiresSaveVG(const MachineFunction &MF) const;
+
+ StackOffset getSVEStackSize(const MachineFunction &MF) const;
+
protected:
bool hasFPImpl(const MachineFunction &MF) const override;
@@ -159,10 +170,6 @@ class AArch64FrameLowering : public TargetFrameLowering {
int &MaxCSFrameIndex) const;
bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
uint64_t StackBumpBytes) const;
- void emitCalleeSavedGPRLocations(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI) const;
- void emitCalleeSavedSVELocations(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI) const;
void emitCalleeSavedGPRRestores(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const;
void emitCalleeSavedSVERestores(MachineBasicBlock &MBB,
@@ -196,6 +203,61 @@ class AArch64FrameLowering : public TargetFrameLowering {
void emitRemarks(const MachineFunction &MF,
MachineOptimizationRemarkEmitter *ORE) const override;
+
+ bool windowsRequiresStackProbe(const MachineFunction &MF,
+ uint64_t StackSizeInBytes) const;
+
+ bool shouldSignReturnAddressEverywhere(const MachineFunction &MF) const;
+
+ StackOffset getFPOffset(const MachineFunction &MF,
+ int64_t ObjectOffset) const;
+
+ StackOffset getStackOffset(const MachineFunction &MF,
+ int64_t ObjectOffset) const;
+
+ // Find a scratch register that we can use at the start of the prologue to
+ // re-align the stack pointer. We avoid using callee-save registers since
+ // they may appear to be free when this is called from canUseAsPrologue
+ // (during shrink wrapping), but then no longer be free when this is called
+ // from emitPrologue.
+ //
+ // FIXME: This is a bit conservative, since in the above case we could use one
+ // of the callee-save registers as a scratch temp to re-align the stack
+ // pointer, but we would then have to make sure that we were in fact saving at
+ // least one callee-save register in the prologue, which is additional
+ // complexity that doesn't seem worth the benefit.
+ Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+ bool HasCall = false) const;
+
+ // Convert callee-save register save/restore instruction to do stack pointer
+ // decrement/increment to allocate/deallocate the callee-save stack area by
+ // converting store/load to use pre/post increment version.
+ MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
+ bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
+ MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
+ int CFAOffset = 0) const;
+
+ // Fixup callee-save register save/restore instructions to take into account
+ // combined SP bump by adding the local stack size to the stack offsets.
+ void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
+ uint64_t LocalStackSize,
+ bool NeedsWinCFI,
+ bool *HasWinCFI) const;
+
+ bool isSVECalleeSave(MachineBasicBlock::iterator I) const;
+
+ /// Returns the size of the fixed object area (allocated next to sp on entry)
+ /// On Win64 this may include a var args area and an UnwindHelp object for EH.
+ unsigned getFixedObjectSize(const MachineFunction &MF,
+ const AArch64FunctionInfo *AFI, bool IsWin64,
+ bool IsFunclet) const;
+
+ bool isVGInstruction(MachineBasicBlock::iterator MBBI,
+ const TargetLowering &TLI) const;
+
+ bool requiresGetVGCall(const MachineFunction &MF) const;
};
} // End llvm namespace
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
new file mode 100644
index 0000000000000..af424987b8ddb
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -0,0 +1,794 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64PrologueEpilogue.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/CFIInstBuilder.h"
+#include "llvm/MC/MCContext.h"
+
+#define DEBUG_TYPE "frame-info"
+
+STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
+
+namespace llvm {
+
+AArch64PrologueEmitter::AArch64PrologueEmitter(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ const AArch64FrameLowering &AFL)
+ : MF(MF), MBB(MBB), F(MF.getFunction()), MFI(MF.getFrameInfo()),
+ Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
+ RegInfo(*Subtarget.getRegisterInfo()) {
+ TII = Subtarget.getInstrInfo();
+ AFI = MF.getInfo<AArch64FunctionInfo>();
+
+ EmitCFI = AFI->needsDwarfUnwindInfo(MF);
+ EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
+ HasFP = AFL.hasFP(MF);
+ NeedsWinCFI = AFL.needsWinCFI(MF);
+ IsFunclet = MBB.isEHFuncletEntry();
+ HomPrologEpilog = AFL.homogeneousPrologEpilog(MF);
+
+#ifndef NDEBUG
+ collectBlockLiveins();
+#endif
+}
+
+#ifndef NDEBUG
+/// Collect live registers from the end of \p MI's parent up to (including) \p
+/// MI in \p LiveRegs.
+static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
+ LivePhysRegs &LiveRegs) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ LiveRegs.addLiveOuts(MBB);
+ for (const MachineInstr &MI :
+ reverse(make_range(MI.getIterator(), MBB.instr_end())))
+ LiveRegs.stepBackward(MI);
+}
+
+void AArch64PrologueEmitter::collectBlockLiveins() {
+ // Collect live registers from the end of MBB up to the start of the existing
+ // frame setup instructions.
+ PrologueEndI = MBB.begin();
+ while (PrologueEndI != MBB.end() &&
+ PrologueEndI->getFlag(MachineInstr::FrameSetup))
+ ++PrologueEndI;
+
+ if (PrologueEndI != MBB.end()) {
+ getLivePhysRegsUpTo(*PrologueEndI, RegInfo, LiveRegs);
+ // Ignore registers used for stack management for now.
+ LiveRegs.removeReg(AArch64::SP);
+ LiveRegs.removeReg(AArch64::X19);
+ LiveRegs.removeReg(AArch64::FP);
+ LiveRegs.removeReg(AArch64::LR);
+
+ // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
+ // This is necessary to spill VG if required where SVE is unavailable, but
+ // X0 is preserved around this call.
+ if (AFL.requiresGetVGCall(MF))
+ LiveRegs.removeReg(AArch64::X0);
+ }
+}
+
+void AArch64PrologueEmitter::verifyPrologueClobbers() const {
+ if (PrologueEndI == MBB.end())
+ return;
+ // Check if any of the newly inserted instructions clobber any of the live registers.
+ for (MachineInstr &MI :
+ make_range(MBB.instr_begin(), PrologueEndI->getIterator())) {
+ for (auto &Op : MI.operands())
+ if (Op.isReg() && Op.isDef())
+ assert(!LiveRegs.contains(Op.getReg()) &&
+ "live register clobbered by inserted prologue instructions");
+ }
+}
+#endif
+
+void AArch64PrologueEmitter::determineLocalsStackSize(
+ uint64_t StackSize, uint64_t PrologueSaveSize) {
+ AFI->setLocalStackSize(StackSize - PrologueSaveSize);
+ CombineSPBump = AFL.shouldCombineCSRLocalStackBump(MF, StackSize);
+}
+
+void AArch64PrologueEmitter::emitPrologue() {
+ const MachineBasicBlock::iterator PrologueBeginI = MBB.begin();
+ const MachineBasicBlock::iterator EndI = MBB.end();
+
+ // At this point, we're going to decide whether or not the function uses a
+ // redzone. In most cases, the function doesn't have a redzone so let's
+ // assume that's false and set it to true in the case that there's a redzone.
+ AFI->setHasRedZone(false);
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc DL;
+
+ if (AFI->shouldSignReturnAddress(MF)) {
+ // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
+ // are inserted by emitPacRetPlusLeafHardening().
+ if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
+ BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ // AArch64PointerAuth pass will insert SEH_PACSignLR
+ HasWinCFI |= NeedsWinCFI;
+ }
+
+ if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
+ emitShadowCallStackPrologue(PrologueBeginI, DL);
+ HasWinCFI |= NeedsWinCFI;
+ }
+
+ if (EmitCFI && AFI->isMTETagged())
+ BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::EMITMTETAGGED))
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // We signal the presence of a Swift extended frame to external tools by
+ // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
+ // ORR is sufficient, it is assumed a Swift kernel would initialize the TBI
+ // bits so that is still true.
+ if (HasFP && AFI->hasSwiftAsyncContext())
+ emitSwiftAsyncContextFramePointer(PrologueBeginI, DL);
+
+ // All calls are tail calls in GHC calling conv, and functions have no
+ // prologue/epilogue.
+ if (MF.getFunction().getCallingConv() == CallingConv::GHC)
+ return;
+
+  // Set the tagged base pointer to the requested stack slot. Ideally it
+  // should match the SP value after the prologue.
+ if (std::optional<int> TBPI = AFI->getTaggedBasePointerIndex())
+ AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
+ else
+ AFI->setTaggedBasePointerOffset(MFI.getStackSize());
+
+ // getStackSize() includes all the locals in its size calculation. We don't
+ // include these locals when computing the stack size of a funclet, as they
+ // are allocated in the parent's stack frame and accessed via the frame
+ // pointer from the funclet. We only save the callee saved registers in the
+ // funclet, which are really the callee saved registers of the parent
+ // function, including the funclet.
+ int64_t NumBytes =
+ IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
+ if (!AFI->hasStackFrame() && !AFL.windowsRequiresStackProbe(MF, NumBytes))
+ return emitEmptyStackFramePrologue(NumBytes, PrologueBeginI, DL);
+
+ bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
+ unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
+
+ // Windows unwind can't represent the required stack adjustments if we have
+ // both SVE callee-saves and dynamic stack allocations, and the frame
+ // pointer is before the SVE spills. The allocation of the frame pointer
+ // must be the last instruction in the prologue so the unwinder can restore
+ // the stack pointer correctly. (And there isn't any unwind opcode for
+ // `addvl sp, x29, -17`.)
+ //
+ // Because of this, we do spills in the opposite order on Windows: first SVE,
+ // then GPRs. The main side-effect of this is that it makes accessing
+ // parameters passed on the stack more expensive.
+ //
+ // We could consider rearranging the spills for simpler cases.
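+  //
+  // Illustrative frame layout in that case (a sketch only; the exact slot
+  // order comes from the callee-save assignment):
+  //   | incoming args / fixed objects |  <- higher addresses
+  //   | SVE callee saves              |
+  //   | GPR callee saves (FP, LR, ..) |  <- frame record
+  //   | locals                        |  <- SP after the prologue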
+ bool FPAfterSVECalleeSaves =
+ Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
+
+ if (FPAfterSVECalleeSaves && AFI->hasStackHazardSlotIndex())
+ reportFatalUsageError("SME hazard padding is not supported on Windows");
+
+ auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
+ // All of the remaining stack allocations are for locals.
+ determineLocalsStackSize(NumBytes, PrologueSaveSize);
+
+ MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
+ if (FPAfterSVECalleeSaves) {
+ // If we're doing SVE saves first, we need to immediately allocate space
+ // for fixed objects, then space for the SVE callee saves.
+ //
+ // Windows unwind requires that the scalable size is a multiple of 16;
+ // that's handled when the callee-saved size is computed.
+ auto SaveSize =
+ StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
+ StackOffset::getFixed(FixedObject);
+ AFL.allocateStackSpace(MBB, PrologueBeginI, 0, SaveSize, NeedsWinCFI,
+ &HasWinCFI,
+ /*EmitCFI=*/false, StackOffset{},
+ /*FollowupAllocs=*/true);
+ NumBytes -= FixedObject;
+
+ // Now allocate space for the GPR callee saves.
+ MachineBasicBlock::iterator MBBI = PrologueBeginI;
+ while (MBBI != EndI && AFL.isSVECalleeSave(MBBI))
+ ++MBBI;
+ FirstGPRSaveI = AFL.convertCalleeSaveRestoreToSPPrePostIncDec(
+ MBB, MBBI, DL, TII, -AFI->getCalleeSavedStackSize(), NeedsWinCFI,
+ &HasWinCFI, EmitAsyncCFI);
+ NumBytes -= AFI->getCalleeSavedStackSize();
+ } else if (CombineSPBump) {
+ assert(!AFL.getSVEStackSize(MF) && "Cannot combine SP bump with SVE");
+ emitFrameOffset(MBB, PrologueBeginI, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(-NumBytes), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
+ EmitAsyncCFI);
+ NumBytes = 0;
+ } else if (HomPrologEpilog) {
+ // Stack has been already adjusted.
+ NumBytes -= PrologueSaveSize;
+ } else if (PrologueSaveSize != 0) {
+ FirstGPRSaveI = AFL.convertCalleeSaveRestoreToSPPrePostIncDec(
+ MBB, PrologueBeginI, DL, TII, -PrologueSaveSize, NeedsWinCFI,
+ &HasWinCFI, EmitAsyncCFI);
+ NumBytes -= PrologueSaveSize;
+ }
+ assert(NumBytes >= 0 && "Negative stack allocation size!?");
+
+  // Move past the saves of the callee-saved registers, fixing up the offsets
+  // and the pre-increment if we decided to combine the callee-save and local
+  // stack pointer bumps above.
+ auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+ MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
+ while (AfterGPRSavesI != EndI &&
+ AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
+ !AFL.isSVECalleeSave(AfterGPRSavesI)) {
+ if (CombineSPBump &&
+ // Only fix-up frame-setup load/store instructions.
+ (!AFL.requiresSaveVG(MF) || !AFL.isVGInstruction(AfterGPRSavesI, TLI)))
+ AFL.fixupCalleeSaveRestoreStackOffset(
+ *AfterGPRSavesI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI);
+ ++AfterGPRSavesI;
+ }
+
+ // For funclets the FP belongs to the containing function. Only set up FP if
+ // we actually need to.
+ if (!IsFunclet && HasFP)
+ emitFramePointerSetup(AfterGPRSavesI, DL, FixedObject);
+
+  // Now emit the moves for whatever callee saved regs we have (including FP,
+  // LR if those are saved). Frame instructions for SVE registers are emitted
+  // later, after the instructions which actually save the SVE regs.
+ if (EmitAsyncCFI)
+ emitCalleeSavedGPRLocations(AfterGPRSavesI);
+
+  // Alignment is required for the parent frame, not the funclet.
+ const bool NeedsRealignment =
+ NumBytes && !IsFunclet && RegInfo.hasStackRealignment(MF);
+ const int64_t RealignmentPadding =
+ (NeedsRealignment && MFI.getMaxAlign() > Align(16))
+ ? MFI.getMaxAlign().value() - 16
+ : 0;
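+  // SP is always 16-byte aligned, so at most MaxAlign - 16 bytes of padding
+  // are ever needed to reach a larger alignment (e.g. 16 extra bytes for a
+  // 32-byte alignment request).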
+
+ if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
+ emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
+
+ StackOffset SVEStackSize = AFL.getSVEStackSize(MF);
+ StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
+ MachineBasicBlock::iterator CalleeSavesEnd = AfterGPRSavesI;
+
+ StackOffset CFAOffset =
+ StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
+
+ // Process the SVE callee-saves to determine what space needs to be
+ // allocated.
+ MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
+ if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
+ LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
+ << "\n");
+ SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
+ SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
+    // Find the SVE callee-save instructions in the frame.
+ // Note: With FPAfterSVECalleeSaves the callee saves have already been
+ // allocated.
+ if (!FPAfterSVECalleeSaves) {
+ MachineBasicBlock::iterator CalleeSavesBegin = AfterGPRSavesI;
+ assert(AFL.isSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
+ while (AFL.isSVECalleeSave(AfterSVESavesI) &&
+ AfterSVESavesI != MBB.getFirstTerminator())
+ ++AfterSVESavesI;
+ CalleeSavesEnd = AfterSVESavesI;
+
+ StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
+ // Allocate space for the callee saves (if any).
+ AFL.allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize,
+ false, nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || LocalsSize);
+ }
+ }
+ CFAOffset += SVECalleeSavesSize;
+
+ if (EmitAsyncCFI)
+ emitCalleeSavedSVELocations(CalleeSavesEnd);
+
+ // Allocate space for the rest of the frame including SVE locals. Align the
+ // stack as necessary.
+ assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
+ "Cannot use redzone with stack realignment");
+ if (!AFL.canUseRedZone(MF)) {
+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+ // the correct value here, as NumBytes also includes padding bytes,
+ // which shouldn't be counted here.
+ AFL.allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
+ SVELocalsSize + StackOffset::getFixed(NumBytes),
+ NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
+ CFAOffset, MFI.hasVarSizedObjects());
+ }
+
+  // If we need a base pointer, set it up here. It takes the value of the
+  // stack pointer at this point. Any variable-size objects will be allocated
+  // after this, so we can still use the base pointer to reference locals.
+ //
+ // FIXME: Clarify FrameSetup flags here.
+ // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
+ // needed.
+ // For funclets the BP belongs to the containing function.
+ if (!IsFunclet && RegInfo.hasBasePointer(MF)) {
+ TII->copyPhysReg(MBB, AfterSVESavesI, DL, RegInfo.getBaseRegister(),
+ AArch64::SP, false);
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+
+  // The very last FrameSetup instruction marks the end of the prologue. Emit
+  // an SEH opcode to indicate the prologue end.
+ if (NeedsWinCFI && HasWinCFI) {
+ BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ // SEH funclets are passed the frame pointer in X1. If the parent
+ // function uses the base register, then the base register is used
+ // directly, and is not retrieved from X1.
+ if (IsFunclet && F.hasPersonalityFn()) {
+ EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
+ if (isAsynchronousEHPersonality(Per)) {
+ BuildMI(MBB, AfterSVESavesI, DL, TII->get(TargetOpcode::COPY),
+ AArch64::FP)
+ .addReg(AArch64::X1)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MBB.addLiveIn(AArch64::X1);
+ }
+ }
+
+ if (EmitCFI && !EmitAsyncCFI) {
+ if (HasFP) {
+ emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
+ } else {
+ StackOffset TotalSize =
+ SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
+ CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
+ CFIBuilder.insertCFIInst(
+ createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
+ TotalSize, /*LastAdjustmentWasScalable=*/false));
+ }
+ emitCalleeSavedGPRLocations(AfterSVESavesI);
+ emitCalleeSavedSVELocations(AfterSVESavesI);
+ }
+}
+
+void AArch64PrologueEmitter::emitShadowCallStackPrologue(
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
+ // Shadow call stack prolog: str x30, [x18], #8
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXpost))
+ .addReg(AArch64::X18, RegState::Define)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::X18)
+ .addImm(8)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // This instruction also makes x18 live-in to the entry block.
+ MBB.addLiveIn(AArch64::X18);
+
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ if (EmitCFI) {
+ // Emit a CFI instruction that causes 8 to be subtracted from the value of
+ // x18 when unwinding past this frame.
+ static const char CFIInst[] = {
+ dwarf::DW_CFA_val_expression,
+ 18, // register
+ 2, // length
+ static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
+ static_cast<char>(-8) & 0x7f, // addend (sleb128)
+ };
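+    // (sleb128(-8) fits in one byte, 0x78: sign bit set within the 7-bit
+    // group, no continuation bit, so the rule evaluates to "x18 - 8".)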
+ CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
+ .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
+ }
+}
+
+void AArch64PrologueEmitter::emitSwiftAsyncContextFramePointer(
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
+ switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
+ case SwiftAsyncFramePointerMode::DeploymentBased:
+ if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
+ // The special symbol below is absolute and has a *value* that can be
+ // combined with the frame pointer to signal an extended frame.
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
+ .addExternalSymbol("swift_async_extendedFramePointerFlags",
+ AArch64II::MO_GOT);
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
+ .addUse(AArch64::FP)
+ .addUse(AArch64::X16)
+ .addImm(Subtarget.isTargetILP32() ? 32 : 0);
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
+ break;
+ }
+ [[fallthrough]];
+
+ case SwiftAsyncFramePointerMode::Always:
+ // ORR x29, x29, #0x1000_0000_0000_0000
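+    // (0x1100 is the encoded logical immediate for 0x1000000000000000, a
+    // single set bit at position 60; see AArch64_AM::encodeLogicalImmediate.)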
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
+ .addUse(AArch64::FP)
+ .addImm(0x1100)
+ .setMIFlag(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
+ break;
+
+ case SwiftAsyncFramePointerMode::Never:
+ break;
+ }
+}
+
+void AArch64PrologueEmitter::emitEmptyStackFramePrologue(
+ int64_t NumBytes, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL) const {
+ assert(!HasFP && "unexpected function without stack frame but with FP");
+ assert(!AFL.getSVEStackSize(MF) &&
+ "unexpected function without stack frame but with SVE objects");
+ // All of the stack allocation is for locals.
+ AFI->setLocalStackSize(NumBytes);
+ if (!NumBytes) {
+ if (NeedsWinCFI && HasWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ return;
+ }
+ // REDZONE: If the stack size is less than 128 bytes, we don't need
+ // to actually allocate.
+ if (AFL.canUseRedZone(MF)) {
+ AFI->setHasRedZone(true);
+ ++NumRedZoneFunctions;
+ } else {
+ emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(-NumBytes), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ if (EmitCFI) {
+ // Label used to tie together the PROLOG_LABEL and the MachineMoves.
+ MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
+ // Encode the stack size of the leaf function.
+ CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
+ .buildDefCFAOffset(NumBytes, FrameLabel);
+ }
+ }
+
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+}
+
+void AArch64PrologueEmitter::emitFramePointerSetup(
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+ unsigned FixedObject) {
+ int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
+ if (CombineSPBump)
+ FPOffset += AFI->getLocalStackSize();
+
+ if (AFI->hasSwiftAsyncContext()) {
+ // Before we update the live FP we have to ensure there's a valid (or
+ // null) asynchronous context in its slot just before FP in the frame
+ // record, so store it now.
+ const auto &Attrs = MF.getFunction().getAttributes();
+ bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
+ if (HaveInitialContext)
+ MBB.addLiveIn(AArch64::X22);
+ Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
+ .addUse(Reg)
+ .addUse(AArch64::SP)
+ .addImm(FPOffset - 8)
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
+      // to multiple instructions, should be mutually exclusive.
+ assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
+ }
+
+ if (HomPrologEpilog) {
+ auto Prolog = MBBI;
+ --Prolog;
+ assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
+ Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
+ } else {
+    // Issue "sub fp, sp, FPOffset" or "mov fp, sp" when FPOffset is zero.
+    // Note: All stores of callee-saved registers are marked as "FrameSetup".
+    // This code also marks the instruction(s) that set the FP.
+ emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
+ StackOffset::getFixed(FPOffset), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ if (NeedsWinCFI && HasWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ // After setting up the FP, the rest of the prolog doesn't need to be
+ // included in the SEH unwind info.
+ NeedsWinCFI = false;
+ }
+ }
+ if (EmitAsyncCFI)
+ emitDefineCFAWithFP(MBBI, FixedObject);
+}
+
+// Define the current CFA rule to use the provided FP.
+void AArch64PrologueEmitter::emitDefineCFAWithFP(
+ MachineBasicBlock::iterator MBBI, unsigned FixedObject) const {
+ const AArch64RegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const int OffsetToFirstCalleeSaveFromFP =
+ AFI->getCalleeSaveBaseToFrameRecordOffset() -
+ AFI->getCalleeSavedStackSize();
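+  // OffsetToFirstCalleeSaveFromFP is normally negative (the frame record
+  // sits partway up the callee-save area), so the CFA lands FixedObject plus
+  // the callee-save area size above the callee-save base.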
+ Register FramePtr = TRI->getFrameRegister(MF);
+ CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
+ .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
+}
+
+void AArch64PrologueEmitter::emitWindowsStackProbe(
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t &NumBytes,
+ int64_t RealignmentPadding) const {
+ if (AFI->getSVECalleeSavedStackSize())
+ report_fatal_error("SVE callee saves not yet supported with stack probing");
+
+  // Find an available register to spill the value of X15 to, if X15 is
+  // already being used for the nest parameter.
+ unsigned X15Scratch = AArch64::NoRegister;
+ const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
+ if (llvm::any_of(MBB.liveins(),
+ [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
+ return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
+ AArch64::X15, LiveIn.PhysReg);
+ })) {
+ X15Scratch = AFL.findScratchNonCalleeSaveRegister(&MBB, /*HasCall=*/true);
+ assert(X15Scratch != AArch64::NoRegister &&
+ (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
+#ifndef NDEBUG
+ LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
+#endif
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::X15, RegState::Undef)
+ .addReg(AArch64::X15, RegState::Implicit)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
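+  // X15 carries the allocation size in 16-byte units: the Windows __chkstk
+  // helper expects size / 16 in x15, and the SUBXrx64 below shifts x15 back
+  // up by 4 when adjusting SP.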
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
+ // exceed this amount. We need to move at most 2^24 - 1 into x15.
+ // This is at most two instructions, MOVZ followed by MOVK.
+ // TODO: Fix to use multiple stack alloc unwind codes for stacks
+ // exceeding 256MB in size.
+ if (NumBytes >= (1 << 28))
+ report_fatal_error("Stack size cannot exceed 256MB for stack "
+ "unwinding purposes");
+
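+    // ((1 << 28) bytes) / 16 = (1 << 24) 16-byte units, so NumWords always
+    // fits in the MOVZ+MOVK pair below.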
+ uint32_t LowNumWords = NumWords & 0xFFFF;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
+ .addImm(LowNumWords)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ if ((NumWords & 0xFFFF0000) != 0) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
+ .addReg(AArch64::X15)
+ .addImm((NumWords & 0xFFFF0000) >> 16) // High half
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ const char *ChkStk = Subtarget.getChkStkName();
+ switch (MF.getTarget().getCodeModel()) {
+ case CodeModel::Tiny:
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ case CodeModel::Kernel:
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
+ .addExternalSymbol(ChkStk)
+ .addReg(AArch64::X15, RegState::Implicit)
+ .addReg(AArch64::X16,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::X17,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::NZCV,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ break;
+ case CodeModel::Large:
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
+ .addReg(AArch64::X16, RegState::Define)
+ .addExternalSymbol(ChkStk)
+ .addExternalSymbol(ChkStk)
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
+ .addReg(AArch64::X16, RegState::Kill)
+ .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
+ .addReg(AArch64::X16,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::X17,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::NZCV,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ break;
+ }
+
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
+ .addReg(AArch64::SP, RegState::Kill)
+ .addReg(AArch64::X15, RegState::Kill)
+ .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ NumBytes = 0;
+
+ if (RealignmentPadding > 0) {
+ if (RealignmentPadding >= 4096) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
+ .addReg(AArch64::X16, RegState::Define)
+ .addImm(RealignmentPadding)
+ .setMIFlags(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
+ .addReg(AArch64::SP)
+ .addReg(AArch64::X16, RegState::Kill)
+ .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
+ .addReg(AArch64::SP)
+ .addImm(RealignmentPadding)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
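+    // e.g. MaxAlign = 32 gives AndMask = 0xFFFFFFFFFFFFFFE0; the AND rounds
+    // SP down to the requested alignment.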
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+ .addReg(AArch64::X15, RegState::Kill)
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
+ AFI->setStackRealigned(true);
+
+ // No need for SEH instructions here; if we're realigning the stack,
+ // we've set a frame pointer and already finished the SEH prologue.
+ assert(!NeedsWinCFI);
+ }
+ if (X15Scratch != AArch64::NoRegister) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
+ .addReg(AArch64::XZR)
+ .addReg(X15Scratch, RegState::Undef)
+ .addReg(X15Scratch, RegState::Implicit)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+}
+
+void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
+ MachineBasicBlock::iterator MBBI) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ if (CSI.empty())
+ return;
+
+ CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
+ for (const auto &Info : CSI) {
+ unsigned FrameIdx = Info.getFrameIdx();
+ if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector)
+ continue;
+
+ assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
+ int64_t Offset = MFI.getObjectOffset(FrameIdx) - AFL.getOffsetOfLocalArea();
+ CFIBuilder.buildOffset(Info.getReg(), Offset);
+ }
+}
+
+void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
+ MachineBasicBlock::iterator MBBI) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ if (CSI.empty())
+ return;
+
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+ CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
+
+ std::optional<int64_t> IncomingVGOffsetFromDefCFA;
+ if (AFL.requiresSaveVG(MF)) {
+ auto IncomingVG = *find_if(
+ reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
+ IncomingVGOffsetFromDefCFA = MFI.getObjectOffset(IncomingVG.getFrameIdx()) -
+ AFL.getOffsetOfLocalArea();
+ }
+
+ for (const auto &Info : CSI) {
+ if (MFI.getStackID(Info.getFrameIdx()) != TargetStackID::ScalableVector)
+ continue;
+
+ // Not all unwinders may know about SVE registers, so assume the lowest
+ // common denominator.
+ assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
+ MCRegister Reg = Info.getReg();
+ if (!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
+ continue;
+
+ StackOffset Offset =
+ StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
+ StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI));
+
+ CFIBuilder.insertCFIInst(
+ createCFAOffset(TRI, Reg, Offset, IncomingVGOffsetFromDefCFA));
+ }
+}
+
+} // namespace llvm
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
new file mode 100644
index 0000000000000..94029ede60c76
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
@@ -0,0 +1,111 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declaration of the AArch64PrologueEmitter class,
+/// which is used to emit the prologue on AArch64.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64PROLOGUEEPILOGUE_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64PROLOGUEEPILOGUE_H
+
+#include "AArch64RegisterInfo.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class AArch64Subtarget;
+class AArch64FunctionInfo;
+class AArch64FrameLowering;
+
+/// A helper class for emitting the prologue. Substantial new functionality
+/// should be factored into a new method. Where possible "emit*" methods should
+/// be const, and any flags that change how the prologue is emitted should be
+/// set in the constructor.
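+///
+/// Typical use (a sketch; the intended call site is
+/// AArch64FrameLowering::emitPrologue):
+///   AArch64PrologueEmitter(MF, MBB, *this).emitPrologue();
+/// The destructor then propagates HasWinCFI back to the MachineFunction.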
+class AArch64PrologueEmitter {
+public:
+ AArch64PrologueEmitter(MachineFunction &MF, MachineBasicBlock &MBB,
+ const AArch64FrameLowering &AFL);
+
+ /// Emit the prologue.
+ void emitPrologue();
+
+ ~AArch64PrologueEmitter() {
+ MF.setHasWinCFI(HasWinCFI);
+#ifndef NDEBUG
+ verifyPrologueClobbers();
+#endif
+ }
+
+private:
+ void emitShadowCallStackPrologue(MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL) const;
+
+ void emitSwiftAsyncContextFramePointer(MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL) const;
+
+ void emitEmptyStackFramePrologue(int64_t NumBytes,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL) const;
+
+ void emitFramePointerSetup(MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned FixedObject);
+
+ void emitDefineCFAWithFP(MachineBasicBlock::iterator MBBI,
+ unsigned FixedObject) const;
+
+ void emitWindowsStackProbe(MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, int64_t &NumBytes,
+ int64_t RealignmentPadding) const;
+
+ void emitCalleeSavedGPRLocations(MachineBasicBlock::iterator MBBI) const;
+ void emitCalleeSavedSVELocations(MachineBasicBlock::iterator MBBI) const;
+
+ void determineLocalsStackSize(uint64_t StackSize, uint64_t PrologueSaveSize);
+
+ MachineFunction &MF;
+ MachineBasicBlock &MBB;
+
+ const Function &F;
+ const MachineFrameInfo &MFI;
+ const AArch64Subtarget &Subtarget;
+ const AArch64FrameLowering &AFL;
+ const AArch64RegisterInfo &RegInfo;
+
+#ifndef NDEBUG
+ mutable LivePhysRegs LiveRegs{RegInfo};
+ MachineBasicBlock::iterator PrologueEndI;
+
+ void collectBlockLiveins();
+ void verifyPrologueClobbers() const;
+#endif
+
+  // Prologue flags. These generally should not change outside of the
+  // constructor. Two exceptions are "CombineSPBump", which is set in
+  // determineLocalsStackSize, and "NeedsWinCFI", which is cleared in
+  // emitFramePointerSetup once the SEH prologue has ended.
+ bool EmitCFI = false;
+ bool EmitAsyncCFI = false;
+ bool HasFP = false;
+ bool IsFunclet = false;
+ bool CombineSPBump = false;
+ bool HomPrologEpilog = false;
+ bool NeedsWinCFI = false;
+
+ // Note: "HasWinCFI" is mutable as it can change in any "emit" function.
+ mutable bool HasWinCFI = false;
+
+ const TargetInstrInfo *TII = nullptr;
+ AArch64FunctionInfo *AFI = nullptr;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 79b56ea9cf850..2bce75f75b964 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -92,6 +92,7 @@ add_llvm_target(AArch64CodeGen
SVEIntrinsicOpts.cpp
MachineSMEABIPass.cpp
AArch64SIMDInstrOpt.cpp
+ AArch64PrologueEpilogue.cpp
DEPENDS
intrinsics_gen