[llvm] 106eb46 - [AArch64] Break up `AArch64FrameLowering::emitPrologue` (NFCI) (#157485)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 10 06:23:19 PDT 2025
Author: Benjamin Maxwell
Date: 2025-09-10T14:23:14+01:00
New Revision: 106eb4623d0c50cd14a7bdf08d159eef6907d0d7
URL: https://github.com/llvm/llvm-project/commit/106eb4623d0c50cd14a7bdf08d159eef6907d0d7
DIFF: https://github.com/llvm/llvm-project/commit/106eb4623d0c50cd14a7bdf08d159eef6907d0d7.diff
LOG: [AArch64] Break up `AArch64FrameLowering::emitPrologue` (NFCI) (#157485)
`emitPrologue` was almost 1k SLOC, and a large portion of that code was
not needed for emitting the vast majority of prologues.
This patch creates a new class `AArch64PrologueEmitter` for emitting the
prologue, which keeps common state/target classes as members. This makes
it easy to add methods that handle niche cases, and allows methods to be
marked `const` when they don't redefine flags/state.
With this change, the core `emitPrologue` is around 275 LOC, with cases
like Windows stack probes or Swift frame pointers split into separate
routines. This makes the logic much easier to follow.
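For orientation, a condensed sketch of the new emitter's shape (stand-in
types; in the patch the constructor also caches TII, AFI, and the
CFI/WinCFI flags, and the helpers take insertion iterators and a
DebugLoc; see the full definitions in the diff below):

struct MachineFunction {};
struct MachineBasicBlock {};
struct AArch64FrameLowering {};

class AArch64PrologueEmitter {
public:
  AArch64PrologueEmitter(MachineFunction &MF, MachineBasicBlock &MBB,
                         const AArch64FrameLowering &AFL)
      : MF(MF), MBB(MBB), AFL(AFL) {}

  // The core path; niche cases are dispatched to helper routines.
  void emitPrologue() {
    emitShadowCallStackPrologue();          // guarded by checks in the patch
    emitSwiftAsyncContextFramePointer();    // guarded by checks in the patch
  }

private:
  // Routines split out of the old monolithic function.
  void emitShadowCallStackPrologue() { /* ... */ }
  void emitSwiftAsyncContextFramePointer() { /* ... */ }
  void verifyPrologueClobbers() const { /* const: reads state only */ }

  MachineFunction &MF;              // common state/target classes are
  MachineBasicBlock &MBB;           // members rather than locals
  const AArch64FrameLowering &AFL;
  bool HasWinCFI = false;
};

The call site in `AArch64FrameLowering::emitPrologue` then reduces to:

  AArch64PrologueEmitter PrologueEmitter(MF, MBB, *this);
  PrologueEmitter.emitPrologue();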
Added:
llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
Modified:
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/lib/Target/AArch64/AArch64FrameLowering.h
llvm/lib/Target/AArch64/CMakeLists.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 87a09b72933db..175b5e04d82ff 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -211,6 +211,7 @@
#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64PrologueEpilogue.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
@@ -218,7 +219,6 @@
#include "Utils/AArch64SMEAttributes.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CFIInstBuilder.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -293,8 +293,6 @@ static cl::opt<bool> DisableMultiVectorSpillFill(
cl::desc("Disable use of LD/ST pairs for SME2 or SVE2p1"), cl::init(false),
cl::Hidden);
-STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
-
/// Returns how much of the incoming argument stack area (in bytes) we should
/// clean up in an epilogue. For the C calling convention this will be 0, for
/// guaranteed tail call conventions it can be positive (a normal return or a
@@ -328,23 +326,20 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
return ArgumentPopSize;
}
-static bool produceCompactUnwindFrame(MachineFunction &MF);
-static bool needsWinCFI(const MachineFunction &MF);
-static StackOffset getSVEStackSize(const MachineFunction &MF);
-static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
- bool HasCall = false);
-static bool requiresSaveVG(const MachineFunction &MF);
+static bool produceCompactUnwindFrame(const AArch64FrameLowering &,
+ MachineFunction &MF);
// Conservatively, returns true if the function is likely to have SVE vectors
// on the stack. This function is safe to be called before callee-saves or
// object offsets have been determined.
-static bool isLikelyToHaveSVEStack(const MachineFunction &MF) {
+static bool isLikelyToHaveSVEStack(const AArch64FrameLowering &AFL,
+ const MachineFunction &MF) {
auto *AFI = MF.getInfo<AArch64FunctionInfo>();
if (AFI->isSVECC())
return true;
if (AFI->hasCalculatedStackSizeSVE())
- return bool(getSVEStackSize(MF));
+ return bool(AFL.getSVEStackSize(MF));
const MachineFrameInfo &MFI = MF.getFrameInfo();
for (int FI = MFI.getObjectIndexBegin(); FI < MFI.getObjectIndexEnd(); FI++) {
@@ -372,7 +367,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
return false;
// TODO: SVE is not supported yet.
- if (isLikelyToHaveSVEStack(MF))
+ if (isLikelyToHaveSVEStack(*this, MF))
return false;
// Bail on stack adjustment needed on return for simplicity.
@@ -409,7 +404,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
/// Returns true if CSRs should be paired.
bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
- return produceCompactUnwindFrame(MF) || homogeneousPrologEpilog(MF);
+ return produceCompactUnwindFrame(*this, MF) || homogeneousPrologEpilog(MF);
}
/// This is the biggest offset to the stack pointer we can encode in aarch64
@@ -451,11 +446,10 @@ AArch64FrameLowering::getStackIDForScalableVectors() const {
return TargetStackID::ScalableVector;
}
-/// Returns the size of the fixed object area (allocated next to sp on entry)
-/// On Win64 this may include a var args area and an UnwindHelp object for EH.
-static unsigned getFixedObjectSize(const MachineFunction &MF,
- const AArch64FunctionInfo *AFI, bool IsWin64,
- bool IsFunclet) {
+unsigned
+AArch64FrameLowering::getFixedObjectSize(const MachineFunction &MF,
+ const AArch64FunctionInfo *AFI,
+ bool IsWin64, bool IsFunclet) const {
assert(AFI->getTailCallReservedStack() % 16 == 0 &&
"Tail call reserved stack must be aligned to 16 bytes");
if (!IsWin64 || IsFunclet) {
@@ -494,7 +488,8 @@ static unsigned getFixedObjectSize(const MachineFunction &MF,
}
/// Returns the size of the entire SVE stackframe (calleesaves + spills).
-static StackOffset getSVEStackSize(const MachineFunction &MF) {
+StackOffset
+AArch64FrameLowering::getSVEStackSize(const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
}
@@ -683,70 +678,6 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
return MBB.erase(I);
}
-void AArch64FrameLowering::emitCalleeSavedGPRLocations(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = MF.getFrameInfo();
-
- const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- if (CSI.empty())
- return;
-
- CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
- for (const auto &Info : CSI) {
- unsigned FrameIdx = Info.getFrameIdx();
- if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector)
- continue;
-
- assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
- int64_t Offset = MFI.getObjectOffset(FrameIdx) - getOffsetOfLocalArea();
- CFIBuilder.buildOffset(Info.getReg(), Offset);
- }
-}
-
-void AArch64FrameLowering::emitCalleeSavedSVELocations(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = MF.getFrameInfo();
-
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- if (CSI.empty())
- return;
-
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
- AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
- CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
-
- std::optional<int64_t> IncomingVGOffsetFromDefCFA;
- if (requiresSaveVG(MF)) {
- auto IncomingVG = *find_if(
- reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
- IncomingVGOffsetFromDefCFA =
- MFI.getObjectOffset(IncomingVG.getFrameIdx()) - getOffsetOfLocalArea();
- }
-
- for (const auto &Info : CSI) {
- if (MFI.getStackID(Info.getFrameIdx()) != TargetStackID::ScalableVector)
- continue;
-
- // Not all unwinders may know about SVE registers, so assume the lowest
- // common denominator.
- assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
- MCRegister Reg = Info.getReg();
- if (!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
- continue;
-
- StackOffset Offset =
- StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
- StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI));
-
- CFIBuilder.insertCFIInst(
- createCFAOffset(TRI, Reg, Offset, IncomingVGOffsetFromDefCFA));
- }
-}
-
void AArch64FrameLowering::resetCFIToInitialState(
MachineBasicBlock &MBB) const {
@@ -1088,8 +1019,8 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
}
}
-static bool windowsRequiresStackProbe(const MachineFunction &MF,
- uint64_t StackSizeInBytes) {
+bool AArch64FrameLowering::windowsRequiresStackProbe(
+ const MachineFunction &MF, uint64_t StackSizeInBytes) const {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64FunctionInfo &MFI = *MF.getInfo<AArch64FunctionInfo>();
// TODO: When implementing stack protectors, take that into account
@@ -1108,19 +1039,9 @@ static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
LiveRegs.addReg(CSRegs[i]);
}
-// Find a scratch register that we can use at the start of the prologue to
-// re-align the stack pointer. We avoid using callee-save registers since they
-// may appear to be free when this is called from canUseAsPrologue (during
-// shrink wrapping), but then no longer be free when this is called from
-// emitPrologue.
-//
-// FIXME: This is a bit conservative, since in the above case we could use one
-// of the callee-save registers as a scratch temp to re-align the stack pointer,
-// but we would then have to make sure that we were in fact saving at least one
-// callee-save register in the prologue, which is additional complexity that
-// doesn't seem worth the benefit.
-static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
- bool HasCall) {
+Register
+AArch64FrameLowering::findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+ bool HasCall) const {
MachineFunction *MF = MBB->getParent();
// If MBB is an entry block, use X9 as the scratch register
@@ -1193,13 +1114,14 @@ bool AArch64FrameLowering::canUseAsPrologue(
return true;
}
-static bool needsWinCFI(const MachineFunction &MF) {
+bool AArch64FrameLowering::needsWinCFI(const MachineFunction &MF) const {
const Function &F = MF.getFunction();
return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
F.needsUnwindTableEntry();
}
-static bool shouldSignReturnAddressEverywhere(const MachineFunction &MF) {
+bool AArch64FrameLowering::shouldSignReturnAddressEverywhere(
+ const MachineFunction &MF) const {
// FIXME: With WinCFI, extra care should be taken to place SEH_PACSignLR
// and SEH_EpilogEnd instructions in the correct order.
if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
@@ -1475,13 +1397,13 @@ static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
}
-bool requiresGetVGCall(MachineFunction &MF) {
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+bool AArch64FrameLowering::requiresGetVGCall(const MachineFunction &MF) const {
+ auto *AFI = MF.getInfo<AArch64FunctionInfo>();
return AFI->hasStreamingModeChanges() &&
!MF.getSubtarget<AArch64Subtarget>().hasSVE();
}
-static bool requiresSaveVG(const MachineFunction &MF) {
+bool AArch64FrameLowering::requiresSaveVG(const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
if (!AFI->needsDwarfUnwindInfo(MF) || !AFI->hasStreamingModeChanges())
return false;
@@ -1499,8 +1421,8 @@ static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
}
-bool isVGInstruction(MachineBasicBlock::iterator MBBI,
- const TargetLowering &TLI) {
+bool AArch64FrameLowering::isVGInstruction(MachineBasicBlock::iterator MBBI,
+ const TargetLowering &TLI) const {
unsigned Opc = MBBI->getOpcode();
if (Opc == AArch64::CNTD_XPiI)
return true;
@@ -1514,15 +1436,12 @@ bool isVGInstruction(MachineBasicBlock::iterator MBBI,
return Opc == TargetOpcode::COPY;
}
-// Convert callee-save register save/restore instruction to do stack pointer
-// decrement/increment to allocate/deallocate the callee-save stack area by
-// converting store/load to use pre/post increment version.
-static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
+MachineBasicBlock::iterator
+AArch64FrameLowering::convertCalleeSaveRestoreToSPPrePostIncDec(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
- MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
- int CFAOffset = 0) {
+ MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
unsigned NewOpc;
// If the function contains streaming mode changes, we expect instructions
@@ -1643,12 +1562,9 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
return std::prev(MBB.erase(MBBI));
}
-// Fixup callee-save register save/restore instructions to take into account
-// combined SP bump by adding the local stack size to the stack offsets.
-static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
- uint64_t LocalStackSize,
- bool NeedsWinCFI,
- bool *HasWinCFI) {
+void AArch64FrameLowering::fixupCalleeSaveRestoreStackOffset(
+ MachineInstr &MI, uint64_t LocalStackSize, bool NeedsWinCFI,
+ bool *HasWinCFI) const {
if (AArch64InstrInfo::isSEHInstruction(MI))
return;
@@ -1703,7 +1619,8 @@ static unsigned getStackHazardSize(const MachineFunction &MF) {
}
// Convenience function to determine whether I is an SVE callee save.
-static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
+bool AArch64FrameLowering::isSVECalleeSave(
+ MachineBasicBlock::iterator I) const {
switch (I->getOpcode()) {
default:
return false;
@@ -1725,42 +1642,6 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
}
}
-static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
- MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, bool NeedsWinCFI,
- bool NeedsUnwindInfo) {
- // Shadow call stack prolog: str x30, [x18], #8
- BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXpost))
- .addReg(AArch64::X18, RegState::Define)
- .addReg(AArch64::LR)
- .addReg(AArch64::X18)
- .addImm(8)
- .setMIFlag(MachineInstr::FrameSetup);
-
- // This instruction also makes x18 live-in to the entry block.
- MBB.addLiveIn(AArch64::X18);
-
- if (NeedsWinCFI)
- BuildMI(MBB, MBBI, DL, TII.get(AArch64::SEH_Nop))
- .setMIFlag(MachineInstr::FrameSetup);
-
- if (NeedsUnwindInfo) {
- // Emit a CFI instruction that causes 8 to be subtracted from the value of
- // x18 when unwinding past this frame.
- static const char CFIInst[] = {
- dwarf::DW_CFA_val_expression,
- 18, // register
- 2, // length
- static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
- static_cast<char>(-8) & 0x7f, // addend (sleb128)
- };
- CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
- .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
- }
-}
-
static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII,
MachineFunction &MF,
MachineBasicBlock &MBB,
@@ -1783,36 +1664,6 @@ static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII,
.buildRestore(AArch64::X18);
}
-// Define the current CFA rule to use the provided FP.
-static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned FixedObject) {
- const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
- const AArch64RegisterInfo *TRI = STI.getRegisterInfo();
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-
- const int OffsetToFirstCalleeSaveFromFP =
- AFI->getCalleeSaveBaseToFrameRecordOffset() -
- AFI->getCalleeSavedStackSize();
- Register FramePtr = TRI->getFrameRegister(MF);
- CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
- .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
-}
-
-#ifndef NDEBUG
-/// Collect live registers from the end of \p MI's parent up to (including) \p
-/// MI in \p LiveRegs.
-static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
- LivePhysRegs &LiveRegs) {
-
- MachineBasicBlock &MBB = *MI.getParent();
- LiveRegs.addLiveOuts(MBB);
- for (const MachineInstr &MI :
- reverse(make_range(MI.getIterator(), MBB.instr_end())))
- LiveRegs.stepBackward(MI);
-}
-#endif
-
void AArch64FrameLowering::emitPacRetPlusLeafHardening(
MachineFunction &MF) const {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
@@ -1848,616 +1699,8 @@ void AArch64FrameLowering::emitPacRetPlusLeafHardening(
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.begin();
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- const Function &F = MF.getFunction();
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
-
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- bool EmitCFI = AFI->needsDwarfUnwindInfo(MF);
- bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
- bool HasFP = hasFP(MF);
- bool NeedsWinCFI = needsWinCFI(MF);
- bool HasWinCFI = false;
- auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });
-
- MachineBasicBlock::iterator End = MBB.end();
-#ifndef NDEBUG
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- // Collect live register from the end of MBB up to the start of the existing
- // frame setup instructions.
- MachineBasicBlock::iterator NonFrameStart = MBB.begin();
- while (NonFrameStart != End &&
- NonFrameStart->getFlag(MachineInstr::FrameSetup))
- ++NonFrameStart;
-
- LivePhysRegs LiveRegs(*TRI);
- if (NonFrameStart != MBB.end()) {
- getLivePhysRegsUpTo(*NonFrameStart, *TRI, LiveRegs);
- // Ignore registers used for stack management for now.
- LiveRegs.removeReg(AArch64::SP);
- LiveRegs.removeReg(AArch64::X19);
- LiveRegs.removeReg(AArch64::FP);
- LiveRegs.removeReg(AArch64::LR);
-
- // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
- // This is necessary to spill VG if required where SVE is unavailable, but
- // X0 is preserved around this call.
- if (requiresGetVGCall(MF))
- LiveRegs.removeReg(AArch64::X0);
- }
-
- auto VerifyClobberOnExit = make_scope_exit([&]() {
- if (NonFrameStart == MBB.end())
- return;
- // Check if any of the newly instructions clobber any of the live registers.
- for (MachineInstr &MI :
- make_range(MBB.instr_begin(), NonFrameStart->getIterator())) {
- for (auto &Op : MI.operands())
- if (Op.isReg() && Op.isDef())
- assert(!LiveRegs.contains(Op.getReg()) &&
- "live register clobbered by inserted prologue instructions");
- }
- });
-#endif
-
- bool IsFunclet = MBB.isEHFuncletEntry();
-
- // At this point, we're going to decide whether or not the function uses a
- // redzone. In most cases, the function doesn't have a redzone so let's
- // assume that's false and set it to true in the case that there's a redzone.
- AFI->setHasRedZone(false);
-
- // Debug location must be unknown since the first debug location is used
- // to determine the end of the prologue.
- DebugLoc DL;
-
- const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
- if (MFnI.shouldSignReturnAddress(MF)) {
- // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
- // are inserted by emitPacRetPlusLeafHardening().
- if (!shouldSignReturnAddressEverywhere(MF)) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
- .setMIFlag(MachineInstr::FrameSetup);
- }
- // AArch64PointerAuth pass will insert SEH_PACSignLR
- HasWinCFI |= NeedsWinCFI;
- }
-
- if (MFnI.needsShadowCallStackPrologueEpilogue(MF)) {
- emitShadowCallStackPrologue(*TII, MF, MBB, MBBI, DL, NeedsWinCFI,
- MFnI.needsDwarfUnwindInfo(MF));
- HasWinCFI |= NeedsWinCFI;
- }
-
- if (EmitCFI && MFnI.isMTETagged()) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITMTETAGGED))
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- // We signal the presence of a Swift extended frame to external tools by
- // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
- // ORR is sufficient, it is assumed a Swift kernel would initialize the TBI
- // bits so that is still true.
- if (HasFP && AFI->hasSwiftAsyncContext()) {
- switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
- case SwiftAsyncFramePointerMode::DeploymentBased:
- if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
- // The special symbol below is absolute and has a *value* that can be
- // combined with the frame pointer to signal an extended frame.
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
- .addExternalSymbol("swift_async_extendedFramePointerFlags",
- AArch64II::MO_GOT);
- if (NeedsWinCFI) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlags(MachineInstr::FrameSetup);
- HasWinCFI = true;
- }
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
- .addUse(AArch64::FP)
- .addUse(AArch64::X16)
- .addImm(Subtarget.isTargetILP32() ? 32 : 0);
- if (NeedsWinCFI) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlags(MachineInstr::FrameSetup);
- HasWinCFI = true;
- }
- break;
- }
- [[fallthrough]];
-
- case SwiftAsyncFramePointerMode::Always:
- // ORR x29, x29, #0x1000_0000_0000_0000
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
- .addUse(AArch64::FP)
- .addImm(0x1100)
- .setMIFlag(MachineInstr::FrameSetup);
- if (NeedsWinCFI) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlags(MachineInstr::FrameSetup);
- HasWinCFI = true;
- }
- break;
-
- case SwiftAsyncFramePointerMode::Never:
- break;
- }
- }
-
- // All calls are tail calls in GHC calling conv, and functions have no
- // prologue/epilogue.
- if (MF.getFunction().getCallingConv() == CallingConv::GHC)
- return;
-
- // Set tagged base pointer to the requested stack slot.
- // Ideally it should match SP value after prologue.
- std::optional<int> TBPI = AFI->getTaggedBasePointerIndex();
- if (TBPI)
- AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
- else
- AFI->setTaggedBasePointerOffset(MFI.getStackSize());
-
- const StackOffset &SVEStackSize = getSVEStackSize(MF);
-
- // getStackSize() includes all the locals in its size calculation. We don't
- // include these locals when computing the stack size of a funclet, as they
- // are allocated in the parent's stack frame and accessed via the frame
- // pointer from the funclet. We only save the callee saved registers in the
- // funclet, which are really the callee saved registers of the parent
- // function, including the funclet.
- int64_t NumBytes =
- IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
- if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
- assert(!HasFP && "unexpected function without stack frame but with FP");
- assert(!SVEStackSize &&
- "unexpected function without stack frame but with SVE objects");
- // All of the stack allocation is for locals.
- AFI->setLocalStackSize(NumBytes);
- if (!NumBytes) {
- if (NeedsWinCFI && HasWinCFI) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
- .setMIFlag(MachineInstr::FrameSetup);
- }
- return;
- }
- // REDZONE: If the stack size is less than 128 bytes, we don't need
- // to actually allocate.
- if (canUseRedZone(MF)) {
- AFI->setHasRedZone(true);
- ++NumRedZoneFunctions;
- } else {
- emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(-NumBytes), TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
- if (EmitCFI) {
- // Label used to tie together the PROLOG_LABEL and the MachineMoves.
- MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
- // Encode the stack size of the leaf function.
- CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
- .buildDefCFAOffset(NumBytes, FrameLabel);
- }
- }
-
- if (NeedsWinCFI) {
- HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- return;
- }
-
- bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
- unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
-
- // Windows unwind can't represent the required stack adjustments if we have
- // both SVE callee-saves and dynamic stack allocations, and the frame
- // pointer is before the SVE spills. The allocation of the frame pointer
- // must be the last instruction in the prologue so the unwinder can restore
- // the stack pointer correctly. (And there isn't any unwind opcode for
- // `addvl sp, x29, -17`.)
- //
- // Because of this, we do spills in the opposite order on Windows: first SVE,
- // then GPRs. The main side-effect of this is that it makes accessing
- // parameters passed on the stack more expensive.
- //
- // We could consider rearranging the spills for simpler cases.
- bool FPAfterSVECalleeSaves =
- Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
-
- if (FPAfterSVECalleeSaves && AFI->hasStackHazardSlotIndex())
- reportFatalUsageError("SME hazard padding is not supported on Windows");
-
- auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
- // All of the remaining stack allocations are for locals.
- AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
- bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
- bool HomPrologEpilog = homogeneousPrologEpilog(MF);
- if (FPAfterSVECalleeSaves) {
- // If we're doing SVE saves first, we need to immediately allocate space
- // for fixed objects, then space for the SVE callee saves.
- //
- // Windows unwind requires that the scalable size is a multiple of 16;
- // that's handled when the callee-saved size is computed.
- auto SaveSize =
- StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
- StackOffset::getFixed(FixedObject);
- allocateStackSpace(MBB, MBBI, 0, SaveSize, NeedsWinCFI, &HasWinCFI,
- /*EmitCFI=*/false, StackOffset{},
- /*FollowupAllocs=*/true);
- NumBytes -= FixedObject;
-
- // Now allocate space for the GPR callee saves.
- while (MBBI != End && IsSVECalleeSave(MBBI))
- ++MBBI;
- MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
- MBB, MBBI, DL, TII, -AFI->getCalleeSavedStackSize(), NeedsWinCFI,
- &HasWinCFI, EmitAsyncCFI);
- NumBytes -= AFI->getCalleeSavedStackSize();
- } else if (CombineSPBump) {
- assert(!SVEStackSize && "Cannot combine SP bump with SVE");
- emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(-NumBytes), TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
- EmitAsyncCFI);
- NumBytes = 0;
- } else if (HomPrologEpilog) {
- // Stack has been already adjusted.
- NumBytes -= PrologueSaveSize;
- } else if (PrologueSaveSize != 0) {
- MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
- MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI,
- EmitAsyncCFI);
- NumBytes -= PrologueSaveSize;
- }
- assert(NumBytes >= 0 && "Negative stack allocation size!?");
-
- // Move past the saves of the callee-saved registers, fixing up the offsets
- // and pre-inc if we decided to combine the callee-save and local stack
- // pointer bump above.
- auto &TLI = *MF.getSubtarget().getTargetLowering();
- while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
- !IsSVECalleeSave(MBBI)) {
- if (CombineSPBump &&
- // Only fix-up frame-setup load/store instructions.
- (!requiresSaveVG(MF) || !isVGInstruction(MBBI, TLI)))
- fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
- NeedsWinCFI, &HasWinCFI);
- ++MBBI;
- }
-
- // For funclets the FP belongs to the containing function.
- if (!IsFunclet && HasFP) {
- // Only set up FP if we actually need to.
- int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
-
- if (CombineSPBump)
- FPOffset += AFI->getLocalStackSize();
-
- if (AFI->hasSwiftAsyncContext()) {
- // Before we update the live FP we have to ensure there's a valid (or
- // null) asynchronous context in its slot just before FP in the frame
- // record, so store it now.
- const auto &Attrs = MF.getFunction().getAttributes();
- bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
- if (HaveInitialContext)
- MBB.addLiveIn(AArch64::X22);
- Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
- .addUse(Reg)
- .addUse(AArch64::SP)
- .addImm(FPOffset - 8)
- .setMIFlags(MachineInstr::FrameSetup);
- if (NeedsWinCFI) {
- // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
- // to multiple instructions, should be mutually-exclusive.
- assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlags(MachineInstr::FrameSetup);
- HasWinCFI = true;
- }
- }
-
- if (HomPrologEpilog) {
- auto Prolog = MBBI;
- --Prolog;
- assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
- Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
- } else {
- // Issue sub fp, sp, FPOffset or
- // mov fp,sp when FPOffset is zero.
- // Note: All stores of callee-saved registers are marked as "FrameSetup".
- // This code marks the instruction(s) that set the FP also.
- emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
- StackOffset::getFixed(FPOffset), TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
- if (NeedsWinCFI && HasWinCFI) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
- .setMIFlag(MachineInstr::FrameSetup);
- // After setting up the FP, the rest of the prolog doesn't need to be
- // included in the SEH unwind info.
- NeedsWinCFI = false;
- }
- }
- if (EmitAsyncCFI)
- emitDefineCFAWithFP(MF, MBB, MBBI, FixedObject);
- }
-
- // Now emit the moves for whatever callee saved regs we have (including FP,
- // LR if those are saved). Frame instructions for SVE register are emitted
- // later, after the instruction which actually save SVE regs.
- if (EmitAsyncCFI)
- emitCalleeSavedGPRLocations(MBB, MBBI);
-
- // Alignment is required for the parent frame, not the funclet
- const bool NeedsRealignment =
- NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF);
- const int64_t RealignmentPadding =
- (NeedsRealignment && MFI.getMaxAlign() > Align(16))
- ? MFI.getMaxAlign().value() - 16
- : 0;
-
- if (windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) {
- if (AFI->getSVECalleeSavedStackSize())
- report_fatal_error(
- "SVE callee saves not yet supported with stack probing");
-
- // Find an available register to spill the value of X15 to, if X15 is being
- // used already for nest.
- unsigned X15Scratch = AArch64::NoRegister;
- const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
- if (llvm::any_of(MBB.liveins(),
- [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
- return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
- AArch64::X15, LiveIn.PhysReg);
- })) {
- X15Scratch = findScratchNonCalleeSaveRegister(&MBB, true);
- assert(X15Scratch != AArch64::NoRegister &&
- (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
-#ifndef NDEBUG
- LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
-#endif
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
- .addReg(AArch64::XZR)
- .addReg(AArch64::X15, RegState::Undef)
- .addReg(AArch64::X15, RegState::Implicit)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
- if (NeedsWinCFI) {
- HasWinCFI = true;
- // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
- // exceed this amount. We need to move at most 2^24 - 1 into x15.
- // This is at most two instructions, MOVZ followed by MOVK.
- // TODO: Fix to use multiple stack alloc unwind codes for stacks
- // exceeding 256MB in size.
- if (NumBytes >= (1 << 28))
- report_fatal_error("Stack size cannot exceed 256MB for stack "
- "unwinding purposes");
-
- uint32_t LowNumWords = NumWords & 0xFFFF;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
- .addImm(LowNumWords)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
- .setMIFlag(MachineInstr::FrameSetup);
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlag(MachineInstr::FrameSetup);
- if ((NumWords & 0xFFFF0000) != 0) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
- .addReg(AArch64::X15)
- .addImm((NumWords & 0xFFFF0000) >> 16) // High half
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
- .setMIFlag(MachineInstr::FrameSetup);
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlag(MachineInstr::FrameSetup);
- }
- } else {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
- .addImm(NumWords)
- .setMIFlags(MachineInstr::FrameSetup);
- }
-
- const char *ChkStk = Subtarget.getChkStkName();
- switch (MF.getTarget().getCodeModel()) {
- case CodeModel::Tiny:
- case CodeModel::Small:
- case CodeModel::Medium:
- case CodeModel::Kernel:
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
- .addExternalSymbol(ChkStk)
- .addReg(AArch64::X15, RegState::Implicit)
- .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
- .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
- .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
- .setMIFlags(MachineInstr::FrameSetup);
- if (NeedsWinCFI) {
- HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlag(MachineInstr::FrameSetup);
- }
- break;
- case CodeModel::Large:
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
- .addReg(AArch64::X16, RegState::Define)
- .addExternalSymbol(ChkStk)
- .addExternalSymbol(ChkStk)
- .setMIFlags(MachineInstr::FrameSetup);
- if (NeedsWinCFI) {
- HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
- .addReg(AArch64::X16, RegState::Kill)
- .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
- .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
- .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
- .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
- .setMIFlags(MachineInstr::FrameSetup);
- if (NeedsWinCFI) {
- HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlag(MachineInstr::FrameSetup);
- }
- break;
- }
-
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
- .addReg(AArch64::SP, RegState::Kill)
- .addReg(AArch64::X15, RegState::Kill)
- .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
- .setMIFlags(MachineInstr::FrameSetup);
- if (NeedsWinCFI) {
- HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
- .addImm(NumBytes)
- .setMIFlag(MachineInstr::FrameSetup);
- }
- NumBytes = 0;
-
- if (RealignmentPadding > 0) {
- if (RealignmentPadding >= 4096) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
- .addReg(AArch64::X16, RegState::Define)
- .addImm(RealignmentPadding)
- .setMIFlags(MachineInstr::FrameSetup);
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
- .addReg(AArch64::SP)
- .addReg(AArch64::X16, RegState::Kill)
- .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
- .setMIFlag(MachineInstr::FrameSetup);
- } else {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
- .addReg(AArch64::SP)
- .addImm(RealignmentPadding)
- .addImm(0)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
- .addReg(AArch64::X15, RegState::Kill)
- .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
- AFI->setStackRealigned(true);
-
- // No need for SEH instructions here; if we're realigning the stack,
- // we've set a frame pointer and already finished the SEH prologue.
- assert(!NeedsWinCFI);
- }
- if (X15Scratch != AArch64::NoRegister) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
- .addReg(AArch64::XZR)
- .addReg(X15Scratch, RegState::Undef)
- .addReg(X15Scratch, RegState::Implicit)
- .setMIFlag(MachineInstr::FrameSetup);
- }
- }
-
- StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
- MachineBasicBlock::iterator CalleeSavesEnd = MBBI;
-
- StackOffset CFAOffset =
- StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
-
- // Process the SVE callee-saves to determine what space needs to be
- // allocated.
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
- LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
- << "\n");
- SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
- SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
- // Find callee save instructions in frame.
- // Note: With FPAfterSVECalleeSaves the callee saves have already been
- // allocated.
- if (!FPAfterSVECalleeSaves) {
- MachineBasicBlock::iterator CalleeSavesBegin = MBBI;
- assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
- while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
- ++MBBI;
- CalleeSavesEnd = MBBI;
-
- StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
- // Allocate space for the callee saves (if any).
- allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
- nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
- MFI.hasVarSizedObjects() || LocalsSize);
- }
- }
- CFAOffset += SVECalleeSavesSize;
-
- if (EmitAsyncCFI)
- emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
-
- // Allocate space for the rest of the frame including SVE locals. Align the
- // stack as necessary.
- assert(!(canUseRedZone(MF) && NeedsRealignment) &&
- "Cannot use redzone with stack realignment");
- if (!canUseRedZone(MF)) {
- // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
- // the correct value here, as NumBytes also includes padding bytes,
- // which shouldn't be counted here.
- allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
- SVELocalsSize + StackOffset::getFixed(NumBytes),
- NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
- CFAOffset, MFI.hasVarSizedObjects());
- }
-
- // If we need a base pointer, set it up here. It's whatever the value of the
- // stack pointer is at this point. Any variable size objects will be allocated
- // after this, so we can still use the base pointer to reference locals.
- //
- // FIXME: Clarify FrameSetup flags here.
- // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
- // needed.
- // For funclets the BP belongs to the containing function.
- if (!IsFunclet && RegInfo->hasBasePointer(MF)) {
- TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
- false);
- if (NeedsWinCFI) {
- HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
- .setMIFlag(MachineInstr::FrameSetup);
- }
- }
-
- // The very last FrameSetup instruction indicates the end of prologue. Emit a
- // SEH opcode indicating the prologue end.
- if (NeedsWinCFI && HasWinCFI) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- // SEH funclets are passed the frame pointer in X1. If the parent
- // function uses the base register, then the base register is used
- // directly, and is not retrieved from X1.
- if (IsFunclet && F.hasPersonalityFn()) {
- EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
- if (isAsynchronousEHPersonality(Per)) {
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
- .addReg(AArch64::X1)
- .setMIFlag(MachineInstr::FrameSetup);
- MBB.addLiveIn(AArch64::X1);
- }
- }
-
- if (EmitCFI && !EmitAsyncCFI) {
- if (HasFP) {
- emitDefineCFAWithFP(MF, MBB, MBBI, FixedObject);
- } else {
- StackOffset TotalSize =
- SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
- CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
- CFIBuilder.insertCFIInst(
- createDefCFA(*RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
- TotalSize, /*LastAdjustmentWasScalable=*/false));
- }
- emitCalleeSavedGPRLocations(MBB, MBBI);
- emitCalleeSavedSVELocations(MBB, MBBI);
- }
+ AArch64PrologueEmitter PrologueEmitter(MF, MBB, *this);
+ PrologueEmitter.emitPrologue();
}
static bool isFuncletReturnInstr(const MachineInstr &MI) {
@@ -2607,7 +1850,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
while (FirstGPRRestoreI != Begin) {
--FirstGPRRestoreI;
if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
- (!FPAfterSVECalleeSaves && IsSVECalleeSave(FirstGPRRestoreI))) {
+ (!FPAfterSVECalleeSaves && isSVECalleeSave(FirstGPRRestoreI))) {
++FirstGPRRestoreI;
break;
} else if (CombineSPBump)
@@ -2689,11 +1932,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
RestoreBegin = std::prev(RestoreEnd);
while (RestoreBegin != MBB.begin() &&
- IsSVECalleeSave(std::prev(RestoreBegin)))
+ isSVECalleeSave(std::prev(RestoreBegin)))
--RestoreBegin;
- assert(IsSVECalleeSave(RestoreBegin) &&
- IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
+ assert(isSVECalleeSave(RestoreBegin) &&
+ isSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
StackOffset CalleeSavedSizeAsOffset =
StackOffset::getScalable(CalleeSavedSize);
@@ -2927,8 +2170,8 @@ AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF,
return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI));
}
-static StackOffset getFPOffset(const MachineFunction &MF,
- int64_t ObjectOffset) {
+StackOffset AArch64FrameLowering::getFPOffset(const MachineFunction &MF,
+ int64_t ObjectOffset) const {
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const Function &F = MF.getFunction();
@@ -2941,8 +2184,8 @@ static StackOffset getFPOffset(const MachineFunction &MF,
return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust);
}
-static StackOffset getStackOffset(const MachineFunction &MF,
- int64_t ObjectOffset) {
+StackOffset AArch64FrameLowering::getStackOffset(const MachineFunction &MF,
+ int64_t ObjectOffset) const {
const auto &MFI = MF.getFrameInfo();
return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize());
}
@@ -3140,7 +2383,8 @@ static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
return getKillRegState(!IsLiveIn);
}
-static bool produceCompactUnwindFrame(MachineFunction &MF) {
+static bool produceCompactUnwindFrame(const AArch64FrameLowering &AFL,
+ MachineFunction &MF) {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
AttributeList Attrs = MF.getFunction().getAttributes();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -3148,7 +2392,7 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
!(Subtarget.getTargetLowering()->supportSwiftError() &&
Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
MF.getFunction().getCallingConv() != CallingConv::SwiftTail &&
- !requiresSaveVG(MF) && !AFI->isSVECC();
+ !AFL.requiresSaveVG(MF) && !AFI->isSVECC();
}
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
@@ -3245,16 +2489,18 @@ bool enableMultiVectorSpillFill(const AArch64Subtarget &Subtarget,
(!IsLocallyStreaming && Subtarget.isStreaming()));
}
-static void computeCalleeSaveRegisterPairs(
- MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
- const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
- bool NeedsFrameRecord) {
+void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
+ MachineFunction &MF,
+ ArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI,
+ SmallVectorImpl<RegPairInfo> &RegPairs,
+ bool NeedsFrameRecord) {
if (CSI.empty())
return;
bool IsWindows = isTargetWindows(MF);
- bool NeedsWinCFI = needsWinCFI(MF);
+ bool NeedsWinCFI = AFL.needsWinCFI(MF);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned StackHazardSize = getStackHazardSize(MF);
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -3263,9 +2509,10 @@ static void computeCalleeSaveRegisterPairs(
(void)CC;
// MachO's compact unwind format relies on all registers being stored in
// pairs.
- assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
- CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS ||
- CC == CallingConv::Win64 || (Count & 1) == 0) &&
+ assert((!produceCompactUnwindFrame(AFL, MF) ||
+ CC == CallingConv::PreserveMost || CC == CallingConv::PreserveAll ||
+ CC == CallingConv::CXX_FAST_TLS || CC == CallingConv::Win64 ||
+ (Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
int ByteOffset = AFI->getCalleeSavedStackSize();
int StackFillDir = -1;
@@ -3381,9 +2628,9 @@ static void computeCalleeSaveRegisterPairs(
// MachO's compact unwind format relies on all registers being stored in
// adjacent register pairs.
- assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
- CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS ||
- CC == CallingConv::Win64 ||
+ assert((!produceCompactUnwindFrame(AFL, MF) ||
+ CC == CallingConv::PreserveMost || CC == CallingConv::PreserveAll ||
+ CC == CallingConv::CXX_FAST_TLS || CC == CallingConv::Win64 ||
(RPI.isPaired() &&
((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
RPI.Reg1 + 1 == RPI.Reg2))) &&
@@ -3496,7 +2743,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
- computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
+ computeCalleeSaveRegisterPairs(*this, MF, CSI, TRI, RegPairs, hasFP(MF));
MachineRegisterInfo &MRI = MF.getRegInfo();
// Refresh the reserved regs in case there are any potential changes since the
@@ -3708,7 +2955,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
- computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
+ computeCalleeSaveRegisterPairs(*this, MF, CSI, TRI, RegPairs, hasFP(MF));
if (homogeneousPrologEpilog(MF, &MBB)) {
auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog))
.setMIFlag(MachineInstr::FrameDestroy);
@@ -4142,7 +3389,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
if (producePairRegisters(MF)) {
if (UnspilledCSGPRPaired == AArch64::NoRegister) {
// Failed to make a pair for compact unwind format, revert spilling.
- if (produceCompactUnwindFrame(MF)) {
+ if (produceCompactUnwindFrame(*this, MF)) {
SavedRegs.reset(UnspilledCSGPR);
ExtraCSSpill = AArch64::NoRegister;
}
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 555a93359c274..a9d65441a4e30 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -19,6 +19,10 @@
namespace llvm {
+class TargetLowering;
+class AArch64FunctionInfo;
+class AArch64PrologueEmitter;
+
class AArch64FrameLowering : public TargetFrameLowering {
public:
explicit AArch64FrameLowering()
@@ -130,12 +134,19 @@ class AArch64FrameLowering : public TargetFrameLowering {
return StackId != TargetStackID::ScalableVector;
}
+ friend class AArch64PrologueEmitter;
void
orderFrameObjects(const MachineFunction &MF,
SmallVectorImpl<int> &ObjectsToAllocate) const override;
bool isFPReserved(const MachineFunction &MF) const;
+ bool needsWinCFI(const MachineFunction &MF) const;
+
+ bool requiresSaveVG(const MachineFunction &MF) const;
+
+ StackOffset getSVEStackSize(const MachineFunction &MF) const;
+
protected:
bool hasFPImpl(const MachineFunction &MF) const override;
@@ -159,10 +170,6 @@ class AArch64FrameLowering : public TargetFrameLowering {
int &MaxCSFrameIndex) const;
bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
uint64_t StackBumpBytes) const;
- void emitCalleeSavedGPRLocations(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI) const;
- void emitCalleeSavedSVELocations(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI) const;
void emitCalleeSavedGPRRestores(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const;
void emitCalleeSavedSVERestores(MachineBasicBlock &MBB,
@@ -196,6 +203,61 @@ class AArch64FrameLowering : public TargetFrameLowering {
void emitRemarks(const MachineFunction &MF,
MachineOptimizationRemarkEmitter *ORE) const override;
+
+ bool windowsRequiresStackProbe(const MachineFunction &MF,
+ uint64_t StackSizeInBytes) const;
+
+ bool shouldSignReturnAddressEverywhere(const MachineFunction &MF) const;
+
+ StackOffset getFPOffset(const MachineFunction &MF,
+ int64_t ObjectOffset) const;
+
+ StackOffset getStackOffset(const MachineFunction &MF,
+ int64_t ObjectOffset) const;
+
+ // Find a scratch register that we can use at the start of the prologue to
+ // re-align the stack pointer. We avoid using callee-save registers since
+ // they may appear to be free when this is called from canUseAsPrologue
+ // (during shrink wrapping), but then no longer be free when this is called
+ // from emitPrologue.
+ //
+ // FIXME: This is a bit conservative, since in the above case we could use one
+ // of the callee-save registers as a scratch temp to re-align the stack
+ // pointer, but we would then have to make sure that we were in fact saving at
+ // least one callee-save register in the prologue, which is additional
+ // complexity that doesn't seem worth the benefit.
+ Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+ bool HasCall = false) const;
+
+ // Convert callee-save register save/restore instruction to do stack pointer
+ // decrement/increment to allocate/deallocate the callee-save stack area by
+ // converting store/load to use pre/post increment version.
+ MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
+ bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
+ MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
+ int CFAOffset = 0) const;
+
+ // Fixup callee-save register save/restore instructions to take into account
+ // combined SP bump by adding the local stack size to the stack offsets.
+ void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
+ uint64_t LocalStackSize,
+ bool NeedsWinCFI,
+ bool *HasWinCFI) const;
+
+ bool isSVECalleeSave(MachineBasicBlock::iterator I) const;
+
+ /// Returns the size of the fixed object area (allocated next to sp on entry)
+ /// On Win64 this may include a var args area and an UnwindHelp object for EH.
+ unsigned getFixedObjectSize(const MachineFunction &MF,
+ const AArch64FunctionInfo *AFI, bool IsWin64,
+ bool IsFunclet) const;
+
+ bool isVGInstruction(MachineBasicBlock::iterator MBBI,
+ const TargetLowering &TLI) const;
+
+ bool requiresGetVGCall(const MachineFunction &MF) const;
};
} // End llvm namespace
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
new file mode 100644
index 0000000000000..af424987b8ddb
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -0,0 +1,794 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64PrologueEpilogue.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/CFIInstBuilder.h"
+#include "llvm/MC/MCContext.h"
+
+#define DEBUG_TYPE "frame-info"
+
+STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
+
+namespace llvm {
+
+AArch64PrologueEmitter::AArch64PrologueEmitter(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ const AArch64FrameLowering &AFL)
+ : MF(MF), MBB(MBB), F(MF.getFunction()), MFI(MF.getFrameInfo()),
+ Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
+ RegInfo(*Subtarget.getRegisterInfo()) {
+ TII = Subtarget.getInstrInfo();
+ AFI = MF.getInfo<AArch64FunctionInfo>();
+
+ EmitCFI = AFI->needsDwarfUnwindInfo(MF);
+ EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
+ HasFP = AFL.hasFP(MF);
+ NeedsWinCFI = AFL.needsWinCFI(MF);
+ IsFunclet = MBB.isEHFuncletEntry();
+ HomPrologEpilog = AFL.homogeneousPrologEpilog(MF);
+
+#ifndef NDEBUG
+ collectBlockLiveins();
+#endif
+}
+
+#ifndef NDEBUG
+/// Collect live registers from the end of \p MI's parent up to (including) \p
+/// MI in \p LiveRegs.
+static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
+ LivePhysRegs &LiveRegs) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ LiveRegs.addLiveOuts(MBB);
+ for (const MachineInstr &MI :
+ reverse(make_range(MI.getIterator(), MBB.instr_end())))
+ LiveRegs.stepBackward(MI);
+}
+
+void AArch64PrologueEmitter::collectBlockLiveins() {
+ // Collect live registers from the end of MBB up to the start of the existing
+ // frame setup instructions.
+ PrologueEndI = MBB.begin();
+ while (PrologueEndI != MBB.end() &&
+ PrologueEndI->getFlag(MachineInstr::FrameSetup))
+ ++PrologueEndI;
+
+ if (PrologueEndI != MBB.end()) {
+ getLivePhysRegsUpTo(*PrologueEndI, RegInfo, LiveRegs);
+ // Ignore registers used for stack management for now.
+ LiveRegs.removeReg(AArch64::SP);
+ LiveRegs.removeReg(AArch64::X19);
+ LiveRegs.removeReg(AArch64::FP);
+ LiveRegs.removeReg(AArch64::LR);
+
+ // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
+ // This is necessary to spill VG if required where SVE is unavailable, but
+ // X0 is preserved around this call.
+ if (AFL.requiresGetVGCall(MF))
+ LiveRegs.removeReg(AArch64::X0);
+ }
+}
+
+void AArch64PrologueEmitter::verifyPrologueClobbers() const {
+ if (PrologueEndI == MBB.end())
+ return;
+ // Check if any of the newly inserted instructions clobber any of the live registers.
+ for (MachineInstr &MI :
+ make_range(MBB.instr_begin(), PrologueEndI->getIterator())) {
+ for (auto &Op : MI.operands())
+ if (Op.isReg() && Op.isDef())
+ assert(!LiveRegs.contains(Op.getReg()) &&
+ "live register clobbered by inserted prologue instructions");
+ }
+}
+#endif
+
+void AArch64PrologueEmitter::determineLocalsStackSize(
+ uint64_t StackSize, uint64_t PrologueSaveSize) {
+ AFI->setLocalStackSize(StackSize - PrologueSaveSize);
+ CombineSPBump = AFL.shouldCombineCSRLocalStackBump(MF, StackSize);
+}
+
+void AArch64PrologueEmitter::emitPrologue() {
+ const MachineBasicBlock::iterator PrologueBeginI = MBB.begin();
+ const MachineBasicBlock::iterator EndI = MBB.end();
+
+ // At this point, we're going to decide whether or not the function uses a
+ // redzone. In most cases, the function doesn't have a redzone so let's
+ // assume that's false and set it to true in the case that there's a redzone.
+ AFI->setHasRedZone(false);
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc DL;
+
+ if (AFI->shouldSignReturnAddress(MF)) {
+ // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
+ // are inserted by emitPacRetPlusLeafHardening().
+ if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
+ BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ // AArch64PointerAuth pass will insert SEH_PACSignLR
+ HasWinCFI |= NeedsWinCFI;
+ }
+
+ if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
+ emitShadowCallStackPrologue(PrologueBeginI, DL);
+ HasWinCFI |= NeedsWinCFI;
+ }
+
+ if (EmitCFI && AFI->isMTETagged())
+ BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::EMITMTETAGGED))
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // We signal the presence of a Swift extended frame to external tools by
+ // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
+ // ORR is sufficient, it is assumed a Swift kernel would initialize the TBI
+ // bits so that is still true.
+ if (HasFP && AFI->hasSwiftAsyncContext())
+ emitSwiftAsyncContextFramePointer(PrologueBeginI, DL);
+
+ // All calls are tail calls in GHC calling conv, and functions have no
+ // prologue/epilogue.
+ if (MF.getFunction().getCallingConv() == CallingConv::GHC)
+ return;
+
+ // Set tagged base pointer to the requested stack slot. Ideally it should
+ // match SP value after prologue.
+ if (std::optional<int> TBPI = AFI->getTaggedBasePointerIndex())
+ AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
+ else
+ AFI->setTaggedBasePointerOffset(MFI.getStackSize());
+
+ // getStackSize() includes all the locals in its size calculation. We don't
+ // include these locals when computing the stack size of a funclet, as they
+ // are allocated in the parent's stack frame and accessed via the frame
+ // pointer from the funclet. We only save the callee saved registers in the
+ // funclet, which are really the callee saved registers of the parent
+ // function, including the funclet.
+ int64_t NumBytes =
+ IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
+ if (!AFI->hasStackFrame() && !AFL.windowsRequiresStackProbe(MF, NumBytes))
+ return emitEmptyStackFramePrologue(NumBytes, PrologueBeginI, DL);
+
+ bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
+ unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
+
+ // Windows unwind can't represent the required stack adjustments if we have
+ // both SVE callee-saves and dynamic stack allocations, and the frame
+ // pointer is before the SVE spills. The allocation of the frame pointer
+ // must be the last instruction in the prologue so the unwinder can restore
+ // the stack pointer correctly. (And there isn't any unwind opcode for
+ // `addvl sp, x29, -17`.)
+ //
+ // Because of this, we do spills in the opposite order on Windows: first SVE,
+ // then GPRs. The main side-effect of this is that it makes accessing
+ // parameters passed on the stack more expensive.
+ //
+ // We could consider rearranging the spills for simpler cases.
+ bool FPAfterSVECalleeSaves =
+ Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
+
+ if (FPAfterSVECalleeSaves && AFI->hasStackHazardSlotIndex())
+ reportFatalUsageError("SME hazard padding is not supported on Windows");
+
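+ // The first SP adjustment covers the GPR callee-saves plus the fixed-object
+ // area (on Win64, for example, the varargs save area).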
+ auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
+ // All of the remaining stack allocations are for locals.
+ determineLocalsStackSize(NumBytes, PrologueSaveSize);
+
+ MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
+ if (FPAfterSVECalleeSaves) {
+ // If we're doing SVE saves first, we need to immediately allocate space
+ // for fixed objects, then space for the SVE callee saves.
+ //
+ // Windows unwind requires that the scalable size is a multiple of 16;
+ // that's handled when the callee-saved size is computed.
+ auto SaveSize =
+ StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
+ StackOffset::getFixed(FixedObject);
+ AFL.allocateStackSpace(MBB, PrologueBeginI, 0, SaveSize, NeedsWinCFI,
+ &HasWinCFI,
+ /*EmitCFI=*/false, StackOffset{},
+ /*FollowupAllocs=*/true);
+ NumBytes -= FixedObject;
+
+ // Now allocate space for the GPR callee saves.
+ MachineBasicBlock::iterator MBBI = PrologueBeginI;
+ while (MBBI != EndI && AFL.isSVECalleeSave(MBBI))
+ ++MBBI;
+ FirstGPRSaveI = AFL.convertCalleeSaveRestoreToSPPrePostIncDec(
+ MBB, MBBI, DL, TII, -AFI->getCalleeSavedStackSize(), NeedsWinCFI,
+ &HasWinCFI, EmitAsyncCFI);
+ NumBytes -= AFI->getCalleeSavedStackSize();
+ } else if (CombineSPBump) {
+ assert(!AFL.getSVEStackSize(MF) && "Cannot combine SP bump with SVE");
+ emitFrameOffset(MBB, PrologueBeginI, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(-NumBytes), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
+ EmitAsyncCFI);
+ NumBytes = 0;
+ } else if (HomPrologEpilog) {
+ // Stack has been already adjusted.
+ NumBytes -= PrologueSaveSize;
+ } else if (PrologueSaveSize != 0) {
+ FirstGPRSaveI = AFL.convertCalleeSaveRestoreToSPPrePostIncDec(
+ MBB, PrologueBeginI, DL, TII, -PrologueSaveSize, NeedsWinCFI,
+ &HasWinCFI, EmitAsyncCFI);
+ NumBytes -= PrologueSaveSize;
+ }
+ assert(NumBytes >= 0 && "Negative stack allocation size!?");
+
+ // Move past the saves of the callee-saved registers, fixing up the offsets
+ // and the pre-increment if we decided to combine the callee-save and local
+ // stack pointer bumps above.
+ auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+ MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
+ while (AfterGPRSavesI != EndI &&
+ AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
+ !AFL.isSVECalleeSave(AfterGPRSavesI)) {
+ if (CombineSPBump &&
+ // Only fix-up frame-setup load/store instructions.
+ (!AFL.requiresSaveVG(MF) || !AFL.isVGInstruction(AfterGPRSavesI, TLI)))
+ AFL.fixupCalleeSaveRestoreStackOffset(
+ *AfterGPRSavesI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI);
+ ++AfterGPRSavesI;
+ }
+
+ // For funclets the FP belongs to the containing function. Only set up FP if
+ // we actually need to.
+ if (!IsFunclet && HasFP)
+ emitFramePointerSetup(AfterGPRSavesI, DL, FixedObject);
+
+ // Now emit the moves for whatever callee saved regs we have (including FP,
+ // LR if those are saved). Frame instructions for SVE registers are emitted
+ // later, after the instructions which actually save the SVE regs.
+ if (EmitAsyncCFI)
+ emitCalleeSavedGPRLocations(AfterGPRSavesI);
+
+ // Alignment is required for the parent frame, not the funclet.
+ const bool NeedsRealignment =
+ NumBytes && !IsFunclet && RegInfo.hasStackRealignment(MF);
+ const int64_t RealignmentPadding =
+ (NeedsRealignment && MFI.getMaxAlign() > Align(16))
+ ? MFI.getMaxAlign().value() - 16
+ : 0;
+
+ if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
+ emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
+
+ StackOffset SVEStackSize = AFL.getSVEStackSize(MF);
+ StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
+ MachineBasicBlock::iterator CalleeSavesEnd = AfterGPRSavesI;
+
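+ // The CFA's distance from SP so far: everything already allocated above
+ // (the total stack size minus the bytes still left to allocate).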
+ StackOffset CFAOffset =
+ StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
+
+ // Process the SVE callee-saves to determine what space needs to be
+ // allocated.
+ MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
+ if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
+ LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
+ << "\n");
+ SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
+ SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
+ // Find callee save instructions in frame.
+ // Note: With FPAfterSVECalleeSaves the callee saves have already been
+ // allocated.
+ if (!FPAfterSVECalleeSaves) {
+ MachineBasicBlock::iterator CalleeSavesBegin = AfterGPRSavesI;
+ assert(AFL.isSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
+ while (AFL.isSVECalleeSave(AfterSVESavesI) &&
+ AfterSVESavesI != MBB.getFirstTerminator())
+ ++AfterSVESavesI;
+ CalleeSavesEnd = AfterSVESavesI;
+
+ StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
+ // Allocate space for the callee saves (if any).
+ AFL.allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize,
+ false, nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || LocalsSize);
+ }
+ }
+ CFAOffset += SVECalleeSavesSize;
+
+ if (EmitAsyncCFI)
+ emitCalleeSavedSVELocations(CalleeSavesEnd);
+
+ // Allocate space for the rest of the frame including SVE locals. Align the
+ // stack as necessary.
+ assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
+ "Cannot use redzone with stack realignment");
+ if (!AFL.canUseRedZone(MF)) {
+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+ // the correct value here, as NumBytes also includes padding bytes,
+ // which shouldn't be counted here.
+ AFL.allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
+ SVELocalsSize + StackOffset::getFixed(NumBytes),
+ NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
+ CFAOffset, MFI.hasVarSizedObjects());
+ }
+
+ // If we need a base pointer, set it up here. It's whatever the value of the
+ // stack pointer is at this point. Any variable size objects will be allocated
+ // after this, so we can still use the base pointer to reference locals.
+ //
+ // FIXME: Clarify FrameSetup flags here.
+ // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
+ // needed.
+ // For funclets the BP belongs to the containing function.
+ if (!IsFunclet && RegInfo.hasBasePointer(MF)) {
+ TII->copyPhysReg(MBB, AfterSVESavesI, DL, RegInfo.getBaseRegister(),
+ AArch64::SP, false);
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+
+ // The very last FrameSetup instruction indicates the end of the prologue.
+ // Emit an SEH opcode marking the prologue end.
+ if (NeedsWinCFI && HasWinCFI) {
+ BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ // SEH funclets are passed the frame pointer in X1. If the parent
+ // function uses the base register, then the base register is used
+ // directly, and is not retrieved from X1.
+ if (IsFunclet && F.hasPersonalityFn()) {
+ EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
+ if (isAsynchronousEHPersonality(Per)) {
+ BuildMI(MBB, AfterSVESavesI, DL, TII->get(TargetOpcode::COPY),
+ AArch64::FP)
+ .addReg(AArch64::X1)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MBB.addLiveIn(AArch64::X1);
+ }
+ }
+
+ if (EmitCFI && !EmitAsyncCFI) {
+ if (HasFP) {
+ emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
+ } else {
+ StackOffset TotalSize =
+ SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
+ CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
+ CFIBuilder.insertCFIInst(
+ createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
+ TotalSize, /*LastAdjustmentWasScalable=*/false));
+ }
+ emitCalleeSavedGPRLocations(AfterSVESavesI);
+ emitCalleeSavedSVELocations(AfterSVESavesI);
+ }
+}
+
+void AArch64PrologueEmitter::emitShadowCallStackPrologue(
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
+ // Shadow call stack prolog: str x30, [x18], #8
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXpost))
+ .addReg(AArch64::X18, RegState::Define)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::X18)
+ .addImm(8)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // The store above reads x18, so make it live-in to the entry block.
+ MBB.addLiveIn(AArch64::X18);
+
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ if (EmitCFI) {
+ // Emit a CFI instruction that causes 8 to be subtracted from the value of
+ // x18 when unwinding past this frame.
+ static const char CFIInst[] = {
+ dwarf::DW_CFA_val_expression,
+ 18, // register
+ 2, // length
+ static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
+ static_cast<char>(-8) & 0x7f, // addend (sleb128)
+ };
+ CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
+ .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
+ }
+}
+
+void AArch64PrologueEmitter::emitSwiftAsyncContextFramePointer(
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
+ switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
+ case SwiftAsyncFramePointerMode::DeploymentBased:
+ if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
+ // The special symbol below is absolute and has a *value* that can be
+ // combined with the frame pointer to signal an extended frame.
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
+ .addExternalSymbol("swift_async_extendedFramePointerFlags",
+ AArch64II::MO_GOT);
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
+ .addUse(AArch64::FP)
+ .addUse(AArch64::X16)
+ .addImm(Subtarget.isTargetILP32() ? 32 : 0);
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
+ break;
+ }
+ [[fallthrough]];
+
+ case SwiftAsyncFramePointerMode::Always:
+ // ORR x29, x29, #0x1000_0000_0000_0000
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
+ .addUse(AArch64::FP)
+ .addImm(0x1100)
+ .setMIFlag(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
+ break;
+
+ case SwiftAsyncFramePointerMode::Never:
+ break;
+ }
+}
+
+void AArch64PrologueEmitter::emitEmptyStackFramePrologue(
+ int64_t NumBytes, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL) const {
+ assert(!HasFP && "unexpected function without stack frame but with FP");
+ assert(!AFL.getSVEStackSize(MF) &&
+ "unexpected function without stack frame but with SVE objects");
+ // All of the stack allocation is for locals.
+ AFI->setLocalStackSize(NumBytes);
+ if (!NumBytes) {
+ if (NeedsWinCFI && HasWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ return;
+ }
+ // REDZONE: If the stack size is less than 128 bytes, we don't need
+ // to actually allocate.
+ if (AFL.canUseRedZone(MF)) {
+ AFI->setHasRedZone(true);
+ ++NumRedZoneFunctions;
+ } else {
+ emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(-NumBytes), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ if (EmitCFI) {
+ // Label used to tie together the PROLOG_LABEL and the MachineMoves.
+ MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
+ // Encode the stack size of the leaf function.
+ CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
+ .buildDefCFAOffset(NumBytes, FrameLabel);
+ }
+ }
+
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+}
+
+void AArch64PrologueEmitter::emitFramePointerSetup(
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+ unsigned FixedObject) {
+ int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
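+ // With a combined SP bump the locals are allocated in the same SP decrement
+ // as the callee-saves, so the frame record sits a further LocalStackSize
+ // bytes from the current SP.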
+ if (CombineSPBump)
+ FPOffset += AFI->getLocalStackSize();
+
+ if (AFI->hasSwiftAsyncContext()) {
+ // Before we update the live FP we have to ensure there's a valid (or
+ // null) asynchronous context in its slot just before FP in the frame
+ // record, so store it now.
+ const auto &Attrs = MF.getFunction().getAttributes();
+ bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
+ if (HaveInitialContext)
+ MBB.addLiveIn(AArch64::X22);
+ Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
+ .addUse(Reg)
+ .addUse(AArch64::SP)
+ .addImm(FPOffset - 8)
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
+ // to multiple instructions, should be mutually exclusive.
+ assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
+ }
+
+ if (HomPrologEpilog) {
+ auto Prolog = MBBI;
+ --Prolog;
+ assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
+ Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
+ } else {
+ // Issue sub fp, sp, FPOffset or
+ // mov fp, sp when FPOffset is zero.
+ // Note: All stores of callee-saved registers are marked as "FrameSetup".
+ // This code marks the instruction(s) that set the FP also.
+ emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
+ StackOffset::getFixed(FPOffset), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ if (NeedsWinCFI && HasWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ // After setting up the FP, the rest of the prolog doesn't need to be
+ // included in the SEH unwind info.
+ NeedsWinCFI = false;
+ }
+ }
+ if (EmitAsyncCFI)
+ emitDefineCFAWithFP(MBBI, FixedObject);
+}
+
+// Define the current CFA rule to use the provided FP.
+void AArch64PrologueEmitter::emitDefineCFAWithFP(
+ MachineBasicBlock::iterator MBBI, unsigned FixedObject) const {
+ const AArch64RegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const int OffsetToFirstCalleeSaveFromFP =
+ AFI->getCalleeSaveBaseToFrameRecordOffset() -
+ AFI->getCalleeSavedStackSize();
+ Register FramePtr = TRI->getFrameRegister(MF);
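+ // CFA = FramePtr + (fixed-object area + callee-saves above the frame
+ // record), i.e. the SP value on entry to the function.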
+ CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
+ .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
+}
+
+void AArch64PrologueEmitter::emitWindowsStackProbe(
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t &NumBytes,
+ int64_t RealignmentPadding) const {
+ if (AFI->getSVECalleeSavedStackSize())
+ report_fatal_error("SVE callee saves not yet supported with stack probing");
+
+ // Find an available register to spill the value of X15 to, if X15 is
+ // already being used for the nest parameter.
+ unsigned X15Scratch = AArch64::NoRegister;
+ const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
+ if (llvm::any_of(MBB.liveins(),
+ [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
+ return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
+ AArch64::X15, LiveIn.PhysReg);
+ })) {
+ X15Scratch = AFL.findScratchNonCalleeSaveRegister(&MBB, /*HasCall=*/true);
+ assert(X15Scratch != AArch64::NoRegister &&
+ (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
+#ifndef NDEBUG
+ LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
+#endif
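+ // Save x15 in the scratch register with a mov (encoded as
+ // "orr X15Scratch, xzr, x15"); it is restored after the probe below.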
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::X15, RegState::Undef)
+ .addReg(AArch64::X15, RegState::Implicit)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
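+ // The Windows stack probe helper expects the allocation size in x15, in
+ // units of 16 bytes (hence the shift by 4 below).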
+ uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
+ // exceed this amount. We need to move at most 2^24 - 1 into x15.
+ // This is at most two instructions, MOVZ followed by MOVK.
+ // TODO: Fix to use multiple stack alloc unwind codes for stacks
+ // exceeding 256MB in size.
+ if (NumBytes >= (1 << 28))
+ report_fatal_error("Stack size cannot exceed 256MB for stack "
+ "unwinding purposes");
+
+ uint32_t LowNumWords = NumWords & 0xFFFF;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
+ .addImm(LowNumWords)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ if ((NumWords & 0xFFFF0000) != 0) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
+ .addReg(AArch64::X15)
+ .addImm((NumWords & 0xFFFF0000) >> 16) // High half
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ const char *ChkStk = Subtarget.getChkStkName();
+ switch (MF.getTarget().getCodeModel()) {
+ case CodeModel::Tiny:
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ case CodeModel::Kernel:
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
+ .addExternalSymbol(ChkStk)
+ .addReg(AArch64::X15, RegState::Implicit)
+ .addReg(AArch64::X16,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::X17,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::NZCV,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ break;
+ case CodeModel::Large:
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
+ .addReg(AArch64::X16, RegState::Define)
+ .addExternalSymbol(ChkStk)
+ .addExternalSymbol(ChkStk)
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
+ .addReg(AArch64::X16, RegState::Kill)
+ .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
+ .addReg(AArch64::X16,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::X17,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::NZCV,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ break;
+ }
+
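+ // sp -= x15 * 16 (UXTX #4 scales the probe count back to bytes). The probe
+ // routine has already touched each page of the allocation.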
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
+ .addReg(AArch64::SP, RegState::Kill)
+ .addReg(AArch64::X15, RegState::Kill)
+ .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ NumBytes = 0;
+
+ if (RealignmentPadding > 0) {
+ if (RealignmentPadding >= 4096) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
+ .addReg(AArch64::X16, RegState::Define)
+ .addImm(RealignmentPadding)
+ .setMIFlags(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
+ .addReg(AArch64::SP)
+ .addReg(AArch64::X16, RegState::Kill)
+ .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
+ .addReg(AArch64::SP)
+ .addImm(RealignmentPadding)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
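+ // sp = x15 & ~(MaxAlign - 1): align the padded SP value down to the
+ // requested alignment.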
+ uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+ .addReg(AArch64::X15, RegState::Kill)
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
+ AFI->setStackRealigned(true);
+
+ // No need for SEH instructions here; if we're realigning the stack,
+ // we've set a frame pointer and already finished the SEH prologue.
+ assert(!NeedsWinCFI);
+ }
+ if (X15Scratch != AArch64::NoRegister) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
+ .addReg(AArch64::XZR)
+ .addReg(X15Scratch, RegState::Undef)
+ .addReg(X15Scratch, RegState::Implicit)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+}
+
+void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
+ MachineBasicBlock::iterator MBBI) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ if (CSI.empty())
+ return;
+
+ CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
+ for (const auto &Info : CSI) {
+ unsigned FrameIdx = Info.getFrameIdx();
+ if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector)
+ continue;
+
+ assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
+ int64_t Offset = MFI.getObjectOffset(FrameIdx) - AFL.getOffsetOfLocalArea();
+ CFIBuilder.buildOffset(Info.getReg(), Offset);
+ }
+}
+
+void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
+ MachineBasicBlock::iterator MBBI) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ if (CSI.empty())
+ return;
+
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+ CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
+
+ std::optional<int64_t> IncomingVGOffsetFromDefCFA;
+ if (AFL.requiresSaveVG(MF)) {
+ auto IncomingVG = *find_if(
+ reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
+ IncomingVGOffsetFromDefCFA = MFI.getObjectOffset(IncomingVG.getFrameIdx()) -
+ AFL.getOffsetOfLocalArea();
+ }
+
+ for (const auto &Info : CSI) {
+ if (MFI.getStackID(Info.getFrameIdx()) != TargetStackID::ScalableVector)
+ continue;
+
+ // Not all unwinders may know about SVE registers, so assume the lowest
+ // common denominator.
+ assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
+ MCRegister Reg = Info.getReg();
+ if (!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
+ continue;
+
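+ // Offsets of SVE saves are scalable, so createCFAOffset below emits a
+ // DWARF expression in terms of VG rather than a plain offset.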
+ StackOffset Offset =
+ StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
+ StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI));
+
+ CFIBuilder.insertCFIInst(
+ createCFAOffset(TRI, Reg, Offset, IncomingVGOffsetFromDefCFA));
+ }
+}
+
+} // namespace llvm
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
new file mode 100644
index 0000000000000..94029ede60c76
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
@@ -0,0 +1,111 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declaration of the AArch64PrologueEmitter class,
+/// which is used to emit the prologue on AArch64.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64PROLOGUEEPILOGUE_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64PROLOGUEEPILOGUE_H
+
+#include "AArch64RegisterInfo.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class AArch64Subtarget;
+class AArch64FunctionInfo;
+class AArch64FrameLowering;
+
+/// A helper class for emitting the prologue. Substantial new functionality
+/// should be factored into a new method. Where possible "emit*" methods should
+/// be const, and any flags that change how the prologue is emitted should be
+/// set in the constructor.
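+///
+/// A likely usage sketch (the AArch64FrameLowering call site is not part of
+/// this hunk):
+/// \code
+///   AArch64PrologueEmitter(MF, MBB, AFL).emitPrologue();
+/// \endcode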
+class AArch64PrologueEmitter {
+public:
+ AArch64PrologueEmitter(MachineFunction &MF, MachineBasicBlock &MBB,
+ const AArch64FrameLowering &AFL);
+
+ /// Emit the prologue.
+ void emitPrologue();
+
+ ~AArch64PrologueEmitter() {
+ MF.setHasWinCFI(HasWinCFI);
+#ifndef NDEBUG
+ verifyPrologueClobbers();
+#endif
+ }
+
+private:
+ void emitShadowCallStackPrologue(MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL) const;
+
+ void emitSwiftAsyncContextFramePointer(MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL) const;
+
+ void emitEmptyStackFramePrologue(int64_t NumBytes,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL) const;
+
+ void emitFramePointerSetup(MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned FixedObject);
+
+ void emitDefineCFAWithFP(MachineBasicBlock::iterator MBBI,
+ unsigned FixedObject) const;
+
+ void emitWindowsStackProbe(MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, int64_t &NumBytes,
+ int64_t RealignmentPadding) const;
+
+ void emitCalleeSavedGPRLocations(MachineBasicBlock::iterator MBBI) const;
+ void emitCalleeSavedSVELocations(MachineBasicBlock::iterator MBBI) const;
+
+ void determineLocalsStackSize(uint64_t StackSize, uint64_t PrologueSaveSize);
+
+ MachineFunction &MF;
+ MachineBasicBlock &MBB;
+
+ const Function &F;
+ const MachineFrameInfo &MFI;
+ const AArch64Subtarget &Subtarget;
+ const AArch64FrameLowering &AFL;
+ const AArch64RegisterInfo &RegInfo;
+
+#ifndef NDEBUG
+ mutable LivePhysRegs LiveRegs{RegInfo};
+ MachineBasicBlock::iterator PrologueEndI;
+
+ void collectBlockLiveins();
+ void verifyPrologueClobbers() const;
+#endif
+
+ // Prologue flags. These generally should not change outside of the
+ // constructor. Two exceptions are "CombineSPBump" which is set in
+ // determineLocalsStackSize, and "NeedsWinCFI" which is set in
+ // emitFramePointerSetup.
+ bool EmitCFI = false;
+ bool EmitAsyncCFI = false;
+ bool HasFP = false;
+ bool IsFunclet = false;
+ bool CombineSPBump = false;
+ bool HomPrologEpilog = false;
+ bool NeedsWinCFI = false;
+
+ // Note: "HasWinCFI" is mutable as it can change in any "emit" function.
+ mutable bool HasWinCFI = false;
+
+ const TargetInstrInfo *TII = nullptr;
+ AArch64FunctionInfo *AFI = nullptr;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 71017b37cf49c..a8185358d6dfc 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -92,6 +92,7 @@ add_llvm_target(AArch64CodeGen
SVEIntrinsicOpts.cpp
MachineSMEABIPass.cpp
AArch64SIMDInstrOpt.cpp
+ AArch64PrologueEpilogue.cpp
DEPENDS
intrinsics_gen