[llvm] [AArch64] Break up `AArch64FrameLowering::emitPrologue` (NFCI) (PR #157485)

Benjamin Maxwell via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 8 08:02:18 PDT 2025


https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/157485

`emitPrologue` was almost 1k SLOC, with a large portion of that code not actually needed for emitting the vast majority of prologues.

This patch creates a new class `AArch64PrologueEmitter` for emitting the prologue, which keeps common state/target classes as members. This makes adding methods that handle niche cases easy, and allows methods to be marked "const" when they don't redefine flags/state.

With this change, the core "emitPrologue" is around 275 LOC, with cases like Windows stack probes or Swift frame pointers split into their own routines. This makes following the logic much easier.

>From 61d73a63699dfc0257312bb95c9ce274da820c9f Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 8 Sep 2025 13:04:54 +0000
Subject: [PATCH] [AArch64] Break up `AArch64FrameLowering::emitPrologue`
 (NFCI)

`emitPrologue` was almost 1k SLOC, with a large portion of that not
actually related to emitting the vast majority of prologues.

This patch creates a new class `AArch64PrologueEmitter` for emitting
the prologue, which keeps common state/target classes as members. This
makes adding methods that handle niche cases easy, and allows methods to
be marked "const" when they don't redefine flags/state.

With this change, the core "emitPrologue" is around 275 LOC, with cases
like Windows stack probes, or Swift frame pointers, split off into their
own routines.
---
 .../Target/AArch64/AArch64FrameLowering.cpp   | 883 ++----------------
 .../lib/Target/AArch64/AArch64FrameLowering.h |  70 +-
 .../AArch64/AArch64PrologueEpilogue.cpp       | 794 ++++++++++++++++
 .../Target/AArch64/AArch64PrologueEpilogue.h  | 111 +++
 llvm/lib/Target/AArch64/CMakeLists.txt        |   1 +
 5 files changed, 1037 insertions(+), 822 deletions(-)
 create mode 100644 llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
 create mode 100644 llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 87a09b72933db..175b5e04d82ff 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -211,6 +211,7 @@
 #include "AArch64FrameLowering.h"
 #include "AArch64InstrInfo.h"
 #include "AArch64MachineFunctionInfo.h"
+#include "AArch64PrologueEpilogue.h"
 #include "AArch64RegisterInfo.h"
 #include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
@@ -218,7 +219,6 @@
 #include "Utils/AArch64SMEAttributes.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/CFIInstBuilder.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
@@ -293,8 +293,6 @@ static cl::opt<bool> DisableMultiVectorSpillFill(
     cl::desc("Disable use of LD/ST pairs for SME2 or SVE2p1"), cl::init(false),
     cl::Hidden);
 
-STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
-
 /// Returns how much of the incoming argument stack area (in bytes) we should
 /// clean up in an epilogue. For the C calling convention this will be 0, for
 /// guaranteed tail call conventions it can be positive (a normal return or a
@@ -328,23 +326,20 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
   return ArgumentPopSize;
 }
 
-static bool produceCompactUnwindFrame(MachineFunction &MF);
-static bool needsWinCFI(const MachineFunction &MF);
-static StackOffset getSVEStackSize(const MachineFunction &MF);
-static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
-                                                 bool HasCall = false);
-static bool requiresSaveVG(const MachineFunction &MF);
+static bool produceCompactUnwindFrame(const AArch64FrameLowering &,
+                                      MachineFunction &MF);
 
 // Conservatively, returns true if the function is likely to have an SVE vectors
 // on the stack. This function is safe to be called before callee-saves or
 // object offsets have been determined.
-static bool isLikelyToHaveSVEStack(const MachineFunction &MF) {
+static bool isLikelyToHaveSVEStack(const AArch64FrameLowering &AFL,
+                                   const MachineFunction &MF) {
   auto *AFI = MF.getInfo<AArch64FunctionInfo>();
   if (AFI->isSVECC())
     return true;
 
   if (AFI->hasCalculatedStackSizeSVE())
-    return bool(getSVEStackSize(MF));
+    return bool(AFL.getSVEStackSize(MF));
 
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   for (int FI = MFI.getObjectIndexBegin(); FI < MFI.getObjectIndexEnd(); FI++) {
@@ -372,7 +367,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
     return false;
 
   // TODO: SVE is not supported yet.
-  if (isLikelyToHaveSVEStack(MF))
+  if (isLikelyToHaveSVEStack(*this, MF))
     return false;
 
   // Bail on stack adjustment needed on return for simplicity.
@@ -409,7 +404,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
 
 /// Returns true if CSRs should be paired.
 bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
-  return produceCompactUnwindFrame(MF) || homogeneousPrologEpilog(MF);
+  return produceCompactUnwindFrame(*this, MF) || homogeneousPrologEpilog(MF);
 }
 
 /// This is the biggest offset to the stack pointer we can encode in aarch64
@@ -451,11 +446,10 @@ AArch64FrameLowering::getStackIDForScalableVectors() const {
   return TargetStackID::ScalableVector;
 }
 
-/// Returns the size of the fixed object area (allocated next to sp on entry)
-/// On Win64 this may include a var args area and an UnwindHelp object for EH.
-static unsigned getFixedObjectSize(const MachineFunction &MF,
-                                   const AArch64FunctionInfo *AFI, bool IsWin64,
-                                   bool IsFunclet) {
+unsigned
+AArch64FrameLowering::getFixedObjectSize(const MachineFunction &MF,
+                                         const AArch64FunctionInfo *AFI,
+                                         bool IsWin64, bool IsFunclet) const {
   assert(AFI->getTailCallReservedStack() % 16 == 0 &&
          "Tail call reserved stack must be aligned to 16 bytes");
   if (!IsWin64 || IsFunclet) {
@@ -494,7 +488,8 @@ static unsigned getFixedObjectSize(const MachineFunction &MF,
 }
 
 /// Returns the size of the entire SVE stackframe (calleesaves + spills).
-static StackOffset getSVEStackSize(const MachineFunction &MF) {
+StackOffset
+AArch64FrameLowering::getSVEStackSize(const MachineFunction &MF) const {
   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
 }
@@ -683,70 +678,6 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
   return MBB.erase(I);
 }
 
-void AArch64FrameLowering::emitCalleeSavedGPRLocations(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
-  MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo &MFI = MF.getFrameInfo();
-
-  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
-  if (CSI.empty())
-    return;
-
-  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
-  for (const auto &Info : CSI) {
-    unsigned FrameIdx = Info.getFrameIdx();
-    if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector)
-      continue;
-
-    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
-    int64_t Offset = MFI.getObjectOffset(FrameIdx) - getOffsetOfLocalArea();
-    CFIBuilder.buildOffset(Info.getReg(), Offset);
-  }
-}
-
-void AArch64FrameLowering::emitCalleeSavedSVELocations(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
-  MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo &MFI = MF.getFrameInfo();
-
-  // Add callee saved registers to move list.
-  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
-  if (CSI.empty())
-    return;
-
-  const TargetSubtargetInfo &STI = MF.getSubtarget();
-  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
-  AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
-  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
-
-  std::optional<int64_t> IncomingVGOffsetFromDefCFA;
-  if (requiresSaveVG(MF)) {
-    auto IncomingVG = *find_if(
-        reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
-    IncomingVGOffsetFromDefCFA =
-        MFI.getObjectOffset(IncomingVG.getFrameIdx()) - getOffsetOfLocalArea();
-  }
-
-  for (const auto &Info : CSI) {
-    if (MFI.getStackID(Info.getFrameIdx()) != TargetStackID::ScalableVector)
-      continue;
-
-    // Not all unwinders may know about SVE registers, so assume the lowest
-    // common denominator.
-    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
-    MCRegister Reg = Info.getReg();
-    if (!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
-      continue;
-
-    StackOffset Offset =
-        StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
-        StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI));
-
-    CFIBuilder.insertCFIInst(
-        createCFAOffset(TRI, Reg, Offset, IncomingVGOffsetFromDefCFA));
-  }
-}
-
 void AArch64FrameLowering::resetCFIToInitialState(
     MachineBasicBlock &MBB) const {
 
@@ -1088,8 +1019,8 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
   }
 }
 
-static bool windowsRequiresStackProbe(const MachineFunction &MF,
-                                      uint64_t StackSizeInBytes) {
+bool AArch64FrameLowering::windowsRequiresStackProbe(
+    const MachineFunction &MF, uint64_t StackSizeInBytes) const {
   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
   const AArch64FunctionInfo &MFI = *MF.getInfo<AArch64FunctionInfo>();
   // TODO: When implementing stack protectors, take that into account
@@ -1108,19 +1039,9 @@ static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
     LiveRegs.addReg(CSRegs[i]);
 }
 
-// Find a scratch register that we can use at the start of the prologue to
-// re-align the stack pointer.  We avoid using callee-save registers since they
-// may appear to be free when this is called from canUseAsPrologue (during
-// shrink wrapping), but then no longer be free when this is called from
-// emitPrologue.
-//
-// FIXME: This is a bit conservative, since in the above case we could use one
-// of the callee-save registers as a scratch temp to re-align the stack pointer,
-// but we would then have to make sure that we were in fact saving at least one
-// callee-save register in the prologue, which is additional complexity that
-// doesn't seem worth the benefit.
-static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
-                                                 bool HasCall) {
+Register
+AArch64FrameLowering::findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+                                                       bool HasCall) const {
   MachineFunction *MF = MBB->getParent();
 
   // If MBB is an entry block, use X9 as the scratch register
@@ -1193,13 +1114,14 @@ bool AArch64FrameLowering::canUseAsPrologue(
   return true;
 }
 
-static bool needsWinCFI(const MachineFunction &MF) {
+bool AArch64FrameLowering::needsWinCFI(const MachineFunction &MF) const {
   const Function &F = MF.getFunction();
   return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
          F.needsUnwindTableEntry();
 }
 
-static bool shouldSignReturnAddressEverywhere(const MachineFunction &MF) {
+bool AArch64FrameLowering::shouldSignReturnAddressEverywhere(
+    const MachineFunction &MF) const {
   // FIXME: With WinCFI, extra care should be taken to place SEH_PACSignLR
   //        and SEH_EpilogEnd instructions in the correct order.
   if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
@@ -1475,13 +1397,13 @@ static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
     ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
 }
 
-bool requiresGetVGCall(MachineFunction &MF) {
-  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+bool AArch64FrameLowering::requiresGetVGCall(const MachineFunction &MF) const {
+  auto *AFI = MF.getInfo<AArch64FunctionInfo>();
   return AFI->hasStreamingModeChanges() &&
          !MF.getSubtarget<AArch64Subtarget>().hasSVE();
 }
 
-static bool requiresSaveVG(const MachineFunction &MF) {
+bool AArch64FrameLowering::requiresSaveVG(const MachineFunction &MF) const {
   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   if (!AFI->needsDwarfUnwindInfo(MF) || !AFI->hasStreamingModeChanges())
     return false;
@@ -1499,8 +1421,8 @@ static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
          StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
 }
 
-bool isVGInstruction(MachineBasicBlock::iterator MBBI,
-                     const TargetLowering &TLI) {
+bool AArch64FrameLowering::isVGInstruction(MachineBasicBlock::iterator MBBI,
+                                           const TargetLowering &TLI) const {
   unsigned Opc = MBBI->getOpcode();
   if (Opc == AArch64::CNTD_XPiI)
     return true;
@@ -1514,15 +1436,12 @@ bool isVGInstruction(MachineBasicBlock::iterator MBBI,
   return Opc == TargetOpcode::COPY;
 }
 
-// Convert callee-save register save/restore instruction to do stack pointer
-// decrement/increment to allocate/deallocate the callee-save stack area by
-// converting store/load to use pre/post increment version.
-static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
+MachineBasicBlock::iterator
+AArch64FrameLowering::convertCalleeSaveRestoreToSPPrePostIncDec(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
     bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
-    MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
-    int CFAOffset = 0) {
+    MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
   unsigned NewOpc;
 
   // If the function contains streaming mode changes, we expect instructions
@@ -1643,12 +1562,9 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
   return std::prev(MBB.erase(MBBI));
 }
 
-// Fixup callee-save register save/restore instructions to take into account
-// combined SP bump by adding the local stack size to the stack offsets.
-static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
-                                              uint64_t LocalStackSize,
-                                              bool NeedsWinCFI,
-                                              bool *HasWinCFI) {
+void AArch64FrameLowering::fixupCalleeSaveRestoreStackOffset(
+    MachineInstr &MI, uint64_t LocalStackSize, bool NeedsWinCFI,
+    bool *HasWinCFI) const {
   if (AArch64InstrInfo::isSEHInstruction(MI))
     return;
 
@@ -1703,7 +1619,8 @@ static unsigned getStackHazardSize(const MachineFunction &MF) {
 }
 
 // Convenience function to determine whether I is an SVE callee save.
-static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
+bool AArch64FrameLowering::isSVECalleeSave(
+    MachineBasicBlock::iterator I) const {
   switch (I->getOpcode()) {
   default:
     return false;
@@ -1725,42 +1642,6 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
   }
 }
 
-static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
-                                        MachineFunction &MF,
-                                        MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator MBBI,
-                                        const DebugLoc &DL, bool NeedsWinCFI,
-                                        bool NeedsUnwindInfo) {
-  // Shadow call stack prolog: str x30, [x18], #8
-  BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXpost))
-      .addReg(AArch64::X18, RegState::Define)
-      .addReg(AArch64::LR)
-      .addReg(AArch64::X18)
-      .addImm(8)
-      .setMIFlag(MachineInstr::FrameSetup);
-
-  // This instruction also makes x18 live-in to the entry block.
-  MBB.addLiveIn(AArch64::X18);
-
-  if (NeedsWinCFI)
-    BuildMI(MBB, MBBI, DL, TII.get(AArch64::SEH_Nop))
-        .setMIFlag(MachineInstr::FrameSetup);
-
-  if (NeedsUnwindInfo) {
-    // Emit a CFI instruction that causes 8 to be subtracted from the value of
-    // x18 when unwinding past this frame.
-    static const char CFIInst[] = {
-        dwarf::DW_CFA_val_expression,
-        18, // register
-        2,  // length
-        static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
-        static_cast<char>(-8) & 0x7f, // addend (sleb128)
-    };
-    CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
-        .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
-  }
-}
-
 static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII,
                                         MachineFunction &MF,
                                         MachineBasicBlock &MBB,
@@ -1783,36 +1664,6 @@ static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII,
         .buildRestore(AArch64::X18);
 }
 
-// Define the current CFA rule to use the provided FP.
-static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB,
-                                MachineBasicBlock::iterator MBBI,
-                                unsigned FixedObject) {
-  const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
-  const AArch64RegisterInfo *TRI = STI.getRegisterInfo();
-  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-
-  const int OffsetToFirstCalleeSaveFromFP =
-      AFI->getCalleeSaveBaseToFrameRecordOffset() -
-      AFI->getCalleeSavedStackSize();
-  Register FramePtr = TRI->getFrameRegister(MF);
-  CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
-      .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
-}
-
-#ifndef NDEBUG
-/// Collect live registers from the end of \p MI's parent up to (including) \p
-/// MI in \p LiveRegs.
-static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
-                                LivePhysRegs &LiveRegs) {
-
-  MachineBasicBlock &MBB = *MI.getParent();
-  LiveRegs.addLiveOuts(MBB);
-  for (const MachineInstr &MI :
-       reverse(make_range(MI.getIterator(), MBB.instr_end())))
-    LiveRegs.stepBackward(MI);
-}
-#endif
-
 void AArch64FrameLowering::emitPacRetPlusLeafHardening(
     MachineFunction &MF) const {
   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
@@ -1848,616 +1699,8 @@ void AArch64FrameLowering::emitPacRetPlusLeafHardening(
 
 void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                         MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator MBBI = MBB.begin();
-  const MachineFrameInfo &MFI = MF.getFrameInfo();
-  const Function &F = MF.getFunction();
-  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
-  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
-  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
-
-  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  bool EmitCFI = AFI->needsDwarfUnwindInfo(MF);
-  bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
-  bool HasFP = hasFP(MF);
-  bool NeedsWinCFI = needsWinCFI(MF);
-  bool HasWinCFI = false;
-  auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });
-
-  MachineBasicBlock::iterator End = MBB.end();
-#ifndef NDEBUG
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-  // Collect live register from the end of MBB up to the start of the existing
-  // frame setup instructions.
-  MachineBasicBlock::iterator NonFrameStart = MBB.begin();
-  while (NonFrameStart != End &&
-         NonFrameStart->getFlag(MachineInstr::FrameSetup))
-    ++NonFrameStart;
-
-  LivePhysRegs LiveRegs(*TRI);
-  if (NonFrameStart != MBB.end()) {
-    getLivePhysRegsUpTo(*NonFrameStart, *TRI, LiveRegs);
-    // Ignore registers used for stack management for now.
-    LiveRegs.removeReg(AArch64::SP);
-    LiveRegs.removeReg(AArch64::X19);
-    LiveRegs.removeReg(AArch64::FP);
-    LiveRegs.removeReg(AArch64::LR);
-
-    // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
-    // This is necessary to spill VG if required where SVE is unavailable, but
-    // X0 is preserved around this call.
-    if (requiresGetVGCall(MF))
-      LiveRegs.removeReg(AArch64::X0);
-  }
-
-  auto VerifyClobberOnExit = make_scope_exit([&]() {
-    if (NonFrameStart == MBB.end())
-      return;
-    // Check if any of the newly instructions clobber any of the live registers.
-    for (MachineInstr &MI :
-         make_range(MBB.instr_begin(), NonFrameStart->getIterator())) {
-      for (auto &Op : MI.operands())
-        if (Op.isReg() && Op.isDef())
-          assert(!LiveRegs.contains(Op.getReg()) &&
-                 "live register clobbered by inserted prologue instructions");
-    }
-  });
-#endif
-
-  bool IsFunclet = MBB.isEHFuncletEntry();
-
-  // At this point, we're going to decide whether or not the function uses a
-  // redzone. In most cases, the function doesn't have a redzone so let's
-  // assume that's false and set it to true in the case that there's a redzone.
-  AFI->setHasRedZone(false);
-
-  // Debug location must be unknown since the first debug location is used
-  // to determine the end of the prologue.
-  DebugLoc DL;
-
-  const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
-  if (MFnI.shouldSignReturnAddress(MF)) {
-    // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
-    // are inserted by emitPacRetPlusLeafHardening().
-    if (!shouldSignReturnAddressEverywhere(MF)) {
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
-          .setMIFlag(MachineInstr::FrameSetup);
-    }
-    // AArch64PointerAuth pass will insert SEH_PACSignLR
-    HasWinCFI |= NeedsWinCFI;
-  }
-
-  if (MFnI.needsShadowCallStackPrologueEpilogue(MF)) {
-    emitShadowCallStackPrologue(*TII, MF, MBB, MBBI, DL, NeedsWinCFI,
-                                MFnI.needsDwarfUnwindInfo(MF));
-    HasWinCFI |= NeedsWinCFI;
-  }
-
-  if (EmitCFI && MFnI.isMTETagged()) {
-    BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITMTETAGGED))
-        .setMIFlag(MachineInstr::FrameSetup);
-  }
-
-  // We signal the presence of a Swift extended frame to external tools by
-  // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
-  // ORR is sufficient, it is assumed a Swift kernel would initialize the TBI
-  // bits so that is still true.
-  if (HasFP && AFI->hasSwiftAsyncContext()) {
-    switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
-    case SwiftAsyncFramePointerMode::DeploymentBased:
-      if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
-        // The special symbol below is absolute and has a *value* that can be
-        // combined with the frame pointer to signal an extended frame.
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
-            .addExternalSymbol("swift_async_extendedFramePointerFlags",
-                               AArch64II::MO_GOT);
-        if (NeedsWinCFI) {
-          BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
-              .setMIFlags(MachineInstr::FrameSetup);
-          HasWinCFI = true;
-        }
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
-            .addUse(AArch64::FP)
-            .addUse(AArch64::X16)
-            .addImm(Subtarget.isTargetILP32() ? 32 : 0);
-        if (NeedsWinCFI) {
-          BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
-              .setMIFlags(MachineInstr::FrameSetup);
-          HasWinCFI = true;
-        }
-        break;
-      }
-      [[fallthrough]];
-
-    case SwiftAsyncFramePointerMode::Always:
-      // ORR x29, x29, #0x1000_0000_0000_0000
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
-          .addUse(AArch64::FP)
-          .addImm(0x1100)
-          .setMIFlag(MachineInstr::FrameSetup);
-      if (NeedsWinCFI) {
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
-            .setMIFlags(MachineInstr::FrameSetup);
-        HasWinCFI = true;
-      }
-      break;
-
-    case SwiftAsyncFramePointerMode::Never:
-      break;
-    }
-  }
-
-  // All calls are tail calls in GHC calling conv, and functions have no
-  // prologue/epilogue.
-  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
-    return;
-
-  // Set tagged base pointer to the requested stack slot.
-  // Ideally it should match SP value after prologue.
-  std::optional<int> TBPI = AFI->getTaggedBasePointerIndex();
-  if (TBPI)
-    AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
-  else
-    AFI->setTaggedBasePointerOffset(MFI.getStackSize());
-
-  const StackOffset &SVEStackSize = getSVEStackSize(MF);
-
-  // getStackSize() includes all the locals in its size calculation. We don't
-  // include these locals when computing the stack size of a funclet, as they
-  // are allocated in the parent's stack frame and accessed via the frame
-  // pointer from the funclet.  We only save the callee saved registers in the
-  // funclet, which are really the callee saved registers of the parent
-  // function, including the funclet.
-  int64_t NumBytes =
-      IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
-  if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
-    assert(!HasFP && "unexpected function without stack frame but with FP");
-    assert(!SVEStackSize &&
-           "unexpected function without stack frame but with SVE objects");
-    // All of the stack allocation is for locals.
-    AFI->setLocalStackSize(NumBytes);
-    if (!NumBytes) {
-      if (NeedsWinCFI && HasWinCFI) {
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
-            .setMIFlag(MachineInstr::FrameSetup);
-      }
-      return;
-    }
-    // REDZONE: If the stack size is less than 128 bytes, we don't need
-    // to actually allocate.
-    if (canUseRedZone(MF)) {
-      AFI->setHasRedZone(true);
-      ++NumRedZoneFunctions;
-    } else {
-      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
-                      StackOffset::getFixed(-NumBytes), TII,
-                      MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
-      if (EmitCFI) {
-        // Label used to tie together the PROLOG_LABEL and the MachineMoves.
-        MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
-        // Encode the stack size of the leaf function.
-        CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
-            .buildDefCFAOffset(NumBytes, FrameLabel);
-      }
-    }
-
-    if (NeedsWinCFI) {
-      HasWinCFI = true;
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
-          .setMIFlag(MachineInstr::FrameSetup);
-    }
-
-    return;
-  }
-
-  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
-  unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
-
-  // Windows unwind can't represent the required stack adjustments if we have
-  // both SVE callee-saves and dynamic stack allocations, and the frame
-  // pointer is before the SVE spills.  The allocation of the frame pointer
-  // must be the last instruction in the prologue so the unwinder can restore
-  // the stack pointer correctly. (And there isn't any unwind opcode for
-  // `addvl sp, x29, -17`.)
-  //
-  // Because of this, we do spills in the opposite order on Windows: first SVE,
-  // then GPRs. The main side-effect of this is that it makes accessing
-  // parameters passed on the stack more expensive.
-  //
-  // We could consider rearranging the spills for simpler cases.
-  bool FPAfterSVECalleeSaves =
-      Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
-
-  if (FPAfterSVECalleeSaves && AFI->hasStackHazardSlotIndex())
-    reportFatalUsageError("SME hazard padding is not supported on Windows");
-
-  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
-  // All of the remaining stack allocations are for locals.
-  AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
-  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
-  bool HomPrologEpilog = homogeneousPrologEpilog(MF);
-  if (FPAfterSVECalleeSaves) {
-    // If we're doing SVE saves first, we need to immediately allocate space
-    // for fixed objects, then space for the SVE callee saves.
-    //
-    // Windows unwind requires that the scalable size is a multiple of 16;
-    // that's handled when the callee-saved size is computed.
-    auto SaveSize =
-        StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
-        StackOffset::getFixed(FixedObject);
-    allocateStackSpace(MBB, MBBI, 0, SaveSize, NeedsWinCFI, &HasWinCFI,
-                       /*EmitCFI=*/false, StackOffset{},
-                       /*FollowupAllocs=*/true);
-    NumBytes -= FixedObject;
-
-    // Now allocate space for the GPR callee saves.
-    while (MBBI != End && IsSVECalleeSave(MBBI))
-      ++MBBI;
-    MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
-        MBB, MBBI, DL, TII, -AFI->getCalleeSavedStackSize(), NeedsWinCFI,
-        &HasWinCFI, EmitAsyncCFI);
-    NumBytes -= AFI->getCalleeSavedStackSize();
-  } else if (CombineSPBump) {
-    assert(!SVEStackSize && "Cannot combine SP bump with SVE");
-    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
-                    StackOffset::getFixed(-NumBytes), TII,
-                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
-                    EmitAsyncCFI);
-    NumBytes = 0;
-  } else if (HomPrologEpilog) {
-    // Stack has been already adjusted.
-    NumBytes -= PrologueSaveSize;
-  } else if (PrologueSaveSize != 0) {
-    MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
-        MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI,
-        EmitAsyncCFI);
-    NumBytes -= PrologueSaveSize;
-  }
-  assert(NumBytes >= 0 && "Negative stack allocation size!?");
-
-  // Move past the saves of the callee-saved registers, fixing up the offsets
-  // and pre-inc if we decided to combine the callee-save and local stack
-  // pointer bump above.
-  auto &TLI = *MF.getSubtarget().getTargetLowering();
-  while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
-         !IsSVECalleeSave(MBBI)) {
-    if (CombineSPBump &&
-        // Only fix-up frame-setup load/store instructions.
-        (!requiresSaveVG(MF) || !isVGInstruction(MBBI, TLI)))
-      fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
-                                        NeedsWinCFI, &HasWinCFI);
-    ++MBBI;
-  }
-
-  // For funclets the FP belongs to the containing function.
-  if (!IsFunclet && HasFP) {
-    // Only set up FP if we actually need to.
-    int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
-
-    if (CombineSPBump)
-      FPOffset += AFI->getLocalStackSize();
-
-    if (AFI->hasSwiftAsyncContext()) {
-      // Before we update the live FP we have to ensure there's a valid (or
-      // null) asynchronous context in its slot just before FP in the frame
-      // record, so store it now.
-      const auto &Attrs = MF.getFunction().getAttributes();
-      bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
-      if (HaveInitialContext)
-        MBB.addLiveIn(AArch64::X22);
-      Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
-          .addUse(Reg)
-          .addUse(AArch64::SP)
-          .addImm(FPOffset - 8)
-          .setMIFlags(MachineInstr::FrameSetup);
-      if (NeedsWinCFI) {
-        // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
-        // to multiple instructions, should be mutually-exclusive.
-        assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
-            .setMIFlags(MachineInstr::FrameSetup);
-        HasWinCFI = true;
-      }
-    }
-
-    if (HomPrologEpilog) {
-      auto Prolog = MBBI;
-      --Prolog;
-      assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
-      Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
-    } else {
-      // Issue    sub fp, sp, FPOffset or
-      //          mov fp,sp          when FPOffset is zero.
-      // Note: All stores of callee-saved registers are marked as "FrameSetup".
-      // This code marks the instruction(s) that set the FP also.
-      emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
-                      StackOffset::getFixed(FPOffset), TII,
-                      MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
-      if (NeedsWinCFI && HasWinCFI) {
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
-            .setMIFlag(MachineInstr::FrameSetup);
-        // After setting up the FP, the rest of the prolog doesn't need to be
-        // included in the SEH unwind info.
-        NeedsWinCFI = false;
-      }
-    }
-    if (EmitAsyncCFI)
-      emitDefineCFAWithFP(MF, MBB, MBBI, FixedObject);
-  }
-
-  // Now emit the moves for whatever callee saved regs we have (including FP,
-  // LR if those are saved). Frame instructions for SVE register are emitted
-  // later, after the instruction which actually save SVE regs.
-  if (EmitAsyncCFI)
-    emitCalleeSavedGPRLocations(MBB, MBBI);
-
-  // Alignment is required for the parent frame, not the funclet
-  const bool NeedsRealignment =
-      NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF);
-  const int64_t RealignmentPadding =
-      (NeedsRealignment && MFI.getMaxAlign() > Align(16))
-          ? MFI.getMaxAlign().value() - 16
-          : 0;
-
-  if (windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) {
-    if (AFI->getSVECalleeSavedStackSize())
-      report_fatal_error(
-          "SVE callee saves not yet supported with stack probing");
-
-    // Find an available register to spill the value of X15 to, if X15 is being
-    // used already for nest.
-    unsigned X15Scratch = AArch64::NoRegister;
-    const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
-    if (llvm::any_of(MBB.liveins(),
-                     [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
-                       return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
-                           AArch64::X15, LiveIn.PhysReg);
-                     })) {
-      X15Scratch = findScratchNonCalleeSaveRegister(&MBB, true);
-      assert(X15Scratch != AArch64::NoRegister &&
-             (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
-#ifndef NDEBUG
-      LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
-#endif
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
-          .addReg(AArch64::XZR)
-          .addReg(AArch64::X15, RegState::Undef)
-          .addReg(AArch64::X15, RegState::Implicit)
-          .setMIFlag(MachineInstr::FrameSetup);
-    }
-
-    uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
-    if (NeedsWinCFI) {
-      HasWinCFI = true;
-      // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
-      // exceed this amount.  We need to move at most 2^24 - 1 into x15.
-      // This is at most two instructions, MOVZ followed by MOVK.
-      // TODO: Fix to use multiple stack alloc unwind codes for stacks
-      // exceeding 256MB in size.
-      if (NumBytes >= (1 << 28))
-        report_fatal_error("Stack size cannot exceed 256MB for stack "
-                           "unwinding purposes");
-
-      uint32_t LowNumWords = NumWords & 0xFFFF;
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
-          .addImm(LowNumWords)
-          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
-          .setMIFlag(MachineInstr::FrameSetup);
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
-          .setMIFlag(MachineInstr::FrameSetup);
-      if ((NumWords & 0xFFFF0000) != 0) {
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
-            .addReg(AArch64::X15)
-            .addImm((NumWords & 0xFFFF0000) >> 16) // High half
-            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
-            .setMIFlag(MachineInstr::FrameSetup);
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
-            .setMIFlag(MachineInstr::FrameSetup);
-      }
-    } else {
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
-          .addImm(NumWords)
-          .setMIFlags(MachineInstr::FrameSetup);
-    }
-
-    const char *ChkStk = Subtarget.getChkStkName();
-    switch (MF.getTarget().getCodeModel()) {
-    case CodeModel::Tiny:
-    case CodeModel::Small:
-    case CodeModel::Medium:
-    case CodeModel::Kernel:
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
-          .addExternalSymbol(ChkStk)
-          .addReg(AArch64::X15, RegState::Implicit)
-          .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
-          .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
-          .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
-          .setMIFlags(MachineInstr::FrameSetup);
-      if (NeedsWinCFI) {
-        HasWinCFI = true;
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
-            .setMIFlag(MachineInstr::FrameSetup);
-      }
-      break;
-    case CodeModel::Large:
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
-          .addReg(AArch64::X16, RegState::Define)
-          .addExternalSymbol(ChkStk)
-          .addExternalSymbol(ChkStk)
-          .setMIFlags(MachineInstr::FrameSetup);
-      if (NeedsWinCFI) {
-        HasWinCFI = true;
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
-            .setMIFlag(MachineInstr::FrameSetup);
-      }
-
-      BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
-          .addReg(AArch64::X16, RegState::Kill)
-          .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
-          .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
-          .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
-          .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
-          .setMIFlags(MachineInstr::FrameSetup);
-      if (NeedsWinCFI) {
-        HasWinCFI = true;
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
-            .setMIFlag(MachineInstr::FrameSetup);
-      }
-      break;
-    }
-
-    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
-        .addReg(AArch64::SP, RegState::Kill)
-        .addReg(AArch64::X15, RegState::Kill)
-        .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
-        .setMIFlags(MachineInstr::FrameSetup);
-    if (NeedsWinCFI) {
-      HasWinCFI = true;
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
-          .addImm(NumBytes)
-          .setMIFlag(MachineInstr::FrameSetup);
-    }
-    NumBytes = 0;
-
-    if (RealignmentPadding > 0) {
-      if (RealignmentPadding >= 4096) {
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
-            .addReg(AArch64::X16, RegState::Define)
-            .addImm(RealignmentPadding)
-            .setMIFlags(MachineInstr::FrameSetup);
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
-            .addReg(AArch64::SP)
-            .addReg(AArch64::X16, RegState::Kill)
-            .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
-            .setMIFlag(MachineInstr::FrameSetup);
-      } else {
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
-            .addReg(AArch64::SP)
-            .addImm(RealignmentPadding)
-            .addImm(0)
-            .setMIFlag(MachineInstr::FrameSetup);
-      }
-
-      uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
-          .addReg(AArch64::X15, RegState::Kill)
-          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
-      AFI->setStackRealigned(true);
-
-      // No need for SEH instructions here; if we're realigning the stack,
-      // we've set a frame pointer and already finished the SEH prologue.
-      assert(!NeedsWinCFI);
-    }
-    if (X15Scratch != AArch64::NoRegister) {
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
-          .addReg(AArch64::XZR)
-          .addReg(X15Scratch, RegState::Undef)
-          .addReg(X15Scratch, RegState::Implicit)
-          .setMIFlag(MachineInstr::FrameSetup);
-    }
-  }
-
-  StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
-  MachineBasicBlock::iterator CalleeSavesEnd = MBBI;
-
-  StackOffset CFAOffset =
-      StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
-
-  // Process the SVE callee-saves to determine what space needs to be
-  // allocated.
-  if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
-    LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
-                      << "\n");
-    SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
-    SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
-    // Find callee save instructions in frame.
-    // Note: With FPAfterSVECalleeSaves the callee saves have already been
-    // allocated.
-    if (!FPAfterSVECalleeSaves) {
-      MachineBasicBlock::iterator CalleeSavesBegin = MBBI;
-      assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
-      while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
-        ++MBBI;
-      CalleeSavesEnd = MBBI;
-
-      StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
-      // Allocate space for the callee saves (if any).
-      allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
-                         nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
-                         MFI.hasVarSizedObjects() || LocalsSize);
-    }
-  }
-  CFAOffset += SVECalleeSavesSize;
-
-  if (EmitAsyncCFI)
-    emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
-
-  // Allocate space for the rest of the frame including SVE locals. Align the
-  // stack as necessary.
-  assert(!(canUseRedZone(MF) && NeedsRealignment) &&
-         "Cannot use redzone with stack realignment");
-  if (!canUseRedZone(MF)) {
-    // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
-    // the correct value here, as NumBytes also includes padding bytes,
-    // which shouldn't be counted here.
-    allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
-                       SVELocalsSize + StackOffset::getFixed(NumBytes),
-                       NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
-                       CFAOffset, MFI.hasVarSizedObjects());
-  }
-
-  // If we need a base pointer, set it up here. It's whatever the value of the
-  // stack pointer is at this point. Any variable size objects will be allocated
-  // after this, so we can still use the base pointer to reference locals.
-  //
-  // FIXME: Clarify FrameSetup flags here.
-  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
-  // needed.
-  // For funclets the BP belongs to the containing function.
-  if (!IsFunclet && RegInfo->hasBasePointer(MF)) {
-    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
-                     false);
-    if (NeedsWinCFI) {
-      HasWinCFI = true;
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
-          .setMIFlag(MachineInstr::FrameSetup);
-    }
-  }
-
-  // The very last FrameSetup instruction indicates the end of prologue. Emit a
-  // SEH opcode indicating the prologue end.
-  if (NeedsWinCFI && HasWinCFI) {
-    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
-        .setMIFlag(MachineInstr::FrameSetup);
-  }
-
-  // SEH funclets are passed the frame pointer in X1.  If the parent
-  // function uses the base register, then the base register is used
-  // directly, and is not retrieved from X1.
-  if (IsFunclet && F.hasPersonalityFn()) {
-    EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
-    if (isAsynchronousEHPersonality(Per)) {
-      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
-          .addReg(AArch64::X1)
-          .setMIFlag(MachineInstr::FrameSetup);
-      MBB.addLiveIn(AArch64::X1);
-    }
-  }
-
-  if (EmitCFI && !EmitAsyncCFI) {
-    if (HasFP) {
-      emitDefineCFAWithFP(MF, MBB, MBBI, FixedObject);
-    } else {
-      StackOffset TotalSize =
-          SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
-      CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
-      CFIBuilder.insertCFIInst(
-          createDefCFA(*RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
-                       TotalSize, /*LastAdjustmentWasScalable=*/false));
-    }
-    emitCalleeSavedGPRLocations(MBB, MBBI);
-    emitCalleeSavedSVELocations(MBB, MBBI);
-  }
+  AArch64PrologueEmitter PrologueEmitter(MF, MBB, *this);
+  PrologueEmitter.emitPrologue();
 }
 
 static bool isFuncletReturnInstr(const MachineInstr &MI) {
@@ -2607,7 +1850,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   while (FirstGPRRestoreI != Begin) {
     --FirstGPRRestoreI;
     if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
-        (!FPAfterSVECalleeSaves && IsSVECalleeSave(FirstGPRRestoreI))) {
+        (!FPAfterSVECalleeSaves && isSVECalleeSave(FirstGPRRestoreI))) {
       ++FirstGPRRestoreI;
       break;
     } else if (CombineSPBump)
@@ -2689,11 +1932,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
 
     RestoreBegin = std::prev(RestoreEnd);
     while (RestoreBegin != MBB.begin() &&
-           IsSVECalleeSave(std::prev(RestoreBegin)))
+           isSVECalleeSave(std::prev(RestoreBegin)))
       --RestoreBegin;
 
-    assert(IsSVECalleeSave(RestoreBegin) &&
-           IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
+    assert(isSVECalleeSave(RestoreBegin) &&
+           isSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
 
     StackOffset CalleeSavedSizeAsOffset =
         StackOffset::getScalable(CalleeSavedSize);
@@ -2927,8 +2170,8 @@ AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF,
   return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI));
 }
 
-static StackOffset getFPOffset(const MachineFunction &MF,
-                               int64_t ObjectOffset) {
+StackOffset AArch64FrameLowering::getFPOffset(const MachineFunction &MF,
+                                              int64_t ObjectOffset) const {
   const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
   const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
   const Function &F = MF.getFunction();
@@ -2941,8 +2184,8 @@ static StackOffset getFPOffset(const MachineFunction &MF,
   return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust);
 }
 
-static StackOffset getStackOffset(const MachineFunction &MF,
-                                  int64_t ObjectOffset) {
+StackOffset AArch64FrameLowering::getStackOffset(const MachineFunction &MF,
+                                                 int64_t ObjectOffset) const {
   const auto &MFI = MF.getFrameInfo();
   return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize());
 }
@@ -3140,7 +2383,8 @@ static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
   return getKillRegState(!IsLiveIn);
 }
 
-static bool produceCompactUnwindFrame(MachineFunction &MF) {
+static bool produceCompactUnwindFrame(const AArch64FrameLowering &AFL,
+                                      MachineFunction &MF) {
   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
   AttributeList Attrs = MF.getFunction().getAttributes();
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -3148,7 +2392,7 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
          !(Subtarget.getTargetLowering()->supportSwiftError() &&
            Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
          MF.getFunction().getCallingConv() != CallingConv::SwiftTail &&
-         !requiresSaveVG(MF) && !AFI->isSVECC();
+         !AFL.requiresSaveVG(MF) && !AFI->isSVECC();
 }
 
 static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
@@ -3245,16 +2489,18 @@ bool enableMultiVectorSpillFill(const AArch64Subtarget &Subtarget,
           (!IsLocallyStreaming && Subtarget.isStreaming()));
 }
 
-static void computeCalleeSaveRegisterPairs(
-    MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
-    const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
-    bool NeedsFrameRecord) {
+void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
+                                    MachineFunction &MF,
+                                    ArrayRef<CalleeSavedInfo> CSI,
+                                    const TargetRegisterInfo *TRI,
+                                    SmallVectorImpl<RegPairInfo> &RegPairs,
+                                    bool NeedsFrameRecord) {
 
   if (CSI.empty())
     return;
 
   bool IsWindows = isTargetWindows(MF);
-  bool NeedsWinCFI = needsWinCFI(MF);
+  bool NeedsWinCFI = AFL.needsWinCFI(MF);
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   unsigned StackHazardSize = getStackHazardSize(MF);
   MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -3263,9 +2509,10 @@ static void computeCalleeSaveRegisterPairs(
   (void)CC;
   // MachO's compact unwind format relies on all registers being stored in
   // pairs.
-  assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
-          CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS ||
-          CC == CallingConv::Win64 || (Count & 1) == 0) &&
+  assert((!produceCompactUnwindFrame(AFL, MF) ||
+          CC == CallingConv::PreserveMost || CC == CallingConv::PreserveAll ||
+          CC == CallingConv::CXX_FAST_TLS || CC == CallingConv::Win64 ||
+          (Count & 1) == 0) &&
          "Odd number of callee-saved regs to spill!");
   int ByteOffset = AFI->getCalleeSavedStackSize();
   int StackFillDir = -1;
@@ -3381,9 +2628,9 @@ static void computeCalleeSaveRegisterPairs(
 
     // MachO's compact unwind format relies on all registers being stored in
     // adjacent register pairs.
-    assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
-            CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS ||
-            CC == CallingConv::Win64 ||
+    assert((!produceCompactUnwindFrame(AFL, MF) ||
+            CC == CallingConv::PreserveMost || CC == CallingConv::PreserveAll ||
+            CC == CallingConv::CXX_FAST_TLS || CC == CallingConv::Win64 ||
             (RPI.isPaired() &&
              ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
               RPI.Reg1 + 1 == RPI.Reg2))) &&
@@ -3496,7 +2743,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
   DebugLoc DL;
   SmallVector<RegPairInfo, 8> RegPairs;
 
-  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
+  computeCalleeSaveRegisterPairs(*this, MF, CSI, TRI, RegPairs, hasFP(MF));
 
   MachineRegisterInfo &MRI = MF.getRegInfo();
   // Refresh the reserved regs in case there are any potential changes since the
@@ -3708,7 +2955,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
   if (MBBI != MBB.end())
     DL = MBBI->getDebugLoc();
 
-  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
+  computeCalleeSaveRegisterPairs(*this, MF, CSI, TRI, RegPairs, hasFP(MF));
   if (homogeneousPrologEpilog(MF, &MBB)) {
     auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog))
                    .setMIFlag(MachineInstr::FrameDestroy);
@@ -4142,7 +3389,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
       if (producePairRegisters(MF)) {
         if (UnspilledCSGPRPaired == AArch64::NoRegister) {
           // Failed to make a pair for compact unwind format, revert spilling.
-          if (produceCompactUnwindFrame(MF)) {
+          if (produceCompactUnwindFrame(*this, MF)) {
             SavedRegs.reset(UnspilledCSGPR);
             ExtraCSSpill = AArch64::NoRegister;
           }
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 555a93359c274..a9d65441a4e30 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -19,6 +19,10 @@
 
 namespace llvm {
 
+class TargetLowering;
+class AArch64FunctionInfo;
+class AArch64PrologueEmitter;
+
 class AArch64FrameLowering : public TargetFrameLowering {
 public:
   explicit AArch64FrameLowering()
@@ -130,12 +134,19 @@ class AArch64FrameLowering : public TargetFrameLowering {
     return StackId != TargetStackID::ScalableVector;
   }
 
+  friend class AArch64PrologueEmitter;
   void
   orderFrameObjects(const MachineFunction &MF,
                     SmallVectorImpl<int> &ObjectsToAllocate) const override;
 
   bool isFPReserved(const MachineFunction &MF) const;
 
+  bool needsWinCFI(const MachineFunction &MF) const;
+
+  bool requiresSaveVG(const MachineFunction &MF) const;
+
+  StackOffset getSVEStackSize(const MachineFunction &MF) const;
+
 protected:
   bool hasFPImpl(const MachineFunction &MF) const override;
 
@@ -159,10 +170,6 @@ class AArch64FrameLowering : public TargetFrameLowering {
                                       int &MaxCSFrameIndex) const;
   bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
                                                 uint64_t StackBumpBytes) const;
-  void emitCalleeSavedGPRLocations(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI) const;
-  void emitCalleeSavedSVELocations(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI) const;
   void emitCalleeSavedGPRRestores(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI) const;
   void emitCalleeSavedSVERestores(MachineBasicBlock &MBB,
@@ -196,6 +203,61 @@ class AArch64FrameLowering : public TargetFrameLowering {
 
   void emitRemarks(const MachineFunction &MF,
                    MachineOptimizationRemarkEmitter *ORE) const override;
+
+  bool windowsRequiresStackProbe(const MachineFunction &MF,
+                                 uint64_t StackSizeInBytes) const;
+
+  bool shouldSignReturnAddressEverywhere(const MachineFunction &MF) const;
+
+  StackOffset getFPOffset(const MachineFunction &MF,
+                          int64_t ObjectOffset) const;
+
+  StackOffset getStackOffset(const MachineFunction &MF,
+                             int64_t ObjectOffset) const;
+
+  // Find a scratch register that we can use at the start of the prologue to
+  // re-align the stack pointer.  We avoid using callee-save registers since
+  // they may appear to be free when this is called from canUseAsPrologue
+  // (during shrink wrapping), but then no longer be free when this is called
+  // from emitPrologue.
+  //
+  // FIXME: This is a bit conservative, since in the above case we could use one
+  // of the callee-save registers as a scratch temp to re-align the stack
+  // pointer, but we would then have to make sure that we were in fact saving at
+  // least one callee-save register in the prologue, which is additional
+  // complexity that doesn't seem worth the benefit.
+  Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+                                            bool HasCall = false) const;
+
+  // Convert callee-save register save/restore instruction to do stack pointer
+  // decrement/increment to allocate/deallocate the callee-save stack area by
+  // converting store/load to use pre/post increment version.
+  MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+      const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
+      bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
+      MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
+      int CFAOffset = 0) const;
+
+  // Fixup callee-save register save/restore instructions to take into account
+  // combined SP bump by adding the local stack size to the stack offsets.
+  void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
+                                         uint64_t LocalStackSize,
+                                         bool NeedsWinCFI,
+                                         bool *HasWinCFI) const;
+
+  bool isSVECalleeSave(MachineBasicBlock::iterator I) const;
+
+  /// Returns the size of the fixed object area (allocated next to sp on entry)
+  /// On Win64 this may include a var args area and an UnwindHelp object for EH.
+  unsigned getFixedObjectSize(const MachineFunction &MF,
+                              const AArch64FunctionInfo *AFI, bool IsWin64,
+                              bool IsFunclet) const;
+
+  bool isVGInstruction(MachineBasicBlock::iterator MBBI,
+                       const TargetLowering &TLI) const;
+
+  bool requiresGetVGCall(const MachineFunction &MF) const;
 };
 
 } // End llvm namespace
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
new file mode 100644
index 0000000000000..af424987b8ddb
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -0,0 +1,794 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64PrologueEpilogue.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/CFIInstBuilder.h"
+#include "llvm/MC/MCContext.h"
+
+#define DEBUG_TYPE "frame-info"
+
+STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
+
+namespace llvm {
+
+AArch64PrologueEmitter::AArch64PrologueEmitter(MachineFunction &MF,
+                                               MachineBasicBlock &MBB,
+                                               const AArch64FrameLowering &AFL)
+    : MF(MF), MBB(MBB), F(MF.getFunction()), MFI(MF.getFrameInfo()),
+      Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
+      RegInfo(*Subtarget.getRegisterInfo()) {
+  TII = Subtarget.getInstrInfo();
+  AFI = MF.getInfo<AArch64FunctionInfo>();
+
+  EmitCFI = AFI->needsDwarfUnwindInfo(MF);
+  EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
+  HasFP = AFL.hasFP(MF);
+  NeedsWinCFI = AFL.needsWinCFI(MF);
+  IsFunclet = MBB.isEHFuncletEntry();
+  HomPrologEpilog = AFL.homogeneousPrologEpilog(MF);
+
+#ifndef NDEBUG
+  collectBlockLiveins();
+#endif
+}
+
+#ifndef NDEBUG
+/// Collect live registers from the end of \p MI's parent up to (and
+/// including) \p MI in \p LiveRegs.
+static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
+                                LivePhysRegs &LiveRegs) {
+
+  MachineBasicBlock &MBB = *MI.getParent();
+  LiveRegs.addLiveOuts(MBB);
+  for (const MachineInstr &MI :
+       reverse(make_range(MI.getIterator(), MBB.instr_end())))
+    LiveRegs.stepBackward(MI);
+}
+
+void AArch64PrologueEmitter::collectBlockLiveins() {
+  // Collect live registers from the end of MBB up to the start of the existing
+  // frame setup instructions.
+  PrologueEndI = MBB.begin();
+  while (PrologueEndI != MBB.end() &&
+         PrologueEndI->getFlag(MachineInstr::FrameSetup))
+    ++PrologueEndI;
+
+  if (PrologueEndI != MBB.end()) {
+    getLivePhysRegsUpTo(*PrologueEndI, RegInfo, LiveRegs);
+    // Ignore registers used for stack management for now.
+    LiveRegs.removeReg(AArch64::SP);
+    LiveRegs.removeReg(AArch64::X19);
+    LiveRegs.removeReg(AArch64::FP);
+    LiveRegs.removeReg(AArch64::LR);
+
+    // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
+    // This is necessary to spill VG if required where SVE is unavailable, but
+    // X0 is preserved around this call.
+    if (AFL.requiresGetVGCall(MF))
+      LiveRegs.removeReg(AArch64::X0);
+  }
+}
+
+void AArch64PrologueEmitter::verifyPrologueClobbers() const {
+  if (PrologueEndI == MBB.end())
+    return;
+  // Check if any of the newly inserted instructions clobber any of the live
+  for (MachineInstr &MI :
+       make_range(MBB.instr_begin(), PrologueEndI->getIterator())) {
+    for (auto &Op : MI.operands())
+      if (Op.isReg() && Op.isDef())
+        assert(!LiveRegs.contains(Op.getReg()) &&
+               "live register clobbered by inserted prologue instructions");
+  }
+}
+#endif
+
+void AArch64PrologueEmitter::determineLocalsStackSize(
+    uint64_t StackSize, uint64_t PrologueSaveSize) {
+  AFI->setLocalStackSize(StackSize - PrologueSaveSize);
+  CombineSPBump = AFL.shouldCombineCSRLocalStackBump(MF, StackSize);
+}
+
+// Emit the function prologue into MBB. Emission proceeds in phases:
+//   1. Return-address signing, shadow call stack, MTE tagging, and the Swift
+//      extended-frame marker.
+//   2. Initial SP bump and GPR callee-save spills (on Windows with SVE
+//      callee-saves, the SVE saves are allocated first — see below).
+//   3. Frame-pointer setup and, with async CFI, CFI for the GPR saves.
+//   4. Windows stack probes, SVE callee-save allocation, and allocation of
+//      the remaining locals (with realignment if required).
+//   5. Base-pointer setup, SEH prologue end, funclet FP, and (with
+//      synchronous CFI) all deferred CFI.
+void AArch64PrologueEmitter::emitPrologue() {
+  const MachineBasicBlock::iterator PrologueBeginI = MBB.begin();
+  const MachineBasicBlock::iterator EndI = MBB.end();
+
+  // At this point, we're going to decide whether or not the function uses a
+  // redzone. In most cases, the function doesn't have a redzone so let's
+  // assume that's false and set it to true in the case that there's a redzone.
+  AFI->setHasRedZone(false);
+
+  // Debug location must be unknown since the first debug location is used
+  // to determine the end of the prologue.
+  DebugLoc DL;
+
+  if (AFI->shouldSignReturnAddress(MF)) {
+    // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
+    // are inserted by emitPacRetPlusLeafHardening().
+    if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
+      BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+    // AArch64PointerAuth pass will insert SEH_PACSignLR
+    HasWinCFI |= NeedsWinCFI;
+  }
+
+  if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
+    emitShadowCallStackPrologue(PrologueBeginI, DL);
+    HasWinCFI |= NeedsWinCFI;
+  }
+
+  if (EmitCFI && AFI->isMTETagged())
+    BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::EMITMTETAGGED))
+        .setMIFlag(MachineInstr::FrameSetup);
+
+  // We signal the presence of a Swift extended frame to external tools by
+  // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
+  // ORR is sufficient, it is assumed a Swift kernel would initialize the TBI
+  // bits so that is still true.
+  if (HasFP && AFI->hasSwiftAsyncContext())
+    emitSwiftAsyncContextFramePointer(PrologueBeginI, DL);
+
+  // All calls are tail calls in GHC calling conv, and functions have no
+  // prologue/epilogue.
+  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
+    return;
+
+  // Set tagged base pointer to the requested stack slot. Ideally it should
+  // match SP value after prologue.
+  if (std::optional<int> TBPI = AFI->getTaggedBasePointerIndex())
+    AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
+  else
+    AFI->setTaggedBasePointerOffset(MFI.getStackSize());
+
+  // getStackSize() includes all the locals in its size calculation. We don't
+  // include these locals when computing the stack size of a funclet, as they
+  // are allocated in the parent's stack frame and accessed via the frame
+  // pointer from the funclet.  We only save the callee saved registers in the
+  // funclet, which are really the callee saved registers of the parent
+  // function, including the funclet.
+  int64_t NumBytes =
+      IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
+  if (!AFI->hasStackFrame() && !AFL.windowsRequiresStackProbe(MF, NumBytes))
+    return emitEmptyStackFramePrologue(NumBytes, PrologueBeginI, DL);
+
+  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
+  unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
+
+  // Windows unwind can't represent the required stack adjustments if we have
+  // both SVE callee-saves and dynamic stack allocations, and the frame
+  // pointer is before the SVE spills.  The allocation of the frame pointer
+  // must be the last instruction in the prologue so the unwinder can restore
+  // the stack pointer correctly. (And there isn't any unwind opcode for
+  // `addvl sp, x29, -17`.)
+  //
+  // Because of this, we do spills in the opposite order on Windows: first SVE,
+  // then GPRs. The main side-effect of this is that it makes accessing
+  // parameters passed on the stack more expensive.
+  //
+  // We could consider rearranging the spills for simpler cases.
+  bool FPAfterSVECalleeSaves =
+      Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
+
+  if (FPAfterSVECalleeSaves && AFI->hasStackHazardSlotIndex())
+    reportFatalUsageError("SME hazard padding is not supported on Windows");
+
+  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
+  // All of the remaining stack allocations are for locals.
+  determineLocalsStackSize(NumBytes, PrologueSaveSize);
+
+  MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
+  if (FPAfterSVECalleeSaves) {
+    // If we're doing SVE saves first, we need to immediately allocate space
+    // for fixed objects, then space for the SVE callee saves.
+    //
+    // Windows unwind requires that the scalable size is a multiple of 16;
+    // that's handled when the callee-saved size is computed.
+    auto SaveSize =
+        StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
+        StackOffset::getFixed(FixedObject);
+    AFL.allocateStackSpace(MBB, PrologueBeginI, 0, SaveSize, NeedsWinCFI,
+                           &HasWinCFI,
+                           /*EmitCFI=*/false, StackOffset{},
+                           /*FollowupAllocs=*/true);
+    NumBytes -= FixedObject;
+
+    // Now allocate space for the GPR callee saves.
+    MachineBasicBlock::iterator MBBI = PrologueBeginI;
+    while (MBBI != EndI && AFL.isSVECalleeSave(MBBI))
+      ++MBBI;
+    FirstGPRSaveI = AFL.convertCalleeSaveRestoreToSPPrePostIncDec(
+        MBB, MBBI, DL, TII, -AFI->getCalleeSavedStackSize(), NeedsWinCFI,
+        &HasWinCFI, EmitAsyncCFI);
+    NumBytes -= AFI->getCalleeSavedStackSize();
+  } else if (CombineSPBump) {
+    assert(!AFL.getSVEStackSize(MF) && "Cannot combine SP bump with SVE");
+    emitFrameOffset(MBB, PrologueBeginI, DL, AArch64::SP, AArch64::SP,
+                    StackOffset::getFixed(-NumBytes), TII,
+                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
+                    EmitAsyncCFI);
+    NumBytes = 0;
+  } else if (HomPrologEpilog) {
+    // Stack has been already adjusted.
+    NumBytes -= PrologueSaveSize;
+  } else if (PrologueSaveSize != 0) {
+    FirstGPRSaveI = AFL.convertCalleeSaveRestoreToSPPrePostIncDec(
+        MBB, PrologueBeginI, DL, TII, -PrologueSaveSize, NeedsWinCFI,
+        &HasWinCFI, EmitAsyncCFI);
+    NumBytes -= PrologueSaveSize;
+  }
+  assert(NumBytes >= 0 && "Negative stack allocation size!?");
+
+  // Move past the saves of the callee-saved registers, fixing up the offsets
+  // and pre-inc if we decided to combine the callee-save and local stack
+  // pointer bump above.
+  auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+  MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
+  while (AfterGPRSavesI != EndI &&
+         AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
+         !AFL.isSVECalleeSave(AfterGPRSavesI)) {
+    if (CombineSPBump &&
+        // Only fix-up frame-setup load/store instructions.
+        (!AFL.requiresSaveVG(MF) || !AFL.isVGInstruction(AfterGPRSavesI, TLI)))
+      AFL.fixupCalleeSaveRestoreStackOffset(
+          *AfterGPRSavesI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI);
+    ++AfterGPRSavesI;
+  }
+
+  // For funclets the FP belongs to the containing function. Only set up FP if
+  // we actually need to.
+  if (!IsFunclet && HasFP)
+    emitFramePointerSetup(AfterGPRSavesI, DL, FixedObject);
+
+  // Now emit the moves for whatever callee saved regs we have (including FP,
+  // LR if those are saved). Frame instructions for SVE register are emitted
+  // later, after the instruction which actually save SVE regs.
+  if (EmitAsyncCFI)
+    emitCalleeSavedGPRLocations(AfterGPRSavesI);
+
+  // Alignment is required for the parent frame, not the funclet
+  const bool NeedsRealignment =
+      NumBytes && !IsFunclet && RegInfo.hasStackRealignment(MF);
+  const int64_t RealignmentPadding =
+      (NeedsRealignment && MFI.getMaxAlign() > Align(16))
+          ? MFI.getMaxAlign().value() - 16
+          : 0;
+
+  if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
+    emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
+
+  StackOffset SVEStackSize = AFL.getSVEStackSize(MF);
+  StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
+  MachineBasicBlock::iterator CalleeSavesEnd = AfterGPRSavesI;
+
+  StackOffset CFAOffset =
+      StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
+
+  // Process the SVE callee-saves to determine what space needs to be
+  // allocated.
+  MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
+  if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
+    LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
+                      << "\n");
+    SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
+    SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
+    // Find callee save instructions in frame.
+    // Note: With FPAfterSVECalleeSaves the callee saves have already been
+    // allocated.
+    if (!FPAfterSVECalleeSaves) {
+      MachineBasicBlock::iterator CalleeSavesBegin = AfterGPRSavesI;
+      assert(AFL.isSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
+      while (AFL.isSVECalleeSave(AfterSVESavesI) &&
+             AfterSVESavesI != MBB.getFirstTerminator())
+        ++AfterSVESavesI;
+      CalleeSavesEnd = AfterSVESavesI;
+
+      StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
+      // Allocate space for the callee saves (if any).
+      AFL.allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize,
+                             false, nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
+                             MFI.hasVarSizedObjects() || LocalsSize);
+    }
+  }
+  CFAOffset += SVECalleeSavesSize;
+
+  if (EmitAsyncCFI)
+    emitCalleeSavedSVELocations(CalleeSavesEnd);
+
+  // Allocate space for the rest of the frame including SVE locals. Align the
+  // stack as necessary.
+  assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
+         "Cannot use redzone with stack realignment");
+  if (!AFL.canUseRedZone(MF)) {
+    // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+    // the correct value here, as NumBytes also includes padding bytes,
+    // which shouldn't be counted here.
+    AFL.allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
+                           SVELocalsSize + StackOffset::getFixed(NumBytes),
+                           NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
+                           CFAOffset, MFI.hasVarSizedObjects());
+  }
+
+  // If we need a base pointer, set it up here. It's whatever the value of the
+  // stack pointer is at this point. Any variable size objects will be allocated
+  // after this, so we can still use the base pointer to reference locals.
+  //
+  // FIXME: Clarify FrameSetup flags here.
+  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
+  // needed.
+  // For funclets the BP belongs to the containing function.
+  if (!IsFunclet && RegInfo.hasBasePointer(MF)) {
+    TII->copyPhysReg(MBB, AfterSVESavesI, DL, RegInfo.getBaseRegister(),
+                     AArch64::SP, false);
+    if (NeedsWinCFI) {
+      HasWinCFI = true;
+      BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_Nop))
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+  }
+
+  // The very last FrameSetup instruction indicates the end of prologue. Emit a
+  // SEH opcode indicating the prologue end.
+  if (NeedsWinCFI && HasWinCFI) {
+    BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_PrologEnd))
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
+  // SEH funclets are passed the frame pointer in X1.  If the parent
+  // function uses the base register, then the base register is used
+  // directly, and is not retrieved from X1.
+  if (IsFunclet && F.hasPersonalityFn()) {
+    EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
+    if (isAsynchronousEHPersonality(Per)) {
+      BuildMI(MBB, AfterSVESavesI, DL, TII->get(TargetOpcode::COPY),
+              AArch64::FP)
+          .addReg(AArch64::X1)
+          .setMIFlag(MachineInstr::FrameSetup);
+      MBB.addLiveIn(AArch64::X1);
+    }
+  }
+
+  // With synchronous CFI (!EmitAsyncCFI), all CFI is deferred to the end of
+  // the prologue: a single CFA definition plus the callee-save locations.
+  if (EmitCFI && !EmitAsyncCFI) {
+    if (HasFP) {
+      emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
+    } else {
+      StackOffset TotalSize =
+          SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
+      CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
+      CFIBuilder.insertCFIInst(
+          createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
+                       TotalSize, /*LastAdjustmentWasScalable=*/false));
+    }
+    emitCalleeSavedGPRLocations(AfterSVESavesI);
+    emitCalleeSavedSVELocations(AfterSVESavesI);
+  }
+}
+
+// Push LR onto the shadow call stack (held in x18) with a post-indexed
+// store, and emit the unwind info describing the x18 update (a SEH nop for
+// WinCFI, and a DWARF val_expression for CFI).
+void AArch64PrologueEmitter::emitShadowCallStackPrologue(
+    MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
+  // Shadow call stack prolog: str x30, [x18], #8
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXpost))
+      .addReg(AArch64::X18, RegState::Define)
+      .addReg(AArch64::LR)
+      .addReg(AArch64::X18)
+      .addImm(8)
+      .setMIFlag(MachineInstr::FrameSetup);
+
+  // This instruction also makes x18 live-in to the entry block.
+  MBB.addLiveIn(AArch64::X18);
+
+  if (NeedsWinCFI)
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+        .setMIFlag(MachineInstr::FrameSetup);
+
+  if (EmitCFI) {
+    // Emit a CFI instruction that causes 8 to be subtracted from the value of
+    // x18 when unwinding past this frame.
+    static const char CFIInst[] = {
+        dwarf::DW_CFA_val_expression,
+        18, // register
+        2,  // length
+        static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
+        static_cast<char>(-8) & 0x7f, // addend (sleb128)
+    };
+    CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
+        .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
+  }
+}
+
+// Mark FP to signal a Swift extended frame: either by ORR-ing in the value
+// of the runtime-provided `swift_async_extendedFramePointerFlags` symbol
+// (deployment-based mode on targets where the flag is dynamic), or by
+// unconditionally setting bit 60 of FP.
+void AArch64PrologueEmitter::emitSwiftAsyncContextFramePointer(
+    MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
+  switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
+  case SwiftAsyncFramePointerMode::DeploymentBased:
+    if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
+      // The special symbol below is absolute and has a *value* that can be
+      // combined with the frame pointer to signal an extended frame.
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
+          .addExternalSymbol("swift_async_extendedFramePointerFlags",
+                             AArch64II::MO_GOT);
+      if (NeedsWinCFI) {
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+            .setMIFlags(MachineInstr::FrameSetup);
+        HasWinCFI = true;
+      }
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
+          .addUse(AArch64::FP)
+          .addUse(AArch64::X16)
+          .addImm(Subtarget.isTargetILP32() ? 32 : 0);
+      if (NeedsWinCFI) {
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+            .setMIFlags(MachineInstr::FrameSetup);
+        HasWinCFI = true;
+      }
+      break;
+    }
+    [[fallthrough]];
+
+  case SwiftAsyncFramePointerMode::Always:
+    // ORR x29, x29, #0x1000_0000_0000_0000
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
+        .addUse(AArch64::FP)
+        .addImm(0x1100)
+        .setMIFlag(MachineInstr::FrameSetup);
+    if (NeedsWinCFI) {
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+          .setMIFlags(MachineInstr::FrameSetup);
+      HasWinCFI = true;
+    }
+    break;
+
+  case SwiftAsyncFramePointerMode::Never:
+    break;
+  }
+}
+
+// Prologue for functions without a stack frame: either nothing at all,
+// locals placed in the red zone (no SP adjustment), or a bare SP bump with
+// its CFA-offset CFI.
+void AArch64PrologueEmitter::emitEmptyStackFramePrologue(
+    int64_t NumBytes, MachineBasicBlock::iterator MBBI,
+    const DebugLoc &DL) const {
+  assert(!HasFP && "unexpected function without stack frame but with FP");
+  assert(!AFL.getSVEStackSize(MF) &&
+         "unexpected function without stack frame but with SVE objects");
+  // All of the stack allocation is for locals.
+  AFI->setLocalStackSize(NumBytes);
+  if (!NumBytes) {
+    // HasWinCFI may already be set by earlier pseudos (e.g. pac-ret); if so,
+    // the SEH prologue still needs to be terminated.
+    if (NeedsWinCFI && HasWinCFI) {
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+    return;
+  }
+  // REDZONE: If the stack size is less than 128 bytes, we don't need
+  // to actually allocate.
+  if (AFL.canUseRedZone(MF)) {
+    AFI->setHasRedZone(true);
+    ++NumRedZoneFunctions;
+  } else {
+    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+                    StackOffset::getFixed(-NumBytes), TII,
+                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+    if (EmitCFI) {
+      // Label used to tie together the PROLOG_LABEL and the MachineMoves.
+      MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
+      // Encode the stack size of the leaf function.
+      CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
+          .buildDefCFAOffset(NumBytes, FrameLabel);
+    }
+  }
+
+  if (NeedsWinCFI) {
+    HasWinCFI = true;
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+}
+
+// Set up the frame pointer: store the Swift async context (if any) just
+// below the frame record, then materialize FP = SP + FPOffset (or patch the
+// HOM_Prolog pseudo). Once FP is established under WinCFI, the rest of the
+// prologue needs no SEH opcodes, so NeedsWinCFI is cleared — this is why
+// this method is non-const.
+void AArch64PrologueEmitter::emitFramePointerSetup(
+    MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+    unsigned FixedObject) {
+  int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
+  if (CombineSPBump)
+    FPOffset += AFI->getLocalStackSize();
+
+  if (AFI->hasSwiftAsyncContext()) {
+    // Before we update the live FP we have to ensure there's a valid (or
+    // null) asynchronous context in its slot just before FP in the frame
+    // record, so store it now.
+    const auto &Attrs = MF.getFunction().getAttributes();
+    bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
+    if (HaveInitialContext)
+      MBB.addLiveIn(AArch64::X22);
+    Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
+        .addUse(Reg)
+        .addUse(AArch64::SP)
+        .addImm(FPOffset - 8)
+        .setMIFlags(MachineInstr::FrameSetup);
+    if (NeedsWinCFI) {
+      // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
+      // to multiple instructions, should be mutually-exclusive.
+      assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+          .setMIFlags(MachineInstr::FrameSetup);
+      HasWinCFI = true;
+    }
+  }
+
+  if (HomPrologEpilog) {
+    auto Prolog = MBBI;
+    --Prolog;
+    assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
+    Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
+  } else {
+    // Issue    sub fp, sp, FPOffset or
+    //          mov fp,sp          when FPOffset is zero.
+    // Note: All stores of callee-saved registers are marked as "FrameSetup".
+    // This code marks the instruction(s) that set the FP also.
+    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
+                    StackOffset::getFixed(FPOffset), TII,
+                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+    if (NeedsWinCFI && HasWinCFI) {
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+          .setMIFlag(MachineInstr::FrameSetup);
+      // After setting up the FP, the rest of the prolog doesn't need to be
+      // included in the SEH unwind info.
+      NeedsWinCFI = false;
+    }
+  }
+  if (EmitAsyncCFI)
+    emitDefineCFAWithFP(MBBI, FixedObject);
+}
+
+// Define the current CFA rule to use the provided FP. The emitted rule is
+// CFA = FP + (FixedObject - OffsetToFirstCalleeSaveFromFP), where the
+// latter is the frame-record offset minus the callee-saved area size.
+void AArch64PrologueEmitter::emitDefineCFAWithFP(
+    MachineBasicBlock::iterator MBBI, unsigned FixedObject) const {
+  const AArch64RegisterInfo *TRI = Subtarget.getRegisterInfo();
+  const int OffsetToFirstCalleeSaveFromFP =
+      AFI->getCalleeSaveBaseToFrameRecordOffset() -
+      AFI->getCalleeSavedStackSize();
+  Register FramePtr = TRI->getFrameRegister(MF);
+  CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
+      .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
+}
+
+// Emit a Windows stack probe: x15 is loaded with the allocation size in
+// 16-byte units, the target's chkstk helper is called, and SP is then
+// adjusted by x15 * 16 (SUBXrx64 with UXTX #4), with optional realignment
+// afterwards. NumBytes is zeroed on return since the whole allocation is
+// performed here. If x15 is live-in (e.g. holding the 'nest' parameter), it
+// is preserved across the sequence via a scratch register.
+void AArch64PrologueEmitter::emitWindowsStackProbe(
+    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t &NumBytes,
+    int64_t RealignmentPadding) const {
+  if (AFI->getSVECalleeSavedStackSize())
+    report_fatal_error("SVE callee saves not yet supported with stack probing");
+
+  // Find an available register to spill the value of X15 to, if X15 is being
+  // used already for nest.
+  unsigned X15Scratch = AArch64::NoRegister;
+  const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
+  if (llvm::any_of(MBB.liveins(),
+                   [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
+                     return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
+                         AArch64::X15, LiveIn.PhysReg);
+                   })) {
+    X15Scratch = AFL.findScratchNonCalleeSaveRegister(&MBB, /*HasCall=*/true);
+    assert(X15Scratch != AArch64::NoRegister &&
+           (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
+#ifndef NDEBUG
+    LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
+#endif
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
+        .addReg(AArch64::XZR)
+        .addReg(AArch64::X15, RegState::Undef)
+        .addReg(AArch64::X15, RegState::Implicit)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
+  uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
+  if (NeedsWinCFI) {
+    HasWinCFI = true;
+    // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
+    // exceed this amount.  We need to move at most 2^24 - 1 into x15.
+    // This is at most two instructions, MOVZ followed by MOVK.
+    // TODO: Fix to use multiple stack alloc unwind codes for stacks
+    // exceeding 256MB in size.
+    if (NumBytes >= (1 << 28))
+      report_fatal_error("Stack size cannot exceed 256MB for stack "
+                         "unwinding purposes");
+
+    uint32_t LowNumWords = NumWords & 0xFFFF;
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
+        .addImm(LowNumWords)
+        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
+        .setMIFlag(MachineInstr::FrameSetup);
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+        .setMIFlag(MachineInstr::FrameSetup);
+    if ((NumWords & 0xFFFF0000) != 0) {
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
+          .addReg(AArch64::X15)
+          .addImm((NumWords & 0xFFFF0000) >> 16) // High half
+          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
+          .setMIFlag(MachineInstr::FrameSetup);
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+  } else {
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
+        .addImm(NumWords)
+        .setMIFlags(MachineInstr::FrameSetup);
+  }
+
+  const char *ChkStk = Subtarget.getChkStkName();
+  switch (MF.getTarget().getCodeModel()) {
+  case CodeModel::Tiny:
+  case CodeModel::Small:
+  case CodeModel::Medium:
+  case CodeModel::Kernel:
+    // Direct BL to the probe helper; it clobbers x16/x17/NZCV and reads x15.
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
+        .addExternalSymbol(ChkStk)
+        .addReg(AArch64::X15, RegState::Implicit)
+        .addReg(AArch64::X16,
+                RegState::Implicit | RegState::Define | RegState::Dead)
+        .addReg(AArch64::X17,
+                RegState::Implicit | RegState::Define | RegState::Dead)
+        .addReg(AArch64::NZCV,
+                RegState::Implicit | RegState::Define | RegState::Dead)
+        .setMIFlags(MachineInstr::FrameSetup);
+    if (NeedsWinCFI) {
+      HasWinCFI = true;
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+    break;
+  case CodeModel::Large:
+    // Large code model: materialize the helper's address in x16, then BLR.
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
+        .addReg(AArch64::X16, RegState::Define)
+        .addExternalSymbol(ChkStk)
+        .addExternalSymbol(ChkStk)
+        .setMIFlags(MachineInstr::FrameSetup);
+    if (NeedsWinCFI) {
+      HasWinCFI = true;
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+
+    BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
+        .addReg(AArch64::X16, RegState::Kill)
+        .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
+        .addReg(AArch64::X16,
+                RegState::Implicit | RegState::Define | RegState::Dead)
+        .addReg(AArch64::X17,
+                RegState::Implicit | RegState::Define | RegState::Dead)
+        .addReg(AArch64::NZCV,
+                RegState::Implicit | RegState::Define | RegState::Dead)
+        .setMIFlags(MachineInstr::FrameSetup);
+    if (NeedsWinCFI) {
+      HasWinCFI = true;
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+    break;
+  }
+
+  // sub sp, sp, x15, uxtx #4 — adjust SP by the probed size (x15 * 16).
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
+      .addReg(AArch64::SP, RegState::Kill)
+      .addReg(AArch64::X15, RegState::Kill)
+      .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
+      .setMIFlags(MachineInstr::FrameSetup)
+  if (NeedsWinCFI) {
+    HasWinCFI = true;
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+        .addImm(NumBytes)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+  NumBytes = 0;
+
+  if (RealignmentPadding > 0) {
+    if (RealignmentPadding >= 4096) {
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
+          .addReg(AArch64::X16, RegState::Define)
+          .addImm(RealignmentPadding)
+          .setMIFlags(MachineInstr::FrameSetup);
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
+          .addReg(AArch64::SP)
+          .addReg(AArch64::X16, RegState::Kill)
+          .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+          .setMIFlag(MachineInstr::FrameSetup);
+    } else {
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
+          .addReg(AArch64::SP)
+          .addImm(RealignmentPadding)
+          .addImm(0)
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+
+    uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+        .addReg(AArch64::X15, RegState::Kill)
+        .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
+    AFI->setStackRealigned(true);
+
+    // No need for SEH instructions here; if we're realigning the stack,
+    // we've set a frame pointer and already finished the SEH prologue.
+    assert(!NeedsWinCFI);
+  }
+  if (X15Scratch != AArch64::NoRegister) {
+    // Restore x15 from the scratch register saved at the top.
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
+        .addReg(AArch64::XZR)
+        .addReg(X15Scratch, RegState::Undef)
+        .addReg(X15Scratch, RegState::Implicit)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+}
+
+// Emit .cfi_offset directives for the non-scalable callee-saved registers.
+// Slots on the scalable-vector stack are skipped here and described by
+// emitCalleeSavedSVELocations instead.
+void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
+    MachineBasicBlock::iterator MBBI) const {
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+  if (CSI.empty())
+    return;
+
+  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
+  for (const auto &Info : CSI) {
+    unsigned FrameIdx = Info.getFrameIdx();
+    if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector)
+      continue;
+
+    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
+    int64_t Offset = MFI.getObjectOffset(FrameIdx) - AFL.getOffsetOfLocalArea();
+    CFIBuilder.buildOffset(Info.getReg(), Offset);
+  }
+}
+
+// Emit CFI describing the SVE callee-save slots, as scalable offsets below
+// the callee-saved area. When VG must be saved (AFL.requiresSaveVG), the
+// DWARF offset of the incoming VG slot is threaded through so the emitted
+// expressions can be stated in terms of the incoming vector length.
+void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
+    MachineBasicBlock::iterator MBBI) const {
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  // Add callee saved registers to move list.
+  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+  if (CSI.empty())
+    return;
+
+  const TargetSubtargetInfo &STI = MF.getSubtarget();
+  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+  AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
+
+  std::optional<int64_t> IncomingVGOffsetFromDefCFA;
+  if (AFL.requiresSaveVG(MF)) {
+    auto IncomingVG = *find_if(
+        reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
+    IncomingVGOffsetFromDefCFA = MFI.getObjectOffset(IncomingVG.getFrameIdx()) -
+                                 AFL.getOffsetOfLocalArea();
+  }
+
+  for (const auto &Info : CSI) {
+    if (MFI.getStackID(Info.getFrameIdx()) != TargetStackID::ScalableVector)
+      continue;
+
+    // Not all unwinders may know about SVE registers, so assume the lowest
+    // common denominator.
+    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
+    MCRegister Reg = Info.getReg();
+    if (!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
+      continue;
+
+    StackOffset Offset =
+        StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
+        StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI));
+
+    CFIBuilder.insertCFIInst(
+        createCFAOffset(TRI, Reg, Offset, IncomingVGOffsetFromDefCFA));
+  }
+}
+
+} // namespace llvm
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
new file mode 100644
index 0000000000000..94029ede60c76
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
@@ -0,0 +1,111 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declaration of the AArch64PrologueEmitter class,
+/// which is used to emit the prologue on AArch64.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64PROLOGUEEPILOGUE_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64PROLOGUEEPILOGUE_H
+
+#include "AArch64RegisterInfo.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class AArch64Subtarget;
+class AArch64FunctionInfo;
+class AArch64FrameLowering;
+
+/// A helper class for emitting the prologue. Substantial new functionality
+/// should be factored into a new method. Where possible "emit*" methods should
+/// be const, and any flags that change how the prologue is emitted should be
+/// set in the constructor.
+class AArch64PrologueEmitter {
+public:
+  AArch64PrologueEmitter(MachineFunction &MF, MachineBasicBlock &MBB,
+                         const AArch64FrameLowering &AFL);
+
+  /// Emit the prologue.
+  void emitPrologue();
+
+  // The destructor publishes HasWinCFI to the MachineFunction and, in debug
+  // builds, verifies the registers clobbered by the emitted prologue.
+  ~AArch64PrologueEmitter() {
+    MF.setHasWinCFI(HasWinCFI);
+#ifndef NDEBUG
+    verifyPrologueClobbers();
+#endif
+  }
+
+private:
+  /// Pushes LR onto the shadow call stack (via x18) and emits the matching
+  /// unwind info.
+  void emitShadowCallStackPrologue(MachineBasicBlock::iterator MBBI,
+                                   const DebugLoc &DL) const;
+
+  /// Marks FP to signal a Swift extended frame (sets bit 60, directly or via
+  /// a runtime-provided flag symbol).
+  void emitSwiftAsyncContextFramePointer(MachineBasicBlock::iterator MBBI,
+                                         const DebugLoc &DL) const;
+
+  /// Prologue for functions without a stack frame: nothing, a red zone, or a
+  /// bare SP adjustment.
+  void emitEmptyStackFramePrologue(int64_t NumBytes,
+                                   MachineBasicBlock::iterator MBBI,
+                                   const DebugLoc &DL) const;
+
+  /// Sets up FP (and the Swift async context slot, if used). Non-const: may
+  /// clear NeedsWinCFI once the SEH prologue is complete.
+  void emitFramePointerSetup(MachineBasicBlock::iterator MBBI,
+                             const DebugLoc &DL, unsigned FixedObject);
+
+  /// Defines the current CFA rule in terms of the frame pointer.
+  void emitDefineCFAWithFP(MachineBasicBlock::iterator MBBI,
+                           unsigned FixedObject) const;
+
+  /// Emits a chkstk-style Windows stack probe and adjusts SP by the probed
+  /// size; NumBytes is zeroed since the allocation happens here.
+  void emitWindowsStackProbe(MachineBasicBlock::iterator MBBI,
+                             const DebugLoc &DL, int64_t &NumBytes,
+                             int64_t RealignmentPadding) const;
+
+  /// CFI for the non-scalable (GPR/FPR) callee-save slots.
+  void emitCalleeSavedGPRLocations(MachineBasicBlock::iterator MBBI) const;
+  /// CFI for the SVE (scalable-vector) callee-save slots.
+  void emitCalleeSavedSVELocations(MachineBasicBlock::iterator MBBI) const;
+
+  /// Computes the local stack size and decides whether the callee-save and
+  /// local stack bumps can be combined (sets CombineSPBump; see flag note
+  /// below).
+  void determineLocalsStackSize(uint64_t StackSize, uint64_t PrologueSaveSize);
+
+  MachineFunction &MF;
+  MachineBasicBlock &MBB;
+
+  const Function &F;
+  const MachineFrameInfo &MFI;
+  const AArch64Subtarget &Subtarget;
+  const AArch64FrameLowering &AFL;
+  const AArch64RegisterInfo &RegInfo;
+
+#ifndef NDEBUG
+  // Debug-only state and helpers for verifying prologue register clobbers.
+  mutable LivePhysRegs LiveRegs{RegInfo};
+  MachineBasicBlock::iterator PrologueEndI;
+
+  void collectBlockLiveins();
+  void verifyPrologueClobbers() const;
+#endif
+
+  // Prologue flags. These generally should not change outside of the
+  // constructor. Two exceptions are "CombineSPBump" which is set in
+  // determineLocalsStackSize, and "NeedsWinCFI" which is set in
+  // emitFramePointerSetup.
+  bool EmitCFI = false;
+  bool EmitAsyncCFI = false;
+  bool HasFP = false;
+  bool IsFunclet = false;
+  bool CombineSPBump = false;
+  bool HomPrologEpilog = false;
+  bool NeedsWinCFI = false;
+
+  // Note: "HasWinCFI" is mutable as it can change in any "emit" function.
+  mutable bool HasWinCFI = false;
+
+  const TargetInstrInfo *TII = nullptr;
+  AArch64FunctionInfo *AFI = nullptr;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 79b56ea9cf850..2bce75f75b964 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -92,6 +92,7 @@ add_llvm_target(AArch64CodeGen
   SVEIntrinsicOpts.cpp
   MachineSMEABIPass.cpp
   AArch64SIMDInstrOpt.cpp
+  AArch64PrologueEpilogue.cpp
 
   DEPENDS
   intrinsics_gen



More information about the llvm-commits mailing list