[llvm] [AArch64] Break up `AArch64FrameLowering::emitEpilogue` (NFCI) (PR #157889)

Benjamin Maxwell via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 10 09:08:14 PDT 2025


https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/157889

This is much smaller than #157485 (as the epilogue code was already a more reasonable size); however, this change will allow some further tidying up of methods shared between the prologue and epilogue code (in a follow-up patch).
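
As context for the diff below: the change is largely mechanical. The body of `AArch64FrameLowering::emitEpilogue` moves into a new `AArch64EpilogueEmitter` class, which captures the shared state (machine function, block, subtarget, and CFI flags) once in its constructor, so the old entry point shrinks to constructing the emitter and calling `emitEpilogue()`. The snippet below is a minimal sketch of that pattern in isolation, assuming simplified placeholder names (`FrameLowering`, `EpilogueEmitter`, `State`) rather than the real LLVM classes; it is illustrative only, not code from the patch.

```cpp
// Minimal sketch of the "emitter object" refactoring pattern: a large member
// function is moved into a small helper class that captures shared state in
// its constructor. All names here are simplified placeholders.
#include <iostream>
#include <string>

struct State {
  std::string FunctionName;
  int StackSize = 0;
};

class FrameLowering; // forward declaration so the emitter can hold a reference

class EpilogueEmitter {
public:
  EpilogueEmitter(State &S, const FrameLowering &FL) : S(S), FL(FL) {}

  // The former monolithic function body lives here and can be split into
  // focused private helpers without growing FrameLowering itself.
  void emitEpilogue() {
    deallocateLocals();
    restoreCalleeSaves();
  }

private:
  void deallocateLocals() {
    std::cout << "  add sp, sp, #" << S.StackSize << "\n";
  }
  void restoreCalleeSaves() {
    std::cout << "  ldp x29, x30, [sp], #16\n";
  }

  State &S;
  const FrameLowering &FL; // shared queries stay on the original class
};

class FrameLowering {
public:
  // The public entry point shrinks to constructing the emitter and delegating.
  void emitEpilogue(State &S) const {
    EpilogueEmitter(S, *this).emitEpilogue();
  }

  // Befriending the emitter lets it call non-public helpers directly.
  friend class EpilogueEmitter;
};

int main() {
  State S{"example", 32};
  FrameLowering FL;
  std::cout << S.FunctionName << " epilogue:\n";
  FL.emitEpilogue(S);
}
```

In the actual patch, the emitter additionally runs its finishing touches from its destructor (`~AArch64EpilogueEmitter()` calls `finalizeEpilogue()`, replacing the previous `make_scope_exit` lambda), and `AArch64FrameLowering` befriends the emitter so the shared helpers remain accessible.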

>From e27cb4e91ed4b59714f45ea7ce816e1f588e6919 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 8 Sep 2025 16:15:40 +0000
Subject: [PATCH] [AArch64] Break up `AArch64FrameLowering::emitEpilogue`
 (NFCI)

This is much smaller than #157485 (as the epilogue code was already a
more reasonable size); however, this change will allow some further
tidying up of methods shared between the prologue and epilogue code (in
a follow-up patch).
---
 .../Target/AArch64/AArch64FrameLowering.cpp   | 454 +-----------------
 .../lib/Target/AArch64/AArch64FrameLowering.h |  18 +-
 .../AArch64/AArch64PrologueEpilogue.cpp       | 432 +++++++++++++++++
 .../Target/AArch64/AArch64PrologueEpilogue.h  |  62 ++-
 4 files changed, 510 insertions(+), 456 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 175b5e04d82ff..fd53f04443766 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -293,14 +293,9 @@ static cl::opt<bool> DisableMultiVectorSpillFill(
     cl::desc("Disable use of LD/ST pairs for SME2 or SVE2p1"), cl::init(false),
     cl::Hidden);
 
-/// Returns how much of the incoming argument stack area (in bytes) we should
-/// clean up in an epilogue. For the C calling convention this will be 0, for
-/// guaranteed tail call conventions it can be positive (a normal return or a
-/// tail call to a function that uses less stack space for arguments) or
-/// negative (for a tail call to a function that needs more stack space than us
-/// for arguments).
-static int64_t getArgumentStackToRestore(MachineFunction &MF,
-                                         MachineBasicBlock &MBB) {
+int64_t
+AArch64FrameLowering::getArgumentStackToRestore(MachineFunction &MF,
+                                                MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   bool IsTailCallReturn = (MBB.end() != MBBI)
@@ -711,44 +706,6 @@ void AArch64FrameLowering::resetCFIToInitialState(
   }
 }
 
-static void emitCalleeSavedRestores(MachineBasicBlock &MBB,
-                                    MachineBasicBlock::iterator MBBI,
-                                    bool SVE) {
-  MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo &MFI = MF.getFrameInfo();
-
-  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
-  if (CSI.empty())
-    return;
-
-  const TargetSubtargetInfo &STI = MF.getSubtarget();
-  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
-  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
-
-  for (const auto &Info : CSI) {
-    if (SVE !=
-        (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
-      continue;
-
-    MCRegister Reg = Info.getReg();
-    if (SVE &&
-        !static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
-      continue;
-
-    CFIBuilder.buildRestore(Info.getReg());
-  }
-}
-
-void AArch64FrameLowering::emitCalleeSavedGPRRestores(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
-  emitCalleeSavedRestores(MBB, MBBI, false);
-}
-
-void AArch64FrameLowering::emitCalleeSavedSVERestores(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
-  emitCalleeSavedRestores(MBB, MBBI, true);
-}
-
 // Return the maximum possible number of bytes for `Size` due to the
 // architectural limit on the size of a SVE register.
 static int64_t upperBound(StackOffset Size) {
@@ -1642,28 +1599,6 @@ bool AArch64FrameLowering::isSVECalleeSave(
   }
 }
 
-static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII,
-                                        MachineFunction &MF,
-                                        MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator MBBI,
-                                        const DebugLoc &DL, bool NeedsWinCFI) {
-  // Shadow call stack epilog: ldr x30, [x18, #-8]!
-  BuildMI(MBB, MBBI, DL, TII.get(AArch64::LDRXpre))
-      .addReg(AArch64::X18, RegState::Define)
-      .addReg(AArch64::LR, RegState::Define)
-      .addReg(AArch64::X18)
-      .addImm(-8)
-      .setMIFlag(MachineInstr::FrameDestroy);
-
-  if (NeedsWinCFI)
-    BuildMI(MBB, MBBI, DL, TII.get(AArch64::SEH_Nop))
-        .setMIFlag(MachineInstr::FrameDestroy);
-
-  if (MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(MF))
-    CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
-        .buildRestore(AArch64::X18);
-}
-
 void AArch64FrameLowering::emitPacRetPlusLeafHardening(
     MachineFunction &MF) const {
   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
@@ -1703,389 +1638,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   PrologueEmitter.emitPrologue();
 }
 
-static bool isFuncletReturnInstr(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    return false;
-  case AArch64::CATCHRET:
-  case AArch64::CLEANUPRET:
-    return true;
-  }
-}
-
 void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                         MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
-  MachineFrameInfo &MFI = MF.getFrameInfo();
-  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
-  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
-  DebugLoc DL;
-  bool NeedsWinCFI = needsWinCFI(MF);
-  bool EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
-  bool HasWinCFI = false;
-  bool IsFunclet = false;
-
-  if (MBB.end() != MBBI) {
-    DL = MBBI->getDebugLoc();
-    IsFunclet = isFuncletReturnInstr(*MBBI);
-  }
-
-  MachineBasicBlock::iterator EpilogStartI = MBB.end();
-
-  auto FinishingTouches = make_scope_exit([&]() {
-    if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
-      emitShadowCallStackEpilogue(*TII, MF, MBB, MBB.getFirstTerminator(), DL,
-                                  NeedsWinCFI);
-      HasWinCFI |= NeedsWinCFI;
-    }
-    if (EmitCFI)
-      emitCalleeSavedGPRRestores(MBB, MBB.getFirstTerminator());
-    if (AFI->shouldSignReturnAddress(MF)) {
-      // If pac-ret+leaf is in effect, PAUTH_EPILOGUE pseudo instructions
-      // are inserted by emitPacRetPlusLeafHardening().
-      if (!shouldSignReturnAddressEverywhere(MF)) {
-        BuildMI(MBB, MBB.getFirstTerminator(), DL,
-                TII->get(AArch64::PAUTH_EPILOGUE))
-            .setMIFlag(MachineInstr::FrameDestroy);
-      }
-      // AArch64PointerAuth pass will insert SEH_PACSignLR
-      HasWinCFI |= NeedsWinCFI;
-    }
-    if (HasWinCFI) {
-      BuildMI(MBB, MBB.getFirstTerminator(), DL,
-              TII->get(AArch64::SEH_EpilogEnd))
-          .setMIFlag(MachineInstr::FrameDestroy);
-      if (!MF.hasWinCFI())
-        MF.setHasWinCFI(true);
-    }
-    if (NeedsWinCFI) {
-      assert(EpilogStartI != MBB.end());
-      if (!HasWinCFI)
-        MBB.erase(EpilogStartI);
-    }
-  });
-
-  int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF)
-                               : MFI.getStackSize();
-
-  // All calls are tail calls in GHC calling conv, and functions have no
-  // prologue/epilogue.
-  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
-    return;
-
-  // How much of the stack used by incoming arguments this function is expected
-  // to restore in this particular epilogue.
-  int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB);
-  bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
-                                              MF.getFunction().isVarArg());
-  unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
-
-  int64_t AfterCSRPopSize = ArgumentStackToRestore;
-  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
-  // We cannot rely on the local stack size set in emitPrologue if the function
-  // has funclets, as funclets have different local stack size requirements, and
-  // the current value set in emitPrologue may be that of the containing
-  // function.
-  if (MF.hasEHFunclets())
-    AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
-  if (homogeneousPrologEpilog(MF, &MBB)) {
-    assert(!NeedsWinCFI);
-    auto FirstHomogenousEpilogI = MBB.getFirstTerminator();
-    if (FirstHomogenousEpilogI != MBB.begin()) {
-      auto HomogeneousEpilog = std::prev(FirstHomogenousEpilogI);
-      if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
-        FirstHomogenousEpilogI = HomogeneousEpilog;
-    }
-
-    // Adjust local stack
-    emitFrameOffset(MBB, FirstHomogenousEpilogI, DL, AArch64::SP, AArch64::SP,
-                    StackOffset::getFixed(AFI->getLocalStackSize()), TII,
-                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
-
-    // SP has been already adjusted while restoring callee save regs.
-    // We've bailed-out the case with adjusting SP for arguments.
-    assert(AfterCSRPopSize == 0);
-    return;
-  }
-
-  bool FPAfterSVECalleeSaves =
-      Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
-
-  bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
-  // Assume we can't combine the last pop with the sp restore.
-  bool CombineAfterCSRBump = false;
-  if (FPAfterSVECalleeSaves) {
-    AfterCSRPopSize += FixedObject;
-  } else if (!CombineSPBump && PrologueSaveSize != 0) {
-    MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
-    while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
-           AArch64InstrInfo::isSEHInstruction(*Pop))
-      Pop = std::prev(Pop);
-    // Converting the last ldp to a post-index ldp is valid only if the last
-    // ldp's offset is 0.
-    const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
-    // If the offset is 0 and the AfterCSR pop is not actually trying to
-    // allocate more stack for arguments (in space that an untimely interrupt
-    // may clobber), convert it to a post-index ldp.
-    if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
-      convertCalleeSaveRestoreToSPPrePostIncDec(
-          MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
-          MachineInstr::FrameDestroy, PrologueSaveSize);
-    } else {
-      // If not, make sure to emit an add after the last ldp.
-      // We're doing this by transferring the size to be restored from the
-      // adjustment *before* the CSR pops to the adjustment *after* the CSR
-      // pops.
-      AfterCSRPopSize += PrologueSaveSize;
-      CombineAfterCSRBump = true;
-    }
-  }
-
-  // Move past the restores of the callee-saved registers.
-  // If we plan on combining the sp bump of the local stack size and the callee
-  // save stack size, we might need to adjust the CSR save and restore offsets.
-  MachineBasicBlock::iterator FirstGPRRestoreI = MBB.getFirstTerminator();
-  MachineBasicBlock::iterator Begin = MBB.begin();
-  while (FirstGPRRestoreI != Begin) {
-    --FirstGPRRestoreI;
-    if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
-        (!FPAfterSVECalleeSaves && isSVECalleeSave(FirstGPRRestoreI))) {
-      ++FirstGPRRestoreI;
-      break;
-    } else if (CombineSPBump)
-      fixupCalleeSaveRestoreStackOffset(
-          *FirstGPRRestoreI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI);
-  }
-
-  if (NeedsWinCFI) {
-    // Note that there are cases where we insert SEH opcodes in the
-    // epilogue when we had no SEH opcodes in the prologue. For
-    // example, when there is no stack frame but there are stack
-    // arguments. Insert the SEH_EpilogStart and remove it later if it
-    // we didn't emit any SEH opcodes to avoid generating WinCFI for
-    // functions that don't need it.
-    BuildMI(MBB, FirstGPRRestoreI, DL, TII->get(AArch64::SEH_EpilogStart))
-        .setMIFlag(MachineInstr::FrameDestroy);
-    EpilogStartI = FirstGPRRestoreI;
-    --EpilogStartI;
-  }
-
-  if (hasFP(MF) && AFI->hasSwiftAsyncContext()) {
-    switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
-    case SwiftAsyncFramePointerMode::DeploymentBased:
-      // Avoid the reload as it is GOT relative, and instead fall back to the
-      // hardcoded value below.  This allows a mismatch between the OS and
-      // application without immediately terminating on the difference.
-      [[fallthrough]];
-    case SwiftAsyncFramePointerMode::Always:
-      // We need to reset FP to its untagged state on return. Bit 60 is
-      // currently used to show the presence of an extended frame.
-
-      // BIC x29, x29, #0x1000_0000_0000_0000
-      BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
-              AArch64::FP)
-          .addUse(AArch64::FP)
-          .addImm(0x10fe)
-          .setMIFlag(MachineInstr::FrameDestroy);
-      if (NeedsWinCFI) {
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
-            .setMIFlags(MachineInstr::FrameDestroy);
-        HasWinCFI = true;
-      }
-      break;
-
-    case SwiftAsyncFramePointerMode::Never:
-      break;
-    }
-  }
-
-  const StackOffset &SVEStackSize = getSVEStackSize(MF);
-
-  // If there is a single SP update, insert it before the ret and we're done.
-  if (CombineSPBump) {
-    assert(!SVEStackSize && "Cannot combine SP bump with SVE");
-
-    // When we are about to restore the CSRs, the CFA register is SP again.
-    if (EmitCFI && hasFP(MF))
-      CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
-          .buildDefCFA(AArch64::SP, NumBytes);
-
-    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
-                    StackOffset::getFixed(NumBytes + AfterCSRPopSize), TII,
-                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI,
-                    EmitCFI, StackOffset::getFixed(NumBytes));
-    return;
-  }
-
-  NumBytes -= PrologueSaveSize;
-  assert(NumBytes >= 0 && "Negative stack allocation size!?");
-
-  // Process the SVE callee-saves to determine what space needs to be
-  // deallocated.
-  StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
-  MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
-                              RestoreEnd = FirstGPRRestoreI;
-  if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
-    if (FPAfterSVECalleeSaves)
-      RestoreEnd = MBB.getFirstTerminator();
-
-    RestoreBegin = std::prev(RestoreEnd);
-    while (RestoreBegin != MBB.begin() &&
-           isSVECalleeSave(std::prev(RestoreBegin)))
-      --RestoreBegin;
-
-    assert(isSVECalleeSave(RestoreBegin) &&
-           isSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
-
-    StackOffset CalleeSavedSizeAsOffset =
-        StackOffset::getScalable(CalleeSavedSize);
-    DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
-    DeallocateAfter = CalleeSavedSizeAsOffset;
-  }
-
-  // Deallocate the SVE area.
-  if (FPAfterSVECalleeSaves) {
-    // If the callee-save area is before FP, restoring the FP implicitly
-    // deallocates non-callee-save SVE allocations.  Otherwise, deallocate
-    // them explicitly.
-    if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
-      emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
-                      DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
-                      NeedsWinCFI, &HasWinCFI);
-    }
-
-    // Deallocate callee-save non-SVE registers.
-    emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
-                    StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
-                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
-
-    // Deallocate fixed objects.
-    emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
-                    StackOffset::getFixed(FixedObject), TII,
-                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
-
-    // Deallocate callee-save SVE registers.
-    emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
-                    DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
-                    NeedsWinCFI, &HasWinCFI);
-  } else if (SVEStackSize) {
-    int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
-    // If we have stack realignment or variable-sized objects we must use the
-    // FP to restore SVE callee saves (as there is an unknown amount of
-    // data/padding between the SP and SVE CS area).
-    Register BaseForSVEDealloc =
-        (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
-                                                              : AArch64::SP;
-    if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
-      Register CalleeSaveBase = AArch64::FP;
-      if (int64_t CalleeSaveBaseOffset =
-              AFI->getCalleeSaveBaseToFrameRecordOffset()) {
-        // If we have have an non-zero offset to the non-SVE CS base we need to
-        // compute the base address by subtracting the offest in a temporary
-        // register first (to avoid briefly deallocating the SVE CS).
-        CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
-            &AArch64::GPR64RegClass);
-        emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
-                        StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
-                        MachineInstr::FrameDestroy);
-      }
-      // The code below will deallocate the stack space space by moving the
-      // SP to the start of the SVE callee-save area.
-      emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
-                      StackOffset::getScalable(-SVECalleeSavedSize), TII,
-                      MachineInstr::FrameDestroy);
-    } else if (BaseForSVEDealloc == AArch64::SP) {
-      if (SVECalleeSavedSize) {
-        // Deallocate the non-SVE locals first before we can deallocate (and
-        // restore callee saves) from the SVE area.
-        emitFrameOffset(
-            MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
-            StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
-            false, NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP(MF),
-            SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
-        NumBytes = 0;
-      }
-
-      emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
-                      DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
-                      NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP(MF),
-                      SVEStackSize +
-                          StackOffset::getFixed(NumBytes + PrologueSaveSize));
-
-      emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
-                      DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
-                      NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP(MF),
-                      DeallocateAfter +
-                          StackOffset::getFixed(NumBytes + PrologueSaveSize));
-    }
-    if (EmitCFI)
-      emitCalleeSavedSVERestores(MBB, RestoreEnd);
-  }
-
-  if (!hasFP(MF)) {
-    bool RedZone = canUseRedZone(MF);
-    // If this was a redzone leaf function, we don't need to restore the
-    // stack pointer (but we may need to pop stack args for fastcc).
-    if (RedZone && AfterCSRPopSize == 0)
-      return;
-
-    // Pop the local variables off the stack. If there are no callee-saved
-    // registers, it means we are actually positioned at the terminator and can
-    // combine stack increment for the locals and the stack increment for
-    // callee-popped arguments into (possibly) a single instruction and be done.
-    bool NoCalleeSaveRestore = PrologueSaveSize == 0;
-    int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
-    if (NoCalleeSaveRestore)
-      StackRestoreBytes += AfterCSRPopSize;
-
-    emitFrameOffset(
-        MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
-        StackOffset::getFixed(StackRestoreBytes), TII,
-        MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI,
-        StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));
-
-    // If we were able to combine the local stack pop with the argument pop,
-    // then we're done.
-    if (NoCalleeSaveRestore || AfterCSRPopSize == 0) {
-      return;
-    }
-
-    NumBytes = 0;
-  }
-
-  // Restore the original stack pointer.
-  // FIXME: Rather than doing the math here, we should instead just use
-  // non-post-indexed loads for the restores if we aren't actually going to
-  // be able to save any instructions.
-  if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
-    emitFrameOffset(
-        MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::FP,
-        StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
-        TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
-  } else if (NumBytes)
-    emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
-                    StackOffset::getFixed(NumBytes), TII,
-                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
-
-  // When we are about to restore the CSRs, the CFA register is SP again.
-  if (EmitCFI && hasFP(MF))
-    CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
-        .buildDefCFA(AArch64::SP, PrologueSaveSize);
-
-  // This must be placed after the callee-save restore code because that code
-  // assumes the SP is at the same location as it was after the callee-save save
-  // code in the prologue.
-  if (AfterCSRPopSize) {
-    assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
-                                  "interrupt may have clobbered");
-
-    emitFrameOffset(
-        MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
-        StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
-        false, NeedsWinCFI, &HasWinCFI, EmitCFI,
-        StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
-  }
+  AArch64EpilogueEmitter EpilogueEmitter(MF, MBB, *this);
+  EpilogueEmitter.emitEpilogue();
 }
 
 bool AArch64FrameLowering::enableCFIFixup(const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index a9d65441a4e30..0825d03bcb0d8 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -22,6 +22,7 @@ namespace llvm {
 class TargetLowering;
 class AArch64FunctionInfo;
 class AArch64PrologueEmitter;
+class AArch64EpilogueEmitter;
 
 class AArch64FrameLowering : public TargetFrameLowering {
 public:
@@ -134,7 +135,6 @@ class AArch64FrameLowering : public TargetFrameLowering {
     return StackId != TargetStackID::ScalableVector;
   }
 
-  friend class AArch64PrologueEmitter;
   void
   orderFrameObjects(const MachineFunction &MF,
                     SmallVectorImpl<int> &ObjectsToAllocate) const override;
@@ -147,6 +147,9 @@ class AArch64FrameLowering : public TargetFrameLowering {
 
   StackOffset getSVEStackSize(const MachineFunction &MF) const;
 
+  friend class AArch64PrologueEmitter;
+  friend class AArch64EpilogueEmitter;
+
 protected:
   bool hasFPImpl(const MachineFunction &MF) const override;
 
@@ -170,10 +173,6 @@ class AArch64FrameLowering : public TargetFrameLowering {
                                       int &MaxCSFrameIndex) const;
   bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
                                                 uint64_t StackBumpBytes) const;
-  void emitCalleeSavedGPRRestores(MachineBasicBlock &MBB,
-                                  MachineBasicBlock::iterator MBBI) const;
-  void emitCalleeSavedSVERestores(MachineBasicBlock &MBB,
-                                  MachineBasicBlock::iterator MBBI) const;
   void allocateStackSpace(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           int64_t RealignmentPadding, StackOffset AllocSize,
@@ -215,6 +214,15 @@ class AArch64FrameLowering : public TargetFrameLowering {
   StackOffset getStackOffset(const MachineFunction &MF,
                              int64_t ObjectOffset) const;
 
+  /// Returns how much of the incoming argument stack area (in bytes) we should
+  /// clean up in an epilogue. For the C calling convention this will be 0, for
+  /// guaranteed tail call conventions it can be positive (a normal return or a
+  /// tail call to a function that uses less stack space for arguments) or
+  /// negative (for a tail call to a function that needs more stack space than
+  /// us for arguments).
+  int64_t getArgumentStackToRestore(MachineFunction &MF,
+                                    MachineBasicBlock &MBB) const;
+
   // Find a scratch register that we can use at the start of the prologue to
   // re-align the stack pointer.  We avoid using callee-save registers since
   // they may appear to be free when this is called from canUseAsPrologue
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index af424987b8ddb..700c45a8aec9a 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -791,4 +791,436 @@ void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
   }
 }
 
+static bool isFuncletReturnInstr(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  case AArch64::CATCHRET:
+  case AArch64::CLEANUPRET:
+    return true;
+  }
+}
+
+AArch64EpilogueEmitter::AArch64EpilogueEmitter(MachineFunction &MF,
+                                               MachineBasicBlock &MBB,
+                                               const AArch64FrameLowering &AFL)
+    : MF(MF), MBB(MBB), MFI(MF.getFrameInfo()),
+      Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL) {
+  TII = Subtarget.getInstrInfo();
+  AFI = MF.getInfo<AArch64FunctionInfo>();
+
+  NeedsWinCFI = AFL.needsWinCFI(MF);
+  EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
+  SEHEpilogueStartI = MBB.end();
+}
+
+void AArch64EpilogueEmitter::emitEpilogue() {
+  MachineBasicBlock::iterator EpilogueEndI = MBB.getLastNonDebugInstr();
+  if (MBB.end() != EpilogueEndI) {
+    DL = EpilogueEndI->getDebugLoc();
+    IsFunclet = isFuncletReturnInstr(*EpilogueEndI);
+  }
+
+  int64_t NumBytes =
+      IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
+
+  // All calls are tail calls in GHC calling conv, and functions have no
+  // prologue/epilogue.
+  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
+    return;
+
+  // How much of the stack used by incoming arguments this function is expected
+  // to restore in this particular epilogue.
+  int64_t ArgumentStackToRestore = AFL.getArgumentStackToRestore(MF, MBB);
+  bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
+                                              MF.getFunction().isVarArg());
+  unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
+
+  int64_t AfterCSRPopSize = ArgumentStackToRestore;
+  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
+  // We cannot rely on the local stack size set in emitPrologue if the function
+  // has funclets, as funclets have different local stack size requirements, and
+  // the current value set in emitPrologue may be that of the containing
+  // function.
+  if (MF.hasEHFunclets())
+    AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
+
+  if (AFL.homogeneousPrologEpilog(MF, &MBB)) {
+    assert(!NeedsWinCFI);
+    auto FirstHomogenousEpilogI = MBB.getFirstTerminator();
+    if (FirstHomogenousEpilogI != MBB.begin()) {
+      auto HomogeneousEpilog = std::prev(FirstHomogenousEpilogI);
+      if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
+        FirstHomogenousEpilogI = HomogeneousEpilog;
+    }
+
+    // Adjust local stack
+    emitFrameOffset(MBB, FirstHomogenousEpilogI, DL, AArch64::SP, AArch64::SP,
+                    StackOffset::getFixed(AFI->getLocalStackSize()), TII,
+                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
+
+    // SP has been already adjusted while restoring callee save regs.
+    // We've bailed-out the case with adjusting SP for arguments.
+    assert(AfterCSRPopSize == 0);
+    return;
+  }
+
+  bool FPAfterSVECalleeSaves =
+      Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
+
+  bool CombineSPBump =
+      AFL.shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
+  // Assume we can't combine the last pop with the sp restore.
+  bool CombineAfterCSRBump = false;
+  if (FPAfterSVECalleeSaves) {
+    AfterCSRPopSize += FixedObject;
+  } else if (!CombineSPBump && PrologueSaveSize != 0) {
+    MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
+    while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
+           AArch64InstrInfo::isSEHInstruction(*Pop))
+      Pop = std::prev(Pop);
+    // Converting the last ldp to a post-index ldp is valid only if the last
+    // ldp's offset is 0.
+    const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
+    // If the offset is 0 and the AfterCSR pop is not actually trying to
+    // allocate more stack for arguments (in space that an untimely interrupt
+    // may clobber), convert it to a post-index ldp.
+    if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
+      AFL.convertCalleeSaveRestoreToSPPrePostIncDec(
+          MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
+          MachineInstr::FrameDestroy, PrologueSaveSize);
+    } else {
+      // If not, make sure to emit an add after the last ldp.
+      // We're doing this by transferring the size to be restored from the
+      // adjustment *before* the CSR pops to the adjustment *after* the CSR
+      // pops.
+      AfterCSRPopSize += PrologueSaveSize;
+      CombineAfterCSRBump = true;
+    }
+  }
+
+  // Move past the restores of the callee-saved registers.
+  // If we plan on combining the sp bump of the local stack size and the callee
+  // save stack size, we might need to adjust the CSR save and restore offsets.
+  MachineBasicBlock::iterator FirstGPRRestoreI = MBB.getFirstTerminator();
+  MachineBasicBlock::iterator Begin = MBB.begin();
+  while (FirstGPRRestoreI != Begin) {
+    --FirstGPRRestoreI;
+    if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
+        (!FPAfterSVECalleeSaves && AFL.isSVECalleeSave(FirstGPRRestoreI))) {
+      ++FirstGPRRestoreI;
+      break;
+    } else if (CombineSPBump)
+      AFL.fixupCalleeSaveRestoreStackOffset(
+          *FirstGPRRestoreI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI);
+  }
+
+  if (NeedsWinCFI) {
+    // Note that there are cases where we insert SEH opcodes in the
+    // epilogue when we had no SEH opcodes in the prologue. For
+    // example, when there is no stack frame but there are stack
+    // arguments. Insert the SEH_EpilogStart and remove it later if
+    // we didn't emit any SEH opcodes to avoid generating WinCFI for
+    // functions that don't need it.
+    BuildMI(MBB, FirstGPRRestoreI, DL, TII->get(AArch64::SEH_EpilogStart))
+        .setMIFlag(MachineInstr::FrameDestroy);
+    SEHEpilogueStartI = FirstGPRRestoreI;
+    --SEHEpilogueStartI;
+  }
+
+  if (AFL.hasFP(MF) && AFI->hasSwiftAsyncContext())
+    emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
+
+  const StackOffset &SVEStackSize = AFL.getSVEStackSize(MF);
+
+  // If there is a single SP update, insert it before the ret and we're done.
+  if (CombineSPBump) {
+    assert(!SVEStackSize && "Cannot combine SP bump with SVE");
+
+    // When we are about to restore the CSRs, the CFA register is SP again.
+    if (EmitCFI && AFL.hasFP(MF))
+      CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
+          .buildDefCFA(AArch64::SP, NumBytes);
+
+    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
+                    StackOffset::getFixed(NumBytes + AfterCSRPopSize), TII,
+                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI,
+                    EmitCFI, StackOffset::getFixed(NumBytes));
+    return;
+  }
+
+  NumBytes -= PrologueSaveSize;
+  assert(NumBytes >= 0 && "Negative stack allocation size!?");
+
+  // Process the SVE callee-saves to determine what space needs to be
+  // deallocated.
+  StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
+  MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
+                              RestoreEnd = FirstGPRRestoreI;
+  if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
+    if (FPAfterSVECalleeSaves)
+      RestoreEnd = MBB.getFirstTerminator();
+
+    RestoreBegin = std::prev(RestoreEnd);
+    while (RestoreBegin != MBB.begin() &&
+           AFL.isSVECalleeSave(std::prev(RestoreBegin)))
+      --RestoreBegin;
+
+    assert(AFL.isSVECalleeSave(RestoreBegin) &&
+           AFL.isSVECalleeSave(std::prev(RestoreEnd)) &&
+           "Unexpected instruction");
+
+    StackOffset CalleeSavedSizeAsOffset =
+        StackOffset::getScalable(CalleeSavedSize);
+    DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
+    DeallocateAfter = CalleeSavedSizeAsOffset;
+  }
+
+  // Deallocate the SVE area.
+  if (FPAfterSVECalleeSaves) {
+    // If the callee-save area is before FP, restoring the FP implicitly
+    // deallocates non-callee-save SVE allocations.  Otherwise, deallocate
+    // them explicitly.
+    if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
+      emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
+                      DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
+                      NeedsWinCFI, &HasWinCFI);
+    }
+
+    // Deallocate callee-save non-SVE registers.
+    emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
+                    StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
+                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
+
+    // Deallocate fixed objects.
+    emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
+                    StackOffset::getFixed(FixedObject), TII,
+                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
+
+    // Deallocate callee-save SVE registers.
+    emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
+                    DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
+                    NeedsWinCFI, &HasWinCFI);
+  } else if (SVEStackSize) {
+    int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
+    // If we have stack realignment or variable-sized objects we must use the
+    // FP to restore SVE callee saves (as there is an unknown amount of
+    // data/padding between the SP and SVE CS area).
+    Register BaseForSVEDealloc =
+        (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
+                                                              : AArch64::SP;
+    if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
+      Register CalleeSaveBase = AArch64::FP;
+      if (int64_t CalleeSaveBaseOffset =
+              AFI->getCalleeSaveBaseToFrameRecordOffset()) {
+        // If we have a non-zero offset to the non-SVE CS base we need to
+        // compute the base address by subtracting the offset in a temporary
+        // register first (to avoid briefly deallocating the SVE CS).
+        CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
+            &AArch64::GPR64RegClass);
+        emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
+                        StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
+                        MachineInstr::FrameDestroy);
+      }
+      // The code below will deallocate the stack space by moving the
+      // SP to the start of the SVE callee-save area.
+      emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
+                      StackOffset::getScalable(-SVECalleeSavedSize), TII,
+                      MachineInstr::FrameDestroy);
+    } else if (BaseForSVEDealloc == AArch64::SP) {
+      if (SVECalleeSavedSize) {
+        // Deallocate the non-SVE locals first before we can deallocate (and
+        // restore callee saves) from the SVE area.
+        emitFrameOffset(
+            MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
+            StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
+            false, NeedsWinCFI, &HasWinCFI, EmitCFI && !AFL.hasFP(MF),
+            SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
+        NumBytes = 0;
+      }
+
+      emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
+                      DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
+                      NeedsWinCFI, &HasWinCFI, EmitCFI && !AFL.hasFP(MF),
+                      SVEStackSize +
+                          StackOffset::getFixed(NumBytes + PrologueSaveSize));
+
+      emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
+                      DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
+                      NeedsWinCFI, &HasWinCFI, EmitCFI && !AFL.hasFP(MF),
+                      DeallocateAfter +
+                          StackOffset::getFixed(NumBytes + PrologueSaveSize));
+    }
+    if (EmitCFI)
+      emitCalleeSavedSVERestores(RestoreEnd);
+  }
+
+  if (!AFL.hasFP(MF)) {
+    bool RedZone = AFL.canUseRedZone(MF);
+    // If this was a redzone leaf function, we don't need to restore the
+    // stack pointer (but we may need to pop stack args for fastcc).
+    if (RedZone && AfterCSRPopSize == 0)
+      return;
+
+    // Pop the local variables off the stack. If there are no callee-saved
+    // registers, it means we are actually positioned at the terminator and can
+    // combine stack increment for the locals and the stack increment for
+    // callee-popped arguments into (possibly) a single instruction and be done.
+    bool NoCalleeSaveRestore = PrologueSaveSize == 0;
+    int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
+    if (NoCalleeSaveRestore)
+      StackRestoreBytes += AfterCSRPopSize;
+
+    emitFrameOffset(
+        MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
+        StackOffset::getFixed(StackRestoreBytes), TII,
+        MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI,
+        StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));
+
+    // If we were able to combine the local stack pop with the argument pop,
+    // then we're done.
+    if (NoCalleeSaveRestore || AfterCSRPopSize == 0)
+      return;
+
+    NumBytes = 0;
+  }
+
+  // Restore the original stack pointer.
+  // FIXME: Rather than doing the math here, we should instead just use
+  // non-post-indexed loads for the restores if we aren't actually going to
+  // be able to save any instructions.
+  if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
+    emitFrameOffset(
+        MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::FP,
+        StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
+        TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
+  } else if (NumBytes)
+    emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
+                    StackOffset::getFixed(NumBytes), TII,
+                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
+
+  // When we are about to restore the CSRs, the CFA register is SP again.
+  if (EmitCFI && AFL.hasFP(MF))
+    CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
+        .buildDefCFA(AArch64::SP, PrologueSaveSize);
+
+  // This must be placed after the callee-save restore code because that code
+  // assumes the SP is at the same location as it was after the callee-save save
+  // code in the prologue.
+  if (AfterCSRPopSize) {
+    assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
+                                  "interrupt may have clobbered");
+
+    emitFrameOffset(
+        MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
+        StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
+        false, NeedsWinCFI, &HasWinCFI, EmitCFI,
+        StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
+  }
+}
+
+void AArch64EpilogueEmitter::emitSwiftAsyncContextFramePointer(
+    MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
+  switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
+  case SwiftAsyncFramePointerMode::DeploymentBased:
+    // Avoid the reload as it is GOT relative, and instead fall back to the
+    // hardcoded value below.  This allows a mismatch between the OS and
+    // application without immediately terminating on the difference.
+    [[fallthrough]];
+  case SwiftAsyncFramePointerMode::Always:
+    // We need to reset FP to its untagged state on return. Bit 60 is
+    // currently used to show the presence of an extended frame.
+
+    // BIC x29, x29, #0x1000_0000_0000_0000
+    BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
+            AArch64::FP)
+        .addUse(AArch64::FP)
+        .addImm(0x10fe)
+        .setMIFlag(MachineInstr::FrameDestroy);
+    if (NeedsWinCFI) {
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+          .setMIFlags(MachineInstr::FrameDestroy);
+      HasWinCFI = true;
+    }
+    break;
+
+  case SwiftAsyncFramePointerMode::Never:
+    break;
+  }
+}
+
+void AArch64EpilogueEmitter::emitShadowCallStackEpilogue(
+    MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
+  // Shadow call stack epilog: ldr x30, [x18, #-8]!
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXpre))
+      .addReg(AArch64::X18, RegState::Define)
+      .addReg(AArch64::LR, RegState::Define)
+      .addReg(AArch64::X18)
+      .addImm(-8)
+      .setMIFlag(MachineInstr::FrameDestroy);
+
+  if (NeedsWinCFI)
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+        .setMIFlag(MachineInstr::FrameDestroy);
+
+  if (AFI->needsAsyncDwarfUnwindInfo(MF))
+    CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
+        .buildRestore(AArch64::X18);
+}
+
+void AArch64EpilogueEmitter::emitCalleeSavedRestores(
+    MachineBasicBlock::iterator MBBI, bool SVE) const {
+  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+  if (CSI.empty())
+    return;
+
+  const TargetSubtargetInfo &STI = MF.getSubtarget();
+  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
+
+  for (const auto &Info : CSI) {
+    if (SVE !=
+        (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
+      continue;
+
+    MCRegister Reg = Info.getReg();
+    if (SVE &&
+        !static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
+      continue;
+
+    CFIBuilder.buildRestore(Info.getReg());
+  }
+}
+
+void AArch64EpilogueEmitter::finalizeEpilogue() const {
+  if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
+    emitShadowCallStackEpilogue(MBB.getFirstTerminator(), DL);
+    HasWinCFI |= NeedsWinCFI;
+  }
+  if (EmitCFI)
+    emitCalleeSavedGPRRestores(MBB.getFirstTerminator());
+  if (AFI->shouldSignReturnAddress(MF)) {
+    // If pac-ret+leaf is in effect, PAUTH_EPILOGUE pseudo instructions
+    // are inserted by emitPacRetPlusLeafHardening().
+    if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
+      BuildMI(MBB, MBB.getFirstTerminator(), DL,
+              TII->get(AArch64::PAUTH_EPILOGUE))
+          .setMIFlag(MachineInstr::FrameDestroy);
+    }
+    // AArch64PointerAuth pass will insert SEH_PACSignLR
+    HasWinCFI |= NeedsWinCFI;
+  }
+  if (HasWinCFI) {
+    BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
+        .setMIFlag(MachineInstr::FrameDestroy);
+    if (!MF.hasWinCFI())
+      MF.setHasWinCFI(true);
+  }
+  if (NeedsWinCFI) {
+    assert(SEHEpilogueStartI != MBB.end());
+    if (!HasWinCFI)
+      MBB.erase(SEHEpilogueStartI);
+  }
+}
+
 } // namespace llvm
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
index 94029ede60c76..20bbffcdb33f2 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
@@ -7,8 +7,9 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file contains the declaration of the AArch64PrologueEmitter class,
-/// which is is used to emit the prologue on AArch64.
+/// This file contains the declaration of the AArch64PrologueEmitter and
+/// AArch64EpilogueEmitter classes, which are used to emit the prologue and
+/// epilogue on AArch64.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -106,6 +107,63 @@ class AArch64PrologueEmitter {
   AArch64FunctionInfo *AFI = nullptr;
 };
 
+/// A helper class for emitting the epilogue. Substantial new functionality
+/// should be factored into a new method. Where possible "emit*" methods should
+/// be const, and any flags that change how the epilogue is emitted should be
+/// set in the constructor.
+class AArch64EpilogueEmitter {
+public:
+  AArch64EpilogueEmitter(MachineFunction &MF, MachineBasicBlock &MBB,
+                         const AArch64FrameLowering &AFL);
+
+  /// Emit the epilogue.
+  void emitEpilogue();
+
+  ~AArch64EpilogueEmitter() { finalizeEpilogue(); }
+
+private:
+  void emitSwiftAsyncContextFramePointer(MachineBasicBlock::iterator MBBI,
+                                         const DebugLoc &DL) const;
+
+  void emitShadowCallStackEpilogue(MachineBasicBlock::iterator MBBI,
+                                   const DebugLoc &DL) const;
+
+  void emitCalleeSavedRestores(MachineBasicBlock::iterator MBBI,
+                               bool SVE) const;
+
+  void emitCalleeSavedGPRRestores(MachineBasicBlock::iterator MBBI) const {
+    emitCalleeSavedRestores(MBBI, /*SVE=*/false);
+  }
+
+  void emitCalleeSavedSVERestores(MachineBasicBlock::iterator MBBI) const {
+    emitCalleeSavedRestores(MBBI, /*SVE=*/true);
+  }
+
+  void finalizeEpilogue() const;
+
+  MachineFunction &MF;
+  MachineBasicBlock &MBB;
+
+  const MachineFrameInfo &MFI;
+  const AArch64Subtarget &Subtarget;
+  const AArch64FrameLowering &AFL;
+
+  // Epilogue flags. These generally should not change outside of the
+  // constructor (or early in emitEpilogue).
+  bool NeedsWinCFI = false;
+  bool EmitCFI = false;
+  bool IsFunclet = false;
+
+  // Note: "HasWinCFI" is mutable as it can change in any "emit" function.
+  mutable bool HasWinCFI = false;
+
+  const TargetInstrInfo *TII = nullptr;
+  AArch64FunctionInfo *AFI = nullptr;
+
+  DebugLoc DL;
+  MachineBasicBlock::iterator SEHEpilogueStartI;
+};
+
 } // namespace llvm
 
 #endif


