[llvm] c379127 - [AArch64] Refactor and move common code to `AArch64PrologueEpilogue` (NFCI) (#158920)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 18 05:04:41 PDT 2025


Author: Benjamin Maxwell
Date: 2025-09-18T13:04:38+01:00
New Revision: c379127c123e8346f764630dc659e0871099f2fb

URL: https://github.com/llvm/llvm-project/commit/c379127c123e8346f764630dc659e0871099f2fb
DIFF: https://github.com/llvm/llvm-project/commit/c379127c123e8346f764630dc659e0871099f2fb.diff

LOG: [AArch64] Refactor and move common code to `AArch64PrologueEpilogue` (NFCI) (#158920)

This is the final patch in a series reworking the structure of the
prologue/epilogue code. It moves several methods from
`AArch64FrameLowering` to `AArch64PrologueEpilogue`, as they are only used
by `emitPrologue`/`emitEpilogue`. This includes:

- `shouldCombineCSRLocalStackBump()`
- `shouldCombineCSRLocalStackBumpInEpilogue()`
- `allocateStackSpace()`
- `convertCalleeSaveRestoreToSPPrePostIncDec()`
- `fixupCalleeSaveRestoreStackOffset()`

Common code/methods have been factored into an
`AArch64PrologueEpilogueCommon` base class used by both
`AArch64PrologueEmitter` and `AArch64EpilogueEmitter`.

Finally, some redundant fetching of target classes has been removed from
these methods.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
    llvm/lib/Target/AArch64/AArch64FrameLowering.h
    llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
    llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index fd53f04443766..ab5c6f3c0a19d 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -706,148 +706,6 @@ void AArch64FrameLowering::resetCFIToInitialState(
   }
 }
 
-// Return the maximum possible number of bytes for `Size` due to the
-// architectural limit on the size of an SVE register.
-static int64_t upperBound(StackOffset Size) {
-  static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
-  return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
-}
-
-void AArch64FrameLowering::allocateStackSpace(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI,
-    bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset,
-    bool FollowupAllocs) const {
-
-  if (!AllocSize)
-    return;
-
-  DebugLoc DL;
-  MachineFunction &MF = *MBB.getParent();
-  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
-  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
-  AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
-  const MachineFrameInfo &MFI = MF.getFrameInfo();
-
-  const int64_t MaxAlign = MFI.getMaxAlign().value();
-  const uint64_t AndMask = ~(MaxAlign - 1);
-
-  if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) {
-    Register TargetReg = RealignmentPadding
-                             ? findScratchNonCalleeSaveRegister(&MBB)
-                             : AArch64::SP;
-    // SUB Xd/SP, SP, AllocSize
-    emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
-                    MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
-                    EmitCFI, InitialOffset);
-
-    if (RealignmentPadding) {
-      // AND SP, X9, 0b11111...0000
-      BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
-          .addReg(TargetReg, RegState::Kill)
-          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
-          .setMIFlags(MachineInstr::FrameSetup);
-      AFI.setStackRealigned(true);
-
-      // No need for SEH instructions here; if we're realigning the stack,
-      // we've set a frame pointer and already finished the SEH prologue.
-      assert(!NeedsWinCFI);
-    }
-    return;
-  }
-
-  //
-  // Stack probing allocation.
-  //
-
-  // Fixed length allocation. If we don't need to re-align the stack and don't
-  // have SVE objects, we can use a more efficient sequence for stack probing.
-  if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
-    Register ScratchReg = findScratchNonCalleeSaveRegister(&MBB);
-    assert(ScratchReg != AArch64::NoRegister);
-    BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC))
-        .addDef(ScratchReg)
-        .addImm(AllocSize.getFixed())
-        .addImm(InitialOffset.getFixed())
-        .addImm(InitialOffset.getScalable());
-    // The fixed allocation may leave unprobed bytes at the top of the
-    // stack. If we have subsequent allocation (e.g. if we have variable-sized
-    // objects), we need to issue an extra probe, so these allocations start in
-    // a known state.
-    if (FollowupAllocs) {
-      // STR XZR, [SP]
-      BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
-          .addReg(AArch64::XZR)
-          .addReg(AArch64::SP)
-          .addImm(0)
-          .setMIFlags(MachineInstr::FrameSetup);
-    }
-
-    return;
-  }
-
-  // Variable length allocation.
-
-  // If the (unknown) allocation size cannot exceed the probe size, decrement
-  // the stack pointer right away.
-  int64_t ProbeSize = AFI.getStackProbeSize();
-  if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
-    Register ScratchReg = RealignmentPadding
-                              ? findScratchNonCalleeSaveRegister(&MBB)
-                              : AArch64::SP;
-    assert(ScratchReg != AArch64::NoRegister);
-    // SUB Xd, SP, AllocSize
-    emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, &TII,
-                    MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
-                    EmitCFI, InitialOffset);
-    if (RealignmentPadding) {
-      // AND SP, Xn, 0b11111...0000
-      BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
-          .addReg(ScratchReg, RegState::Kill)
-          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
-          .setMIFlags(MachineInstr::FrameSetup);
-      AFI.setStackRealigned(true);
-    }
-    if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
-                              AArch64::StackProbeMaxUnprobedStack) {
-      // STR XZR, [SP]
-      BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
-          .addReg(AArch64::XZR)
-          .addReg(AArch64::SP)
-          .addImm(0)
-          .setMIFlags(MachineInstr::FrameSetup);
-    }
-    return;
-  }
-
-  // Emit a variable-length allocation probing loop.
-  // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
-  // each of them guaranteed to adjust the stack by less than the probe size.
-  Register TargetReg = findScratchNonCalleeSaveRegister(&MBB);
-  assert(TargetReg != AArch64::NoRegister);
-  // SUB Xd, SP, AllocSize
-  emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
-                  MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
-                  EmitCFI, InitialOffset);
-  if (RealignmentPadding) {
-    // AND Xn, Xn, 0b11111...0000
-    BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), TargetReg)
-        .addReg(TargetReg, RegState::Kill)
-        .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
-        .setMIFlags(MachineInstr::FrameSetup);
-  }
-
-  BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC_VAR))
-      .addReg(TargetReg);
-  if (EmitCFI) {
-    // Set the CFA register back to SP.
-    CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
-        .buildDefCFARegister(AArch64::SP);
-  }
-  if (RealignmentPadding)
-    AFI.setStackRealigned(true);
-}
-
 static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
   switch (Reg.id()) {
   default:
@@ -1088,92 +946,12 @@ bool AArch64FrameLowering::shouldSignReturnAddressEverywhere(
   return SignReturnAddressAll;
 }
 
-bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
-    MachineFunction &MF, uint64_t StackBumpBytes) const {
-  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  const MachineFrameInfo &MFI = MF.getFrameInfo();
-  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
-  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
-  if (homogeneousPrologEpilog(MF))
-    return false;
-
-  if (AFI->getLocalStackSize() == 0)
-    return false;
-
-  // For WinCFI, if optimizing for size, prefer to not combine the stack bump
-  // (to force a stp with predecrement) to match the packed unwind format,
-  // provided that there actually are any callee saved registers to merge the
-  // decrement with.
-  // This is potentially marginally slower, but allows using the packed
-  // unwind format for functions that both have a local area and callee saved
-  // registers. Using the packed unwind format notably reduces the size of
-  // the unwind info.
-  if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
-      MF.getFunction().hasOptSize())
-    return false;
-
-  // 512 is the maximum immediate for stp/ldp that will be used for
-  // callee-save save/restores
-  if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
-    return false;
-
-  if (MFI.hasVarSizedObjects())
-    return false;
-
-  if (RegInfo->hasStackRealignment(MF))
-    return false;
-
-  // This isn't strictly necessary, but it simplifies things a bit since the
-  // current RedZone handling code assumes the SP is adjusted by the
-  // callee-save save/restore code.
-  if (canUseRedZone(MF))
-    return false;
-
-  // When there is an SVE area on the stack, always allocate the
-  // callee-saves and spills/locals separately.
-  if (getSVEStackSize(MF))
-    return false;
-
-  return true;
-}
-
-bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
-    MachineBasicBlock &MBB, uint64_t StackBumpBytes) const {
-  if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
-    return false;
-  if (MBB.empty())
-    return true;
-
-  // Disable combined SP bump if the last instruction is an MTE tag store. It
-  // is almost always better to merge SP adjustment into those instructions.
-  MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
-  MachineBasicBlock::iterator Begin = MBB.begin();
-  while (LastI != Begin) {
-    --LastI;
-    if (LastI->isTransient())
-      continue;
-    if (!LastI->getFlag(MachineInstr::FrameDestroy))
-      break;
-  }
-  switch (LastI->getOpcode()) {
-  case AArch64::STGloop:
-  case AArch64::STZGloop:
-  case AArch64::STGi:
-  case AArch64::STZGi:
-  case AArch64::ST2Gi:
-  case AArch64::STZ2Gi:
-    return false;
-  default:
-    return true;
-  }
-  llvm_unreachable("unreachable");
-}
-
 // Given a load or a store instruction, generate an appropriate unwinding SEH
 // code on Windows.
-static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
-                                             const TargetInstrInfo &TII,
-                                             MachineInstr::MIFlag Flag) {
+MachineBasicBlock::iterator
+AArch64FrameLowering::insertSEH(MachineBasicBlock::iterator MBBI,
+                                const TargetInstrInfo &TII,
+                                MachineInstr::MIFlag Flag) const {
   unsigned Opc = MBBI->getOpcode();
   MachineBasicBlock *MBB = MBBI->getParent();
   MachineFunction &MF = *MBB->getParent();
@@ -1332,34 +1110,6 @@ static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
   return I;
 }
 
-// Fix up the SEH opcode associated with the save/restore instruction.
-static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
-                           unsigned LocalStackSize) {
-  MachineOperand *ImmOpnd = nullptr;
-  unsigned ImmIdx = MBBI->getNumOperands() - 1;
-  switch (MBBI->getOpcode()) {
-  default:
-    llvm_unreachable("Fix the offset in the SEH instruction");
-  case AArch64::SEH_SaveFPLR:
-  case AArch64::SEH_SaveRegP:
-  case AArch64::SEH_SaveReg:
-  case AArch64::SEH_SaveFRegP:
-  case AArch64::SEH_SaveFReg:
-  case AArch64::SEH_SaveAnyRegQP:
-  case AArch64::SEH_SaveAnyRegQPX:
-    ImmOpnd = &MBBI->getOperand(ImmIdx);
-    break;
-  }
-  if (ImmOpnd)
-    ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
-}
-
-bool AArch64FrameLowering::requiresGetVGCall(const MachineFunction &MF) const {
-  auto *AFI = MF.getInfo<AArch64FunctionInfo>();
-  return AFI->hasStreamingModeChanges() &&
-         !MF.getSubtarget<AArch64Subtarget>().hasSVE();
-}
-
 bool AArch64FrameLowering::requiresSaveVG(const MachineFunction &MF) const {
   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   if (!AFI->needsDwarfUnwindInfo(MF) || !AFI->hasStreamingModeChanges())
@@ -1372,201 +1122,6 @@ bool AArch64FrameLowering::requiresSaveVG(const MachineFunction &MF) const {
   return true;
 }
 
-static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
-                         RTLIB::Libcall LC) {
-  return MO.isSymbol() &&
-         StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
-}
-
-bool AArch64FrameLowering::isVGInstruction(MachineBasicBlock::iterator MBBI,
-                                           const TargetLowering &TLI) const {
-  unsigned Opc = MBBI->getOpcode();
-  if (Opc == AArch64::CNTD_XPiI)
-    return true;
-
-  if (!requiresGetVGCall(*MBBI->getMF()))
-    return false;
-
-  if (Opc == AArch64::BL)
-    return matchLibcall(TLI, MBBI->getOperand(0), RTLIB::SMEABI_GET_CURRENT_VG);
-
-  return Opc == TargetOpcode::COPY;
-}
-
-MachineBasicBlock::iterator
-AArch64FrameLowering::convertCalleeSaveRestoreToSPPrePostIncDec(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
-    bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
-    MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
-  unsigned NewOpc;
-
-  // If the function contains streaming mode changes, we expect instructions
-  // to calculate the value of VG before spilling. Move past these instructions
-  // if necessary.
-  MachineFunction &MF = *MBB.getParent();
-  if (requiresSaveVG(MF)) {
-    auto &TLI = *MF.getSubtarget().getTargetLowering();
-    while (isVGInstruction(MBBI, TLI))
-      ++MBBI;
-  }
-
-  switch (MBBI->getOpcode()) {
-  default:
-    llvm_unreachable("Unexpected callee-save save/restore opcode!");
-  case AArch64::STPXi:
-    NewOpc = AArch64::STPXpre;
-    break;
-  case AArch64::STPDi:
-    NewOpc = AArch64::STPDpre;
-    break;
-  case AArch64::STPQi:
-    NewOpc = AArch64::STPQpre;
-    break;
-  case AArch64::STRXui:
-    NewOpc = AArch64::STRXpre;
-    break;
-  case AArch64::STRDui:
-    NewOpc = AArch64::STRDpre;
-    break;
-  case AArch64::STRQui:
-    NewOpc = AArch64::STRQpre;
-    break;
-  case AArch64::LDPXi:
-    NewOpc = AArch64::LDPXpost;
-    break;
-  case AArch64::LDPDi:
-    NewOpc = AArch64::LDPDpost;
-    break;
-  case AArch64::LDPQi:
-    NewOpc = AArch64::LDPQpost;
-    break;
-  case AArch64::LDRXui:
-    NewOpc = AArch64::LDRXpost;
-    break;
-  case AArch64::LDRDui:
-    NewOpc = AArch64::LDRDpost;
-    break;
-  case AArch64::LDRQui:
-    NewOpc = AArch64::LDRQpost;
-    break;
-  }
-  TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
-  int64_t MinOffset, MaxOffset;
-  bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
-      NewOpc, Scale, Width, MinOffset, MaxOffset);
-  (void)Success;
-  assert(Success && "unknown load/store opcode");
-
-  // If the first store isn't right where we want SP then we can't fold the
-  // update in so create a normal arithmetic instruction instead.
-  if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
-      CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
-      CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
-    // If we are destroying the frame, make sure we add the increment after the
-    // last frame operation.
-    if (FrameFlag == MachineInstr::FrameDestroy) {
-      ++MBBI;
-      // Also skip the SEH instruction, if needed
-      if (NeedsWinCFI && AArch64InstrInfo::isSEHInstruction(*MBBI))
-        ++MBBI;
-    }
-    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
-                    StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
-                    false, NeedsWinCFI, HasWinCFI, EmitCFI,
-                    StackOffset::getFixed(CFAOffset));
-
-    return std::prev(MBBI);
-  }
-
-  // Get rid of the SEH code associated with the old instruction.
-  if (NeedsWinCFI) {
-    auto SEH = std::next(MBBI);
-    if (AArch64InstrInfo::isSEHInstruction(*SEH))
-      SEH->eraseFromParent();
-  }
-
-  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
-  MIB.addReg(AArch64::SP, RegState::Define);
-
-  // Copy all operands other than the immediate offset.
-  unsigned OpndIdx = 0;
-  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
-       ++OpndIdx)
-    MIB.add(MBBI->getOperand(OpndIdx));
-
-  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
-         "Unexpected immediate offset in first/last callee-save save/restore "
-         "instruction!");
-  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
-         "Unexpected base register in callee-save save/restore instruction!");
-  assert(CSStackSizeInc % Scale == 0);
-  MIB.addImm(CSStackSizeInc / (int)Scale);
-
-  MIB.setMIFlags(MBBI->getFlags());
-  MIB.setMemRefs(MBBI->memoperands());
-
-  // Generate a new SEH code that corresponds to the new instruction.
-  if (NeedsWinCFI) {
-    *HasWinCFI = true;
-    InsertSEH(*MIB, *TII, FrameFlag);
-  }
-
-  if (EmitCFI)
-    CFIInstBuilder(MBB, MBBI, FrameFlag)
-        .buildDefCFAOffset(CFAOffset - CSStackSizeInc);
-
-  return std::prev(MBB.erase(MBBI));
-}
-
-void AArch64FrameLowering::fixupCalleeSaveRestoreStackOffset(
-    MachineInstr &MI, uint64_t LocalStackSize, bool NeedsWinCFI,
-    bool *HasWinCFI) const {
-  if (AArch64InstrInfo::isSEHInstruction(MI))
-    return;
-
-  unsigned Opc = MI.getOpcode();
-  unsigned Scale;
-  switch (Opc) {
-  case AArch64::STPXi:
-  case AArch64::STRXui:
-  case AArch64::STPDi:
-  case AArch64::STRDui:
-  case AArch64::LDPXi:
-  case AArch64::LDRXui:
-  case AArch64::LDPDi:
-  case AArch64::LDRDui:
-    Scale = 8;
-    break;
-  case AArch64::STPQi:
-  case AArch64::STRQui:
-  case AArch64::LDPQi:
-  case AArch64::LDRQui:
-    Scale = 16;
-    break;
-  default:
-    llvm_unreachable("Unexpected callee-save save/restore opcode!");
-  }
-
-  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
-  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
-         "Unexpected base register in callee-save save/restore instruction!");
-  // Last operand is immediate offset that needs fixing.
-  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
-  // All generated opcodes have scaled offsets.
-  assert(LocalStackSize % Scale == 0);
-  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
-
-  if (NeedsWinCFI) {
-    *HasWinCFI = true;
-    auto MBBI = std::next(MachineBasicBlock::iterator(MI));
-    assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
-    assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
-           "Expecting a SEH instruction");
-    fixupSEHOpcode(MBBI, LocalStackSize);
-  }
-}
-
 static bool isTargetWindows(const MachineFunction &MF) {
   return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
 }
@@ -1575,30 +1130,6 @@ static unsigned getStackHazardSize(const MachineFunction &MF) {
   return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
 }
 
-// Convenience function to determine whether I is an SVE callee save.
-bool AArch64FrameLowering::isSVECalleeSave(
-    MachineBasicBlock::iterator I) const {
-  switch (I->getOpcode()) {
-  default:
-    return false;
-  case AArch64::PTRUE_C_B:
-  case AArch64::LD1B_2Z_IMM:
-  case AArch64::ST1B_2Z_IMM:
-  case AArch64::STR_ZXI:
-  case AArch64::STR_PXI:
-  case AArch64::LDR_ZXI:
-  case AArch64::LDR_PXI:
-  case AArch64::PTRUE_B:
-  case AArch64::CPY_ZPzI_B:
-  case AArch64::CMPNE_PPzZI_B:
-    return I->getFlag(MachineInstr::FrameSetup) ||
-           I->getFlag(MachineInstr::FrameDestroy);
-  case AArch64::SEH_SavePReg:
-  case AArch64::SEH_SaveZReg:
-    return true;
-  }
-}
-
 void AArch64FrameLowering::emitPacRetPlusLeafHardening(
     MachineFunction &MF) const {
   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
@@ -2464,7 +1995,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
           MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
           MachineMemOperand::MOStore, Size, Alignment));
       if (NeedsWinCFI)
-        InsertSEH(MIB, TII, MachineInstr::FrameSetup);
+        insertSEH(MIB, TII, MachineInstr::FrameSetup);
     } else { // The code when the pair of ZReg is not present
       MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
       if (!MRI.isReserved(Reg1))
@@ -2486,7 +2017,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
           MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
           MachineMemOperand::MOStore, Size, Alignment));
       if (NeedsWinCFI)
-        InsertSEH(MIB, TII, MachineInstr::FrameSetup);
+        insertSEH(MIB, TII, MachineInstr::FrameSetup);
     }
     // Update the StackIDs of the SVE stack slots.
     MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -2615,7 +2146,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
           MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
           MachineMemOperand::MOLoad, Size, Alignment));
       if (NeedsWinCFI)
-        InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
+        insertSEH(MIB, TII, MachineInstr::FrameDestroy);
     } else {
       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(LdrOpc));
       if (RPI.isPaired()) {
@@ -2633,7 +2164,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
           MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
           MachineMemOperand::MOLoad, Size, Alignment));
       if (NeedsWinCFI)
-        InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
+        insertSEH(MIB, TII, MachineInstr::FrameDestroy);
     }
   }
   return true;

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 0825d03bcb0d8..7bba053111e89 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -147,6 +147,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
 
   StackOffset getSVEStackSize(const MachineFunction &MF) const;
 
+  friend class AArch64PrologueEpilogueCommon;
   friend class AArch64PrologueEmitter;
   friend class AArch64EpilogueEmitter;
 
@@ -164,20 +165,10 @@ class AArch64FrameLowering : public TargetFrameLowering {
   /// Returns true if CSRs should be paired.
   bool producePairRegisters(MachineFunction &MF) const;
 
-  bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
-                                      uint64_t StackBumpBytes) const;
-
   int64_t estimateSVEStackObjectOffsets(MachineFrameInfo &MF) const;
   int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
                                       int &MinCSFrameIndex,
                                       int &MaxCSFrameIndex) const;
-  bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
-                                                uint64_t StackBumpBytes) const;
-  void allocateStackSpace(MachineBasicBlock &MBB,
-                          MachineBasicBlock::iterator MBBI,
-                          int64_t RealignmentPadding, StackOffset AllocSize,
-                          bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
-                          StackOffset InitialOffset, bool FollowupAllocs) const;
   /// Make a determination whether a Hazard slot is used and create it if
   /// needed.
   void determineStackHazardSlot(MachineFunction &MF,
@@ -214,6 +205,12 @@ class AArch64FrameLowering : public TargetFrameLowering {
   StackOffset getStackOffset(const MachineFunction &MF,
                              int64_t ObjectOffset) const;
 
+  // Given a load or a store instruction, generate an appropriate unwinding SEH
+  // code on Windows.
+  MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
+                                        const TargetInstrInfo &TII,
+                                        MachineInstr::MIFlag Flag) const;
+
   /// Returns how much of the incoming argument stack area (in bytes) we should
   /// clean up in an epilogue. For the C calling convention this will be 0, for
   /// guaranteed tail call conventions it can be positive (a normal return or a
@@ -237,35 +234,11 @@ class AArch64FrameLowering : public TargetFrameLowering {
   Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
                                             bool HasCall = false) const;
 
-  // Convert callee-save register save/restore instruction to do stack pointer
-  // decrement/increment to allocate/deallocate the callee-save stack area by
-  // converting store/load to use pre/post increment version.
-  MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
-      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-      const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
-      bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
-      MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
-      int CFAOffset = 0) const;
-
-  // Fixup callee-save register save/restore instructions to take into account
-  // combined SP bump by adding the local stack size to the stack offsets.
-  void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
-                                         uint64_t LocalStackSize,
-                                         bool NeedsWinCFI,
-                                         bool *HasWinCFI) const;
-
-  bool isSVECalleeSave(MachineBasicBlock::iterator I) const;
-
   /// Returns the size of the fixed object area (allocated next to sp on entry)
   /// On Win64 this may include a var args area and an UnwindHelp object for EH.
   unsigned getFixedObjectSize(const MachineFunction &MF,
                               const AArch64FunctionInfo *AFI, bool IsWin64,
                               bool IsFunclet) const;
-
-  bool isVGInstruction(MachineBasicBlock::iterator MBBI,
-                       const TargetLowering &TLI) const;
-
-  bool requiresGetVGCall(const MachineFunction &MF) const;
 };
 
 } // End llvm namespace

diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index 700c45a8aec9a..7947469b6c04f 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -22,19 +22,312 @@ STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
 
 namespace llvm {
 
-AArch64PrologueEmitter::AArch64PrologueEmitter(MachineFunction &MF,
-                                               MachineBasicBlock &MBB,
-                                               const AArch64FrameLowering &AFL)
-    : MF(MF), MBB(MBB), F(MF.getFunction()), MFI(MF.getFrameInfo()),
+static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
+                         RTLIB::Libcall LC) {
+  return MO.isSymbol() &&
+         StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
+}
+
+bool AArch64PrologueEpilogueCommon::requiresGetVGCall() const {
+  return AFI->hasStreamingModeChanges() &&
+         !MF.getSubtarget<AArch64Subtarget>().hasSVE();
+}
+
+bool AArch64PrologueEpilogueCommon::isVGInstruction(
+    MachineBasicBlock::iterator MBBI, const TargetLowering &TLI) const {
+  unsigned Opc = MBBI->getOpcode();
+  if (Opc == AArch64::CNTD_XPiI)
+    return true;
+
+  if (!requiresGetVGCall())
+    return false;
+
+  if (Opc == AArch64::BL)
+    return matchLibcall(TLI, MBBI->getOperand(0), RTLIB::SMEABI_GET_CURRENT_VG);
+
+  return Opc == TargetOpcode::COPY;
+}
+
+// Convenience function to determine whether I is an SVE callee save.
+static bool isSVECalleeSave(MachineBasicBlock::iterator I) {
+  switch (I->getOpcode()) {
+  default:
+    return false;
+  case AArch64::PTRUE_C_B:
+  case AArch64::LD1B_2Z_IMM:
+  case AArch64::ST1B_2Z_IMM:
+  case AArch64::STR_ZXI:
+  case AArch64::STR_PXI:
+  case AArch64::LDR_ZXI:
+  case AArch64::LDR_PXI:
+  case AArch64::PTRUE_B:
+  case AArch64::CPY_ZPzI_B:
+  case AArch64::CMPNE_PPzZI_B:
+    return I->getFlag(MachineInstr::FrameSetup) ||
+           I->getFlag(MachineInstr::FrameDestroy);
+  case AArch64::SEH_SavePReg:
+  case AArch64::SEH_SaveZReg:
+    return true;
+  }
+}
+
+AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon(
+    MachineFunction &MF, MachineBasicBlock &MBB,
+    const AArch64FrameLowering &AFL)
+    : MF(MF), MBB(MBB), MFI(MF.getFrameInfo()),
       Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
       RegInfo(*Subtarget.getRegisterInfo()) {
   TII = Subtarget.getInstrInfo();
   AFI = MF.getInfo<AArch64FunctionInfo>();
 
-  EmitCFI = AFI->needsDwarfUnwindInfo(MF);
-  EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
   HasFP = AFL.hasFP(MF);
   NeedsWinCFI = AFL.needsWinCFI(MF);
+}
+
+MachineBasicBlock::iterator
+AArch64PrologueEpilogueCommon::convertCalleeSaveRestoreToSPPrePostIncDec(
+    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int CSStackSizeInc,
+    bool EmitCFI, MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
+  unsigned NewOpc;
+
+  // If the function contains streaming mode changes, we expect instructions
+  // to calculate the value of VG before spilling. Move past these instructions
+  // if necessary.
+  if (AFL.requiresSaveVG(MF)) {
+    auto &TLI = *Subtarget.getTargetLowering();
+    while (isVGInstruction(MBBI, TLI))
+      ++MBBI;
+  }
+
+  switch (MBBI->getOpcode()) {
+  default:
+    llvm_unreachable("Unexpected callee-save save/restore opcode!");
+  case AArch64::STPXi:
+    NewOpc = AArch64::STPXpre;
+    break;
+  case AArch64::STPDi:
+    NewOpc = AArch64::STPDpre;
+    break;
+  case AArch64::STPQi:
+    NewOpc = AArch64::STPQpre;
+    break;
+  case AArch64::STRXui:
+    NewOpc = AArch64::STRXpre;
+    break;
+  case AArch64::STRDui:
+    NewOpc = AArch64::STRDpre;
+    break;
+  case AArch64::STRQui:
+    NewOpc = AArch64::STRQpre;
+    break;
+  case AArch64::LDPXi:
+    NewOpc = AArch64::LDPXpost;
+    break;
+  case AArch64::LDPDi:
+    NewOpc = AArch64::LDPDpost;
+    break;
+  case AArch64::LDPQi:
+    NewOpc = AArch64::LDPQpost;
+    break;
+  case AArch64::LDRXui:
+    NewOpc = AArch64::LDRXpost;
+    break;
+  case AArch64::LDRDui:
+    NewOpc = AArch64::LDRDpost;
+    break;
+  case AArch64::LDRQui:
+    NewOpc = AArch64::LDRQpost;
+    break;
+  }
+  TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
+  int64_t MinOffset, MaxOffset;
+  bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
+      NewOpc, Scale, Width, MinOffset, MaxOffset);
+  (void)Success;
+  assert(Success && "unknown load/store opcode");
+
+  // If the first store isn't right where we want SP then we can't fold the
+  // update in so create a normal arithmetic instruction instead.
+  if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
+      CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
+      CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
+    // If we are destroying the frame, make sure we add the increment after the
+    // last frame operation.
+    if (FrameFlag == MachineInstr::FrameDestroy) {
+      ++MBBI;
+      // Also skip the SEH instruction, if needed
+      if (NeedsWinCFI && AArch64InstrInfo::isSEHInstruction(*MBBI))
+        ++MBBI;
+    }
+    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+                    StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
+                    false, NeedsWinCFI, &HasWinCFI, EmitCFI,
+                    StackOffset::getFixed(CFAOffset));
+
+    return std::prev(MBBI);
+  }
+
+  // Get rid of the SEH code associated with the old instruction.
+  if (NeedsWinCFI) {
+    auto SEH = std::next(MBBI);
+    if (AArch64InstrInfo::isSEHInstruction(*SEH))
+      SEH->eraseFromParent();
+  }
+
+  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
+  MIB.addReg(AArch64::SP, RegState::Define);
+
+  // Copy all operands other than the immediate offset.
+  unsigned OpndIdx = 0;
+  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
+       ++OpndIdx)
+    MIB.add(MBBI->getOperand(OpndIdx));
+
+  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
+         "Unexpected immediate offset in first/last callee-save save/restore "
+         "instruction!");
+  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
+         "Unexpected base register in callee-save save/restore instruction!");
+  assert(CSStackSizeInc % Scale == 0);
+  MIB.addImm(CSStackSizeInc / (int)Scale);
+
+  MIB.setMIFlags(MBBI->getFlags());
+  MIB.setMemRefs(MBBI->memoperands());
+
+  // Generate a new SEH code that corresponds to the new instruction.
+  if (NeedsWinCFI) {
+    HasWinCFI = true;
+    AFL.insertSEH(*MIB, *TII, FrameFlag);
+  }
+
+  if (EmitCFI)
+    CFIInstBuilder(MBB, MBBI, FrameFlag)
+        .buildDefCFAOffset(CFAOffset - CSStackSizeInc);
+
+  return std::prev(MBB.erase(MBBI));
+}
+
+// Fix up the SEH opcode associated with the save/restore instruction.
+static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
+                           unsigned LocalStackSize) {
+  MachineOperand *ImmOpnd = nullptr;
+  unsigned ImmIdx = MBBI->getNumOperands() - 1;
+  switch (MBBI->getOpcode()) {
+  default:
+    llvm_unreachable("Fix the offset in the SEH instruction");
+  case AArch64::SEH_SaveFPLR:
+  case AArch64::SEH_SaveRegP:
+  case AArch64::SEH_SaveReg:
+  case AArch64::SEH_SaveFRegP:
+  case AArch64::SEH_SaveFReg:
+  case AArch64::SEH_SaveAnyRegQP:
+  case AArch64::SEH_SaveAnyRegQPX:
+    ImmOpnd = &MBBI->getOperand(ImmIdx);
+    break;
+  }
+  if (ImmOpnd)
+    ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
+}
+
+void AArch64PrologueEpilogueCommon::fixupCalleeSaveRestoreStackOffset(
+    MachineInstr &MI, uint64_t LocalStackSize) const {
+  if (AArch64InstrInfo::isSEHInstruction(MI))
+    return;
+
+  unsigned Opc = MI.getOpcode();
+  unsigned Scale;
+  switch (Opc) {
+  case AArch64::STPXi:
+  case AArch64::STRXui:
+  case AArch64::STPDi:
+  case AArch64::STRDui:
+  case AArch64::LDPXi:
+  case AArch64::LDRXui:
+  case AArch64::LDPDi:
+  case AArch64::LDRDui:
+    Scale = 8;
+    break;
+  case AArch64::STPQi:
+  case AArch64::STRQui:
+  case AArch64::LDPQi:
+  case AArch64::LDRQui:
+    Scale = 16;
+    break;
+  default:
+    llvm_unreachable("Unexpected callee-save save/restore opcode!");
+  }
+
+  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
+  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
+         "Unexpected base register in callee-save save/restore instruction!");
+  // Last operand is immediate offset that needs fixing.
+  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
+  // All generated opcodes have scaled offsets.
+  assert(LocalStackSize % Scale == 0);
+  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
+
+  if (NeedsWinCFI) {
+    HasWinCFI = true;
+    auto MBBI = std::next(MachineBasicBlock::iterator(MI));
+    assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
+    assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
+           "Expecting a SEH instruction");
+    fixupSEHOpcode(MBBI, LocalStackSize);
+  }
+}
+
+bool AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
+    uint64_t StackBumpBytes) const {
+  if (AFL.homogeneousPrologEpilog(MF))
+    return false;
+
+  if (AFI->getLocalStackSize() == 0)
+    return false;
+
+  // For WinCFI, if optimizing for size, prefer to not combine the stack bump
+  // (to force a stp with predecrement) to match the packed unwind format,
+  // provided that there actually are any callee saved registers to merge the
+  // decrement with.
+  // This is potentially marginally slower, but allows using the packed
+  // unwind format for functions that both have a local area and callee saved
+  // registers. Using the packed unwind format notably reduces the size of
+  // the unwind info.
+  if (AFL.needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
+      MF.getFunction().hasOptSize())
+    return false;
+
+  // 512 is the maximum immediate for stp/ldp that will be used for
+  // callee-save save/restores
+  if (StackBumpBytes >= 512 ||
+      AFL.windowsRequiresStackProbe(MF, StackBumpBytes))
+    return false;
+
+  if (MFI.hasVarSizedObjects())
+    return false;
+
+  if (RegInfo.hasStackRealignment(MF))
+    return false;
+
+  // This isn't strictly necessary, but it simplifies things a bit since the
+  // current RedZone handling code assumes the SP is adjusted by the
+  // callee-save save/restore code.
+  if (AFL.canUseRedZone(MF))
+    return false;
+
+  // When there is an SVE area on the stack, always allocate the
+  // callee-saves and spills/locals separately.
+  if (AFL.getSVEStackSize(MF))
+    return false;
+
+  return true;
+}
+
+AArch64PrologueEmitter::AArch64PrologueEmitter(MachineFunction &MF,
+                                               MachineBasicBlock &MBB,
+                                               const AArch64FrameLowering &AFL)
+    : AArch64PrologueEpilogueCommon(MF, MBB, AFL), F(MF.getFunction()) {
+  EmitCFI = AFI->needsDwarfUnwindInfo(MF);
+  EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
   IsFunclet = MBB.isEHFuncletEntry();
   HomPrologEpilog = AFL.homogeneousPrologEpilog(MF);
 
@@ -75,7 +368,7 @@ void AArch64PrologueEmitter::collectBlockLiveins() {
     // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
     // This is necessary to spill VG if required where SVE is unavailable, but
     // X0 is preserved around this call.
-    if (AFL.requiresGetVGCall(MF))
+    if (requiresGetVGCall())
       LiveRegs.removeReg(AArch64::X0);
   }
 }
@@ -97,7 +390,142 @@ void AArch64PrologueEmitter::verifyPrologueClobbers() const {
 void AArch64PrologueEmitter::determineLocalsStackSize(
     uint64_t StackSize, uint64_t PrologueSaveSize) {
   AFI->setLocalStackSize(StackSize - PrologueSaveSize);
-  CombineSPBump = AFL.shouldCombineCSRLocalStackBump(MF, StackSize);
+  CombineSPBump = shouldCombineCSRLocalStackBump(StackSize);
+}
+
+// Return the maximum possible number of bytes for `Size` due to the
+// architectural limit on the size of an SVE register.
+static int64_t upperBound(StackOffset Size) {
+  static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
+  return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
+}
+
+void AArch64PrologueEmitter::allocateStackSpace(
+    MachineBasicBlock::iterator MBBI, int64_t RealignmentPadding,
+    StackOffset AllocSize, bool EmitCFI, StackOffset InitialOffset,
+    bool FollowupAllocs) {
+
+  if (!AllocSize)
+    return;
+
+  DebugLoc DL;
+  const int64_t MaxAlign = MFI.getMaxAlign().value();
+  const uint64_t AndMask = ~(MaxAlign - 1);
+
+  if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) {
+    Register TargetReg = RealignmentPadding
+                             ? AFL.findScratchNonCalleeSaveRegister(&MBB)
+                             : AArch64::SP;
+    // SUB Xd/SP, SP, AllocSize
+    emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
+                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
+                    EmitCFI, InitialOffset);
+
+    if (RealignmentPadding) {
+      // AND SP, X9, 0b11111...0000
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+          .addReg(TargetReg, RegState::Kill)
+          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+          .setMIFlags(MachineInstr::FrameSetup);
+      AFI->setStackRealigned(true);
+
+      // No need for SEH instructions here; if we're realigning the stack,
+      // we've set a frame pointer and already finished the SEH prologue.
+      assert(!NeedsWinCFI);
+    }
+    return;
+  }
+
+  //
+  // Stack probing allocation.
+  //
+
+  // Fixed length allocation. If we don't need to re-align the stack and don't
+  // have SVE objects, we can use a more efficient sequence for stack probing.
+  if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
+    Register ScratchReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
+    assert(ScratchReg != AArch64::NoRegister);
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC))
+        .addDef(ScratchReg)
+        .addImm(AllocSize.getFixed())
+        .addImm(InitialOffset.getFixed())
+        .addImm(InitialOffset.getScalable());
+    // The fixed allocation may leave unprobed bytes at the top of the
+    // stack. If we have subsequent allocation (e.g. if we have variable-sized
+    // objects), we need to issue an extra probe, so these allocations start in
+    // a known state.
+    if (FollowupAllocs) {
+      // STR XZR, [SP]
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
+          .addReg(AArch64::XZR)
+          .addReg(AArch64::SP)
+          .addImm(0)
+          .setMIFlags(MachineInstr::FrameSetup);
+    }
+
+    return;
+  }
+
+  // Variable length allocation.
+
+  // If the (unknown) allocation size cannot exceed the probe size, decrement
+  // the stack pointer right away.
+  int64_t ProbeSize = AFI->getStackProbeSize();
+  if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
+    Register ScratchReg = RealignmentPadding
+                              ? AFL.findScratchNonCalleeSaveRegister(&MBB)
+                              : AArch64::SP;
+    assert(ScratchReg != AArch64::NoRegister);
+    // SUB Xd, SP, AllocSize
+    emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, TII,
+                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
+                    EmitCFI, InitialOffset);
+    if (RealignmentPadding) {
+      // AND SP, Xn, 0b11111...0000
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+          .addReg(ScratchReg, RegState::Kill)
+          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+          .setMIFlags(MachineInstr::FrameSetup);
+      AFI->setStackRealigned(true);
+    }
+    if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
+                              AArch64::StackProbeMaxUnprobedStack) {
+      // STR XZR, [SP]
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
+          .addReg(AArch64::XZR)
+          .addReg(AArch64::SP)
+          .addImm(0)
+          .setMIFlags(MachineInstr::FrameSetup);
+    }
+    return;
+  }
+
+  // Emit a variable-length allocation probing loop.
+  // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
+  // each of them guaranteed to adjust the stack by less than the probe size.
+  Register TargetReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
+  assert(TargetReg != AArch64::NoRegister);
+  // SUB Xd, SP, AllocSize
+  emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
+                  MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
+                  EmitCFI, InitialOffset);
+  if (RealignmentPadding) {
+    // AND Xn, Xn, 0b11111...0000
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), TargetReg)
+        .addReg(TargetReg, RegState::Kill)
+        .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+        .setMIFlags(MachineInstr::FrameSetup);
+  }
+
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC_VAR))
+      .addReg(TargetReg);
+  if (EmitCFI) {
+    // Set the CFA register back to SP.
+    CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
+        .buildDefCFARegister(AArch64::SP);
+  }
+  if (RealignmentPadding)
+    AFI->setStackRealigned(true);
 }
 
 void AArch64PrologueEmitter::emitPrologue() {
@@ -198,19 +626,16 @@ void AArch64PrologueEmitter::emitPrologue() {
     auto SaveSize =
         StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
         StackOffset::getFixed(FixedObject);
-    AFL.allocateStackSpace(MBB, PrologueBeginI, 0, SaveSize, NeedsWinCFI,
-                           &HasWinCFI,
-                           /*EmitCFI=*/false, StackOffset{},
-                           /*FollowupAllocs=*/true);
+    allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},
+                       /*FollowupAllocs=*/true);
     NumBytes -= FixedObject;
 
     // Now allocate space for the GPR callee saves.
     MachineBasicBlock::iterator MBBI = PrologueBeginI;
-    while (MBBI != EndI && AFL.isSVECalleeSave(MBBI))
+    while (MBBI != EndI && isSVECalleeSave(MBBI))
       ++MBBI;
-    FirstGPRSaveI = AFL.convertCalleeSaveRestoreToSPPrePostIncDec(
-        MBB, MBBI, DL, TII, -AFI->getCalleeSavedStackSize(), NeedsWinCFI,
-        &HasWinCFI, EmitAsyncCFI);
+    FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
+        MBBI, DL, -AFI->getCalleeSavedStackSize(), EmitAsyncCFI);
     NumBytes -= AFI->getCalleeSavedStackSize();
   } else if (CombineSPBump) {
     assert(!AFL.getSVEStackSize(MF) && "Cannot combine SP bump with SVE");
@@ -223,9 +648,8 @@ void AArch64PrologueEmitter::emitPrologue() {
     // Stack has been already adjusted.
     NumBytes -= PrologueSaveSize;
   } else if (PrologueSaveSize != 0) {
-    FirstGPRSaveI = AFL.convertCalleeSaveRestoreToSPPrePostIncDec(
-        MBB, PrologueBeginI, DL, TII, -PrologueSaveSize, NeedsWinCFI,
-        &HasWinCFI, EmitAsyncCFI);
+    FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
+        PrologueBeginI, DL, -PrologueSaveSize, EmitAsyncCFI);
     NumBytes -= PrologueSaveSize;
   }
   assert(NumBytes >= 0 && "Negative stack allocation size!?");
@@ -233,17 +657,17 @@ void AArch64PrologueEmitter::emitPrologue() {
   // Move past the saves of the callee-saved registers, fixing up the offsets
   // and pre-inc if we decided to combine the callee-save and local stack
   // pointer bump above.
-  auto &TLI = *MF.getSubtarget().getTargetLowering();
+  auto &TLI = *Subtarget.getTargetLowering();
 
   MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
   while (AfterGPRSavesI != EndI &&
          AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
-         !AFL.isSVECalleeSave(AfterGPRSavesI)) {
+         !isSVECalleeSave(AfterGPRSavesI)) {
     if (CombineSPBump &&
         // Only fix-up frame-setup load/store instructions.
-        (!AFL.requiresSaveVG(MF) || !AFL.isVGInstruction(AfterGPRSavesI, TLI)))
-      AFL.fixupCalleeSaveRestoreStackOffset(
-          *AfterGPRSavesI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI);
+        (!AFL.requiresSaveVG(MF) || !isVGInstruction(AfterGPRSavesI, TLI)))
+      fixupCalleeSaveRestoreStackOffset(*AfterGPRSavesI,
+                                        AFI->getLocalStackSize());
     ++AfterGPRSavesI;
   }
 
@@ -289,17 +713,17 @@ void AArch64PrologueEmitter::emitPrologue() {
     // allocated.
     if (!FPAfterSVECalleeSaves) {
       MachineBasicBlock::iterator CalleeSavesBegin = AfterGPRSavesI;
-      assert(AFL.isSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
-      while (AFL.isSVECalleeSave(AfterSVESavesI) &&
+      assert(isSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
+      while (isSVECalleeSave(AfterSVESavesI) &&
              AfterSVESavesI != MBB.getFirstTerminator())
         ++AfterSVESavesI;
       CalleeSavesEnd = AfterSVESavesI;
 
       StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
       // Allocate space for the callee saves (if any).
-      AFL.allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize,
-                             false, nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
-                             MFI.hasVarSizedObjects() || LocalsSize);
+      allocateStackSpace(CalleeSavesBegin, 0, SVECalleeSavesSize,
+                         EmitAsyncCFI && !HasFP, CFAOffset,
+                         MFI.hasVarSizedObjects() || LocalsSize);
     }
   }
   CFAOffset += SVECalleeSavesSize;
@@ -315,10 +739,10 @@ void AArch64PrologueEmitter::emitPrologue() {
     // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
     // the correct value here, as NumBytes also includes padding bytes,
     // which shouldn't be counted here.
-    AFL.allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
-                           SVELocalsSize + StackOffset::getFixed(NumBytes),
-                           NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
-                           CFAOffset, MFI.hasVarSizedObjects());
+    allocateStackSpace(CalleeSavesEnd, RealignmentPadding,
+                       SVELocalsSize + StackOffset::getFixed(NumBytes),
+                       EmitAsyncCFI && !HasFP, CFAOffset,
+                       MFI.hasVarSizedObjects());
   }
 
   // If we need a base pointer, set it up here. It's whatever the value of the
@@ -553,11 +977,10 @@ void AArch64PrologueEmitter::emitFramePointerSetup(
 // Define the current CFA rule to use the provided FP.
 void AArch64PrologueEmitter::emitDefineCFAWithFP(
     MachineBasicBlock::iterator MBBI, unsigned FixedObject) const {
-  const AArch64RegisterInfo *TRI = Subtarget.getRegisterInfo();
   const int OffsetToFirstCalleeSaveFromFP =
       AFI->getCalleeSaveBaseToFrameRecordOffset() -
       AFI->getCalleeSavedStackSize();
-  Register FramePtr = TRI->getFrameRegister(MF);
+  Register FramePtr = RegInfo.getFrameRegister(MF);
   CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
       .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
 }
@@ -571,11 +994,10 @@ void AArch64PrologueEmitter::emitWindowsStackProbe(
   // Find an available register to spill the value of X15 to, if X15 is being
   // used already for nest.
   unsigned X15Scratch = AArch64::NoRegister;
-  const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
   if (llvm::any_of(MBB.liveins(),
-                   [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
-                     return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
-                         AArch64::X15, LiveIn.PhysReg);
+                   [this](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
+                     return RegInfo.isSuperOrSubRegisterEq(AArch64::X15,
+                                                           LiveIn.PhysReg);
                    })) {
     X15Scratch = AFL.findScratchNonCalleeSaveRegister(&MBB, /*HasCall=*/true);
     assert(X15Scratch != AArch64::NoRegister &&
@@ -729,9 +1151,6 @@ void AArch64PrologueEmitter::emitWindowsStackProbe(
 
 void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
     MachineBasicBlock::iterator MBBI) const {
-  MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo &MFI = MF.getFrameInfo();
-
   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
   if (CSI.empty())
     return;
@@ -750,17 +1169,11 @@ void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
 
 void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
     MachineBasicBlock::iterator MBBI) const {
-  MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo &MFI = MF.getFrameInfo();
-
   // Add callee saved registers to move list.
   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
   if (CSI.empty())
     return;
 
-  const TargetSubtargetInfo &STI = MF.getSubtarget();
-  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
-  AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
   CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
 
   std::optional<int64_t> IncomingVGOffsetFromDefCFA;
@@ -779,15 +1192,15 @@ void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
     // common denominator.
     assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
     MCRegister Reg = Info.getReg();
-    if (!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
+    if (!RegInfo.regNeedsCFI(Reg, Reg))
       continue;
 
     StackOffset Offset =
         StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
-        StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI));
+        StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
 
     CFIBuilder.insertCFIInst(
-        createCFAOffset(TRI, Reg, Offset, IncomingVGOffsetFromDefCFA));
+        createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA));
   }
 }
 
@@ -804,13 +1217,9 @@ static bool isFuncletReturnInstr(const MachineInstr &MI) {
 AArch64EpilogueEmitter::AArch64EpilogueEmitter(MachineFunction &MF,
                                                MachineBasicBlock &MBB,
                                                const AArch64FrameLowering &AFL)
-    : MF(MF), MBB(MBB), MFI(MF.getFrameInfo()),
-      Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL) {
-  TII = Subtarget.getInstrInfo();
-  AFI = MF.getInfo<AArch64FunctionInfo>();
-
-  NeedsWinCFI = AFL.needsWinCFI(MF);
+    : AArch64PrologueEpilogueCommon(MF, MBB, AFL) {
   EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
+  HomPrologEpilog = AFL.homogeneousPrologEpilog(MF, &MBB);
   SEHEpilogueStartI = MBB.end();
 }
 
@@ -845,7 +1254,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
   if (MF.hasEHFunclets())
     AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
 
-  if (AFL.homogeneousPrologEpilog(MF, &MBB)) {
+  if (HomPrologEpilog) {
     assert(!NeedsWinCFI);
     auto FirstHomogenousEpilogI = MBB.getFirstTerminator();
     if (FirstHomogenousEpilogI != MBB.begin()) {
@@ -868,8 +1277,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
   bool FPAfterSVECalleeSaves =
       Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
 
-  bool CombineSPBump =
-      AFL.shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
+  bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
   // Assume we can't combine the last pop with the sp restore.
   bool CombineAfterCSRBump = false;
   if (FPAfterSVECalleeSaves) {
@@ -886,9 +1294,9 @@ void AArch64EpilogueEmitter::emitEpilogue() {
     // allocate more stack for arguments (in space that an untimely interrupt
     // may clobber), convert it to a post-index ldp.
     if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
-      AFL.convertCalleeSaveRestoreToSPPrePostIncDec(
-          MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
-          MachineInstr::FrameDestroy, PrologueSaveSize);
+      convertCalleeSaveRestoreToSPPrePostIncDec(
+          Pop, DL, PrologueSaveSize, EmitCFI, MachineInstr::FrameDestroy,
+          PrologueSaveSize);
     } else {
       // If not, make sure to emit an add after the last ldp.
       // We're doing this by transferring the size to be restored from the
@@ -907,12 +1315,12 @@ void AArch64EpilogueEmitter::emitEpilogue() {
   while (FirstGPRRestoreI != Begin) {
     --FirstGPRRestoreI;
     if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
-        (!FPAfterSVECalleeSaves && AFL.isSVECalleeSave(FirstGPRRestoreI))) {
+        (!FPAfterSVECalleeSaves && isSVECalleeSave(FirstGPRRestoreI))) {
       ++FirstGPRRestoreI;
       break;
     } else if (CombineSPBump)
-      AFL.fixupCalleeSaveRestoreStackOffset(
-          *FirstGPRRestoreI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI);
+      fixupCalleeSaveRestoreStackOffset(*FirstGPRRestoreI,
+                                        AFI->getLocalStackSize());
   }
 
   if (NeedsWinCFI) {
@@ -928,7 +1336,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
     --SEHEpilogueStartI;
   }
 
-  if (AFL.hasFP(MF) && AFI->hasSwiftAsyncContext())
+  if (HasFP && AFI->hasSwiftAsyncContext())
     emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
 
   const StackOffset &SVEStackSize = AFL.getSVEStackSize(MF);
@@ -938,7 +1346,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
     assert(!SVEStackSize && "Cannot combine SP bump with SVE");
 
     // When we are about to restore the CSRs, the CFA register is SP again.
-    if (EmitCFI && AFL.hasFP(MF))
+    if (EmitCFI && HasFP)
       CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
           .buildDefCFA(AArch64::SP, NumBytes);
 
@@ -963,12 +1371,11 @@ void AArch64EpilogueEmitter::emitEpilogue() {
 
     RestoreBegin = std::prev(RestoreEnd);
     while (RestoreBegin != MBB.begin() &&
-           AFL.isSVECalleeSave(std::prev(RestoreBegin)))
+           isSVECalleeSave(std::prev(RestoreBegin)))
       --RestoreBegin;
 
-    assert(AFL.isSVECalleeSave(RestoreBegin) &&
-           AFL.isSVECalleeSave(std::prev(RestoreEnd)) &&
-           "Unexpected instruction");
+    assert(isSVECalleeSave(RestoreBegin) &&
+           isSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
 
     StackOffset CalleeSavedSizeAsOffset =
         StackOffset::getScalable(CalleeSavedSize);
@@ -1016,8 +1423,8 @@ void AArch64EpilogueEmitter::emitEpilogue() {
         // If we have a non-zero offset to the non-SVE CS base, we need to
         // compute the base address by subtracting the offset in a temporary
         // register first (to avoid briefly deallocating the SVE CS).
-        CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
-            &AArch64::GPR64RegClass);
+        CalleeSaveBase =
+            MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
         emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
                         StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
                         MachineInstr::FrameDestroy);
@@ -1034,20 +1441,20 @@ void AArch64EpilogueEmitter::emitEpilogue() {
         emitFrameOffset(
             MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
             StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
-            false, NeedsWinCFI, &HasWinCFI, EmitCFI && !AFL.hasFP(MF),
+            false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
             SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
         NumBytes = 0;
       }
 
       emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
                       DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
-                      NeedsWinCFI, &HasWinCFI, EmitCFI && !AFL.hasFP(MF),
+                      NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
                       SVEStackSize +
                           StackOffset::getFixed(NumBytes + PrologueSaveSize));
 
       emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
                       DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
-                      NeedsWinCFI, &HasWinCFI, EmitCFI && !AFL.hasFP(MF),
+                      NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
                       DeallocateAfter +
                           StackOffset::getFixed(NumBytes + PrologueSaveSize));
     }
@@ -1055,7 +1462,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
       emitCalleeSavedSVERestores(RestoreEnd);
   }
 
-  if (!AFL.hasFP(MF)) {
+  if (!HasFP) {
     bool RedZone = AFL.canUseRedZone(MF);
     // If this was a redzone leaf function, we don't need to restore the
     // stack pointer (but we may need to pop stack args for fastcc).
@@ -1100,7 +1507,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
                     MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
 
   // When we are about to restore the CSRs, the CFA register is SP again.
-  if (EmitCFI && AFL.hasFP(MF))
+  if (EmitCFI && HasFP)
     CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
         .buildDefCFA(AArch64::SP, PrologueSaveSize);
 
@@ -1119,6 +1526,39 @@ void AArch64EpilogueEmitter::emitEpilogue() {
   }
 }
 
+bool AArch64EpilogueEmitter::shouldCombineCSRLocalStackBump(
+    uint64_t StackBumpBytes) const {
+  if (!AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
+          StackBumpBytes))
+    return false;
+  if (MBB.empty())
+    return true;
+
+  // Disable combined SP bump if the last instruction is an MTE tag store. It
+  // is almost always better to merge SP adjustment into those instructions.
+  MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
+  MachineBasicBlock::iterator Begin = MBB.begin();
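+  // Scan backwards past transient and FrameDestroy-flagged instructions to
+  // find the last "real" instruction before the epilogue.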
+  while (LastI != Begin) {
+    --LastI;
+    if (LastI->isTransient())
+      continue;
+    if (!LastI->getFlag(MachineInstr::FrameDestroy))
+      break;
+  }
+  switch (LastI->getOpcode()) {
+  case AArch64::STGloop:
+  case AArch64::STZGloop:
+  case AArch64::STGi:
+  case AArch64::STZGi:
+  case AArch64::ST2Gi:
+  case AArch64::STZ2Gi:
+    return false;
+  default:
+    return true;
+  }
+  llvm_unreachable("unreachable");
+}
+
 void AArch64EpilogueEmitter::emitSwiftAsyncContextFramePointer(
     MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
   switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
@@ -1174,8 +1614,6 @@ void AArch64EpilogueEmitter::emitCalleeSavedRestores(
   if (CSI.empty())
     return;
 
-  const TargetSubtargetInfo &STI = MF.getSubtarget();
-  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
   CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
 
   for (const auto &Info : CSI) {
@@ -1184,8 +1622,7 @@ void AArch64EpilogueEmitter::emitCalleeSavedRestores(
       continue;
 
     MCRegister Reg = Info.getReg();
-    if (SVE &&
-        !static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
+    if (SVE && !RegInfo.regNeedsCFI(Reg, Reg))
       continue;
 
     CFIBuilder.buildRestore(Info.getReg());

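For reference, the epilogue emitter is driven from
`AArch64FrameLowering::emitEpilogue`. That call site is unchanged by this
patch and not shown here; a minimal sketch of it, assuming the public
interface declared in the header diff below:

// Sketch only; not part of this diff.
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  AArch64EpilogueEmitter EpilogueEmitter(MF, MBB, *this);
  EpilogueEmitter.emitEpilogue();
  // finalizeEpilogue() runs in ~AArch64EpilogueEmitter() when the emitter
  // goes out of scope.
}
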
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
index 20bbffcdb33f2..a1c9b34a77c3f 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
@@ -22,15 +22,65 @@
 
 namespace llvm {
 
+class TargetLowering;
 class AArch64Subtarget;
 class AArch64FunctionInfo;
 class AArch64FrameLowering;
 
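+/// A common base class for AArch64PrologueEmitter and AArch64EpilogueEmitter,
+/// holding the target/function state and helper methods shared between the
+/// prologue and epilogue emission code.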
+class AArch64PrologueEpilogueCommon {
+public:
+  AArch64PrologueEpilogueCommon(MachineFunction &MF, MachineBasicBlock &MBB,
+                                const AArch64FrameLowering &AFL);
+
+protected:
+  bool requiresGetVGCall() const;
+
+  bool isVGInstruction(MachineBasicBlock::iterator MBBI,
+                       const TargetLowering &TLI) const;
+
+  // Convert a callee-save register save/restore instruction into a stack
+  // pointer decrement/increment that allocates/deallocates the callee-save
+  // stack area, by rewriting the store/load as its pre/post-increment
+  // version.
+  MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
+      MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int CSStackSizeInc,
+      bool EmitCFI, MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
+      int CFAOffset = 0) const;
+
+  // Fix up callee-save register save/restore instructions to account for a
+  // combined SP bump by adding the local stack size to the stack offsets.
+  void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
+                                         uint64_t LocalStackSize) const;
+
+  bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const;
+
+  MachineFunction &MF;
+  MachineBasicBlock &MBB;
+
+  const MachineFrameInfo &MFI;
+  const AArch64Subtarget &Subtarget;
+  const AArch64FrameLowering &AFL;
+  const AArch64RegisterInfo &RegInfo;
+
+  // Common flags. These generally should not change outside of the (possibly
+  // derived) constructor.
+  bool HasFP = false;
+  bool EmitCFI = false;     // Note: Set in derived constructors.
+  bool IsFunclet = false;   // Note: Set in derived constructors.
+  bool NeedsWinCFI = false; // Note: Can be changed in emitFramePointerSetup.
+  bool HomPrologEpilog = false; // Note: Set in derived constructors.
+
+  // Note: "HasWinCFI" is mutable as it can change in any "emit" function.
+  mutable bool HasWinCFI = false;
+
+  const TargetInstrInfo *TII = nullptr;
+  AArch64FunctionInfo *AFI = nullptr;
+};
+
 /// A helper class for emitting the prologue. Substantial new functionality
 /// should be factored into a new method. Where possible "emit*" methods should
 /// be const, and any flags that change how the prologue is emitted should be
 /// set in the constructor.
-class AArch64PrologueEmitter {
+class AArch64PrologueEmitter final : public AArch64PrologueEpilogueCommon {
 public:
   AArch64PrologueEmitter(MachineFunction &MF, MachineBasicBlock &MBB,
                          const AArch64FrameLowering &AFL);
@@ -46,6 +96,11 @@ class AArch64PrologueEmitter {
   }
 
 private:
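+  // Allocate AllocSize bytes of stack space (emitting inline stack probes
+  // where the target requires them), realigning the stack pointer if
+  // RealignmentPadding is non-zero.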
+  void allocateStackSpace(MachineBasicBlock::iterator MBBI,
+                          int64_t RealignmentPadding, StackOffset AllocSize,
+                          bool EmitCFI, StackOffset InitialOffset,
+                          bool FollowupAllocs);
+
   void emitShadowCallStackPrologue(MachineBasicBlock::iterator MBBI,
                                    const DebugLoc &DL) const;
 
@@ -71,14 +126,7 @@ class AArch64PrologueEmitter {
 
   void determineLocalsStackSize(uint64_t StackSize, uint64_t PrologueSaveSize);
 
-  MachineFunction &MF;
-  MachineBasicBlock &MBB;
-
   const Function &F;
-  const MachineFrameInfo &MFI;
-  const AArch64Subtarget &Subtarget;
-  const AArch64FrameLowering &AFL;
-  const AArch64RegisterInfo &RegInfo;
 
 #ifndef NDEBUG
   mutable LivePhysRegs LiveRegs{RegInfo};
@@ -89,29 +137,16 @@ class AArch64PrologueEmitter {
 #endif
 
   // Prologue flags. These generally should not change outside of the
-  // constructor. Two exceptions are "CombineSPBump" which is set in
-  // determineLocalsStackSize, and "NeedsWinCFI" which is set in
-  // emitFramePointerSetup.
-  bool EmitCFI = false;
+  // constructor.
   bool EmitAsyncCFI = false;
-  bool HasFP = false;
-  bool IsFunclet = false;
-  bool CombineSPBump = false;
-  bool HomPrologEpilog = false;
-  bool NeedsWinCFI = false;
-
-  // Note: "HasWinCFI" is mutable as it can change in any "emit" function.
-  mutable bool HasWinCFI = false;
-
-  const TargetInstrInfo *TII = nullptr;
-  AArch64FunctionInfo *AFI = nullptr;
+  bool CombineSPBump = false; // Note: This is set in determineLocalsStackSize.
 };
 
 /// A helper class for emitting the epilogue. Substantial new functionality
 /// should be factored into a new method. Where possible "emit*" methods should
 /// be const, and any flags that change how the epilogue is emitted should be
 /// set in the constructor.
-class AArch64EpilogueEmitter {
+class AArch64EpilogueEmitter final : public AArch64PrologueEpilogueCommon {
 public:
   AArch64EpilogueEmitter(MachineFunction &MF, MachineBasicBlock &MBB,
                          const AArch64FrameLowering &AFL);
@@ -122,6 +157,8 @@ class AArch64EpilogueEmitter {
   ~AArch64EpilogueEmitter() { finalizeEpilogue(); }
 
 private:
+  bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const;
+
   void emitSwiftAsyncContextFramePointer(MachineBasicBlock::iterator MBBI,
                                          const DebugLoc &DL) const;
 
@@ -141,27 +178,8 @@ class AArch64EpilogueEmitter {
 
   void finalizeEpilogue() const;
 
-  MachineFunction &MF;
-  MachineBasicBlock &MBB;
-
-  const MachineFrameInfo &MFI;
-  const AArch64Subtarget &Subtarget;
-  const AArch64FrameLowering &AFL;
-
-  // Epilogue flags. These generally should not change outside of the
-  // constructor (or early in emitEpilogue).
-  bool NeedsWinCFI = false;
-  bool EmitCFI = false;
-  bool IsFunclet = false;
-
-  // Note: "HasWinCFI" is mutable as it can change in any "emit" function.
-  mutable bool HasWinCFI = false;
-
-  const TargetInstrInfo *TII = nullptr;
-  AArch64FunctionInfo *AFI = nullptr;
-
-  DebugLoc DL;
   MachineBasicBlock::iterator SEHEpilogueStartI;
+  DebugLoc DL;
 };
 
 } // namespace llvm

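The definition of the new common constructor is not shown in this excerpt,
but the fields above and the initialization removed from the derived
constructors suggest it looks roughly like the following sketch:

// Sketch, inferred from the removed derived-constructor code and the new
// fields; the actual definition presumably appears earlier in the patch.
AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon(
    MachineFunction &MF, MachineBasicBlock &MBB,
    const AArch64FrameLowering &AFL)
    : MF(MF), MBB(MBB), MFI(MF.getFrameInfo()),
      Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
      RegInfo(*Subtarget.getRegisterInfo()) {
  TII = Subtarget.getInstrInfo();
  AFI = MF.getInfo<AArch64FunctionInfo>();
  HasFP = AFL.hasFP(MF);
  NeedsWinCFI = AFL.needsWinCFI(MF);
}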