[llvm] Spill/restore FP/BP around instructions in which they are clobbered (PR #81048)

Sun Aug 4 20:11:36 PDT 2024

https://github.com/weiguozhi updated https://github.com/llvm/llvm-project/pull/81048

>From a302b282532aa286ac68b9dd8f365fa6bcf889cb Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Wed, 7 Feb 2024 22:23:45 +0000
Subject: [PATCH 1/6] Spill/restore FP/BP around instructions in which they are
 clobbered

This patch fixes https://github.com/llvm/llvm-project/issues/17204.

If a function uses a base pointer and that register is clobbered by an
instruction (typically an inline asm), the current register allocator
can't handle the situation, so BP holds garbage after that instruction.
In theory the same can happen to FP.

We can spill and reload the FP/BP registers around those instructions.
But normal spill/reload instructions also use FP/BP, so we can't spill
them into normal spill slots; instead we spill them to the top of the
stack using the SP register.
---
 llvm/include/llvm/CodeGen/MachineFrameInfo.h  |   1 +
 .../llvm/CodeGen/TargetFrameLowering.h        |  25 ++++
 .../include/llvm/CodeGen/TargetRegisterInfo.h |  10 ++
 llvm/lib/CodeGen/PrologEpilogInserter.cpp     | 124 ++++++++++++++++++
 llvm/lib/Target/X86/X86FrameLowering.cpp      | 111 ++++++++++++++++
 llvm/lib/Target/X86/X86FrameLowering.h        |  14 ++
 llvm/lib/Target/X86/X86RegisterInfo.h         |   6 +
 llvm/test/CodeGen/X86/clobber_base_ptr.ll     |  59 +++++++++
 llvm/test/CodeGen/X86/i386-baseptr.ll         |   4 +
 llvm/test/CodeGen/X86/x86-64-baseptr.ll       |  12 ++
 10 files changed, 366 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/clobber_base_ptr.ll

diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 466fed7fb3a29..3534e50253af6 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -601,6 +601,7 @@ class MachineFrameInfo {
   /// Return the alignment in bytes that this function must be aligned to,
   /// which is greater than the default stack alignment provided by the target.
   Align getMaxAlign() const { return MaxAlignment; }
+  Align getStackAlignment() const { return StackAlignment; }
 
   /// Make sure the function is at least Align bytes aligned.
   void ensureMaxAlignment(Align Alignment);
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 0b9cacecc7cbe..e8ff1d69b4eb1 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -466,6 +466,31 @@ class TargetFrameLowering {
   /// Return the frame base information to be encoded in the DWARF subprogram
   /// debug info.
   virtual DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const;
+
+  /// Issue instructions to allocate stack space and spill FP and/or BP to stack
+  /// using SP register.
+  virtual void spillFPBPUsingSP(MachineFunction &MF,
+                                const MachineBasicBlock::iterator BeforeMI,
+                                bool SpillFP, bool SpillBP) const {
+    report_fatal_error("spillFPBPUsingSP is not implemented for this target!");
+  }
+
+  /// Issue instructions to restore FP and/or BP from stack using SP register,
+  /// and free stack space.
+  virtual void restoreFPBPUsingSP(MachineFunction &MF,
+                                  const MachineBasicBlock::iterator AfterMI,
+                                  bool SpillFP, bool SpillBP) const {
+    report_fatal_error("restoreFPBPUsingSP is not implemented for this "
+                       "target!");
+  }
+
+  // If MI uses fp/bp, but target can handle it, and doesn't want PEI to spill
+  // it, this function should return true, and update MI so PEI will not check
+  // any instructions from it and later.
+  virtual bool skipSpillFPBP(MachineFunction &MF,
+                             MachineBasicBlock::reverse_iterator &MI) const {
+    return false;
+  }
 };
 
 } // End llvm namespace
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 33c4c745c3416..c29901d5ffb7a 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -988,6 +988,16 @@ class TargetRegisterInfo : public MCRegisterInfo {
     return false;
   }
 
+  /// Returns true if the target needs a base register.
+  virtual bool hasBaseRegister(const MachineFunction &MF) const {
+    return false;
+  }
+
+  /// Returns the physical base register used to access stack object.
+  virtual Register getBaseRegister(const MachineFunction &MF) const {
+    return 0;
+  }
+
   /// Return true if target has reserved a spill slot in the stack frame of
   /// the given function for the specified register. e.g. On x86, if the frame
   /// register is required, the first fixed stack object is reserved as its
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 3db5e17615fd4..4b3e5860686b4 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -77,6 +77,15 @@ using MBBVector = SmallVector<MachineBasicBlock *, 4>;
 STATISTIC(NumLeafFuncWithSpills, "Number of leaf functions with CSRs");
 STATISTIC(NumFuncSeen, "Number of functions seen in PEI");
 
+static cl::opt<bool> SpillClobberedFP(
+    "spill-clobbered-fp",
+    cl::desc("Spill clobbered fp register to stack."),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<bool> SpillClobberedBP(
+    "spill-clobbered-bp",
+    cl::desc("Spill clobbered bp register to stack."),
+    cl::init(true), cl::Hidden);
 
 namespace {
 
@@ -122,6 +131,7 @@ class PEI : public MachineFunctionPass {
   void calculateCallFrameInfo(MachineFunction &MF);
   void calculateSaveRestoreBlocks(MachineFunction &MF);
   void spillCalleeSavedRegs(MachineFunction &MF);
+  void spillFrameBasePointer(MachineFunction &MF);
 
   void calculateFrameObjectOffsets(MachineFunction &MF);
   void replaceFrameIndices(MachineFunction &MF);
@@ -228,6 +238,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
   FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF);
   ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
 
+  // Handle FP and BP spilling and restoring, for instructions that need it.
+  spillFrameBasePointer(MF);
+
   // Calculate the MaxCallFrameSize value for the function's frame
   // information. Also eliminates call frame pseudo instructions.
   calculateCallFrameInfo(MF);
@@ -1571,3 +1584,114 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
       ++I;
   }
 }
+
+static bool accessFrameBasePointer(const MachineInstr &MI, Register FP,
+                                   Register BP, bool &AccessFP, bool &AccessBP,
+                                   const TargetRegisterInfo *TRI) {
+  AccessFP = AccessBP = false;
+  if (FP) {
+    if (MI.findRegisterUseOperandIdx(FP, false, TRI) != -1 ||
+        MI.findRegisterDefOperandIdx(FP, false, true, TRI) != -1)
+      AccessFP = true;
+  }
+  if (BP) {
+    if (MI.findRegisterUseOperandIdx(BP, false, TRI) != -1 ||
+        MI.findRegisterDefOperandIdx(BP, false, true, TRI) != -1)
+      AccessBP = true;
+  }
+  return AccessFP || AccessBP;
+}
+
+/// If a function uses base pointer and the base pointer is clobbered by inline
+/// asm, RA doesn't detect this case, and after the inline asm, the base pointer
+/// contains a garbage value.
+/// For example if a 32b x86 function uses base pointer esi, and esi is
+/// clobbered by following inline asm
+///     asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
+/// We need to save esi before the asm and restore it after the asm.
+///
+/// The problem can also occur to frame pointer if there is a function call, and
+/// the callee uses a different calling convention and clobbers the fp.
+///
+/// Normal frame objects (spill slots) are accessed through the fp/bp
+/// register, so we can't spill fp/bp to normal spill slots.
+///
+/// FIXME: There are 2 possible enhancements:
+/// 1. In many cases there are different physical registers not clobbered by
+/// inline asm, we can use one of them as base pointer. Or use a virtual
+/// register as base pointer and let RA allocate a physical register to it.
+/// 2. If no other instruction accesses the stack through fp/bp between the
+/// inline asm and the epilog, we can skip the save and restore operations.
+void PEI::spillFrameBasePointer(MachineFunction &MF) {
+  Register FP, BP;
+  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  if (TFI.hasFP(MF) && SpillClobberedFP)
+    FP = TRI->getFrameRegister(MF);
+  if (TRI->hasBaseRegister(MF) && SpillClobberedBP)
+    BP = TRI->getBaseRegister(MF);
+
+  for (MachineBasicBlock &MBB : MF) {
+    auto MI = MBB.rbegin(), ME = MBB.rend();
+    while (MI != ME) {
+      // Skip frame setup/destroy instructions.
+      // Skip instructions handled by target.
+      if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
+          MI->getFlag(MachineInstr::MIFlag::FrameDestroy) ||
+          TFI.skipSpillFPBP(MF, MI)) {
+        ++MI;
+        continue;
+      }
+
+      bool AccessFP, AccessBP;
+      // Check if fp or bp is used in MI.
+      if (!accessFrameBasePointer(*MI, FP, BP, AccessFP, AccessBP, TRI)) {
+        ++MI;
+        continue;
+      }
+
+      // Look for the range [Start, Stop] in which fp or bp is defined and used.
+      bool FPLive = false, BPLive = false;
+      bool SpillFP = false, SpillBP = false;
+      auto Start = MI, Stop = MI;
+      do {
+        SpillFP |= AccessFP;
+        SpillBP |= AccessBP;
+
+        // Maintain FPLive and BPLive.
+        if (FPLive && MI->findRegisterDefOperandIdx(FP, false, true, TRI) != -1)
+          FPLive = false;
+        if (FP && MI->findRegisterUseOperandIdx(FP, false, TRI) != -1)
+          FPLive = true;
+        if (BPLive && MI->findRegisterDefOperandIdx(BP, false, true, TRI) != -1)
+          BPLive = false;
+        if (BP && MI->findRegisterUseOperandIdx(BP, false, TRI) != -1)
+          BPLive = true;
+
+        Start = MI++;
+      } while ((MI != ME) && (FPLive || BPLive ||
+                accessFrameBasePointer(*MI, FP, BP, AccessFP, AccessBP, TRI)));
+
+      // If the bp is clobbered by a call, we should save and restore outside of
+      // the frame setup instructions.
+      if (Stop->isCall()) {
+        const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+        auto FrameSetup = std::next(Start);
+        while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
+               !FrameSetup->isCall())
+          ++FrameSetup;
+        if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup)) {
+          while (!TII.isFrameInstr(*Stop))
+            --Stop;
+          Start = FrameSetup;
+          MI = Start;
+          ++MI;
+        }
+      }
+
+      // Call target functions to spill and restore FP and BP registers.
+      TFI.spillFPBPUsingSP(MF, &(*Start), SpillFP, SpillBP);
+      TFI.restoreFPBPUsingSP(MF, &(*Stop), SpillFP, SpillBP);
+    }
+  }
+}
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 8a263e1a0678b..d82125069604b 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -4231,3 +4231,114 @@ void X86FrameLowering::restoreWinEHStackPointersInParent(
                                   /*RestoreSP=*/IsSEH);
   }
 }
+
+static int computeSPAdjust4SpillFPBP(MachineFunction &MF,
+                                     const TargetRegisterClass *RC,
+                                     unsigned SpillRegNum) {
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  unsigned AllocSize = TRI->getSpillSize(*RC) * SpillRegNum;
+  Align StackAlign = MF.getFrameInfo().getStackAlignment();
+  unsigned AlignedSize = alignTo(AllocSize, StackAlign);
+  return AlignedSize - AllocSize;
+}
+
+void X86FrameLowering::spillFPBPUsingSP(
+    MachineFunction &MF, MachineBasicBlock::iterator BeforeMI,
+    bool SpillFP, bool SpillBP) const {
+  const TargetRegisterClass *RC;
+  unsigned RegNum = 0;
+  MachineBasicBlock *MBB = BeforeMI->getParent();
+  DebugLoc DL = BeforeMI->getDebugLoc();
+
+  // Spill FP.
+  if (SpillFP) {
+    Register FP = TRI->getFrameRegister(MF);
+    if (STI.isTarget64BitILP32())
+      FP = Register(getX86SubSuperRegister(FP, 64));
+    RC = TRI->getMinimalPhysRegClass(FP);
+    ++RegNum;
+
+    BuildMI(*MBB, BeforeMI, DL,
+            TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
+        .addReg(FP);
+  }
+
+  // Spill BP.
+  if (SpillBP) {
+    Register BP = TRI->getBaseRegister(MF);
+    if (STI.isTarget64BitILP32())
+      BP = Register(getX86SubSuperRegister(BP, 64));
+    RC = TRI->getMinimalPhysRegClass(BP);
+    ++RegNum;
+
+    BuildMI(*MBB, BeforeMI, DL,
+            TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
+        .addReg(BP);
+  }
+
+  // Make sure SP is aligned.
+  int SPAdjust = computeSPAdjust4SpillFPBP(MF, RC, RegNum);
+  if (SPAdjust)
+    emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
+}
+
+void X86FrameLowering::restoreFPBPUsingSP(
+    MachineFunction &MF, MachineBasicBlock::iterator AfterMI,
+    bool SpillFP, bool SpillBP) const {
+  Register FP, BP;
+  const TargetRegisterClass *RC;
+  unsigned RegNum = 0;
+  if (SpillFP) {
+    FP = TRI->getFrameRegister(MF);
+    if (STI.isTarget64BitILP32())
+      FP = Register(getX86SubSuperRegister(FP, 64));
+    RC = TRI->getMinimalPhysRegClass(FP);
+    ++RegNum;
+  }
+  if (SpillBP) {
+    BP = TRI->getBaseRegister(MF);
+    if (STI.isTarget64BitILP32())
+      BP = Register(getX86SubSuperRegister(BP, 64));
+    RC = TRI->getMinimalPhysRegClass(BP);
+    ++RegNum;
+  }
+
+  // Adjust SP so it points to spilled FP or BP.
+  MachineBasicBlock *MBB = AfterMI->getParent();
+  MachineBasicBlock::iterator Pos = std::next(AfterMI);
+  DebugLoc DL = AfterMI->getDebugLoc();
+  int SPAdjust = computeSPAdjust4SpillFPBP(MF, RC, RegNum);
+  if (SPAdjust)
+    emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);
+
+  // Restore BP.
+  if (SpillBP) {
+    BuildMI(*MBB, Pos, DL,
+            TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), BP);
+  }
+
+  // Restore FP.
+  if (SpillFP) {
+    BuildMI(*MBB, Pos, DL,
+            TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), FP);
+  }
+}
+
+bool X86FrameLowering::skipSpillFPBP(
+    MachineFunction &MF, MachineBasicBlock::reverse_iterator &MI) const {
+  if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
+    // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form
+    //     SaveRbx = COPY RBX
+    //     SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx
+    // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx.
+    // We should skip this instruction sequence.
+    int FI;
+    unsigned Reg;
+    while (!(MI->getOpcode() == TargetOpcode::COPY &&
+             MI->getOperand(1).getReg() == X86::RBX) &&
+           !((Reg = TII.isStoreToStackSlot(*MI, FI)) && Reg == X86::RBX))
+      ++MI;
+    return true;
+  }
+  return false;
+}
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index 2dc9ecc6109d7..0d77933363ce4 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -103,6 +103,20 @@ class X86FrameLowering : public TargetFrameLowering {
                               MutableArrayRef<CalleeSavedInfo> CSI,
                               const TargetRegisterInfo *TRI) const override;
 
+  void
+  spillFPBPUsingSP(MachineFunction &MF,
+                   const MachineBasicBlock::iterator BeforeMI,
+                   bool SpillFP, bool SpillBP) const override;
+
+  void
+  restoreFPBPUsingSP(MachineFunction &MF,
+                     const MachineBasicBlock::iterator AfterMI,
+                     bool SpillFP, bool SpillBP) const override;
+
+  bool
+  skipSpillFPBP(MachineFunction &MF,
+                MachineBasicBlock::reverse_iterator &MI) const override;
+
   bool hasFP(const MachineFunction &MF) const override;
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
   bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index 7296a5f021e4a..cccb731d24354 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -133,6 +133,9 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
   void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
 
   bool hasBasePointer(const MachineFunction &MF) const;
+  bool hasBaseRegister(const MachineFunction &MF) const override {
+    return hasBasePointer(MF);
+  }
 
   bool canRealignStack(const MachineFunction &MF) const override;
 
@@ -164,6 +167,9 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
   unsigned getPtrSizedStackRegister(const MachineFunction &MF) const;
   Register getStackRegister() const { return StackPtr; }
   Register getBaseRegister() const { return BasePtr; }
+  Register getBaseRegister(const MachineFunction &MF) const override {
+    return BasePtr;
+  }
   /// Returns physical register used as frame pointer.
   /// This will always returns the frame pointer register, contrary to
   /// getFrameRegister() which returns the "base pointer" in situations
diff --git a/llvm/test/CodeGen/X86/clobber_base_ptr.ll b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
new file mode 100644
index 0000000000000..29894b6d38906
--- /dev/null
+++ b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i386-pc-windows-gnu"
+
+; This function uses esi as base pointer, the inline asm clobbers esi, so we
+; should save esi using esp before the inline asm, and restore esi after the
+; inline asm.
+
+define i32 @foo() {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    .cfi_offset %ebp, -8
+; CHECK-NEXT:    movl %esp, %ebp
+; CHECK-NEXT:    .cfi_def_cfa_register %ebp
+; CHECK-NEXT:    pushl %edi
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    andl $-16, %esp
+; CHECK-NEXT:    subl $16, %esp
+; CHECK-NEXT:    movl %esp, %esi
+; CHECK-NEXT:    .cfi_offset %esi, -16
+; CHECK-NEXT:    .cfi_offset %edi, -12
+; CHECK-NEXT:    movl $4, 12(%esi)
+; CHECK-NEXT:    movl 12(%esi), %eax
+; CHECK-NEXT:    addl $3, %eax
+; CHECK-NEXT:    andl $-4, %eax
+; CHECK-NEXT:    calll __alloca
+; CHECK-NEXT:    movl %esp, %eax
+; CHECK-NEXT:    andl $-16, %eax
+; CHECK-NEXT:    movl %eax, %esp
+; CHECK-NEXT:    movl $1, (%eax)
+; CHECK-NEXT:    leal 8(%esi), %edi
+; CHECK-NEXT:    movl $4, %ecx
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    movl %eax, %esi
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    rep movsb (%esi), %es:(%edi)
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    movl 8(%esi), %eax
+; CHECK-NEXT:    leal -8(%ebp), %esp
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    popl %edi
+; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    retl
+entry:
+  %size = alloca i32, align 4
+  %g = alloca i32, align 4
+  store volatile i32 4, ptr %size, align 4
+  %len = load volatile i32, ptr %size, align 4
+  %var_array = alloca i8, i32 %len, align 16
+  store i32 1, ptr %var_array, align 16
+  %nil = call { ptr, ptr, i32 } asm "rep movsb", "={di},={si},={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags}"(ptr %g, ptr %var_array, i32 4)
+  %retval = load i32, ptr %g, align 4
+  ret i32 %retval
+}
diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll
index 08e4bde7353a4..777eb838b84cc 100644
--- a/llvm/test/CodeGen/X86/i386-baseptr.ll
+++ b/llvm/test/CodeGen/X86/i386-baseptr.ll
@@ -109,10 +109,14 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32
 ; CHECK-NEXT:    subl %eax, %edx
 ; CHECK-NEXT:    movl %edx, %esp
 ; CHECK-NEXT:    negl %eax
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    subl $28, %esp
 ; CHECK-NEXT:    movl $405, %esi # imm = 0x195
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    addl $28, %esp
+; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    movl $405, %ebx # imm = 0x195
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
diff --git a/llvm/test/CodeGen/X86/x86-64-baseptr.ll b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
index 8cda4ba2814bb..020004def6e7a 100644
--- a/llvm/test/CodeGen/X86/x86-64-baseptr.ll
+++ b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
@@ -136,10 +136,14 @@ define void @clobber_base() #0 {
 ; X32ABI-NEXT:    subl %eax, %edx
 ; X32ABI-NEXT:    negl %eax
 ; X32ABI-NEXT:    movl %edx, %esp
+; X32ABI-NEXT:    pushq %rbx
+; X32ABI-NEXT:    subl $24, %esp
 ; X32ABI-NEXT:    movl $405, %ebx # imm = 0x195
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    nop
 ; X32ABI-NEXT:    #NO_APP
+; X32ABI-NEXT:    addl $24, %esp
+; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    movl $8, %edx
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    movl %edx, (%ebx)
@@ -268,6 +272,8 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32
 ; X32ABI-NEXT:    subl %eax, %edx
 ; X32ABI-NEXT:    negl %eax
 ; X32ABI-NEXT:    movl %edx, %esp
+; X32ABI-NEXT:    pushq %rbx
+; X32ABI-NEXT:    subl $24, %esp
 ; X32ABI-NEXT:    movl $405, %ebx # imm = 0x195
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    nop
@@ -275,6 +281,8 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    nop
 ; X32ABI-NEXT:    #NO_APP
+; X32ABI-NEXT:    addl $24, %esp
+; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    movl $8, %edx
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    movl %edx, (%ebx)
@@ -385,10 +393,14 @@ define void @vmw_host_printf(ptr %fmt, ...) nounwind {
 ; X32ABI-NEXT:    movl $48, (%eax)
 ; X32ABI-NEXT:    movl $8, (%eax)
 ; X32ABI-NEXT:    xorl %eax, %eax
+; X32ABI-NEXT:    pushq %rbx
+; X32ABI-NEXT:    subl $24, %esp
 ; X32ABI-NEXT:    xorl %ebx, %ebx
 ; X32ABI-NEXT:    xorl %ecx, %ecx
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    #NO_APP
+; X32ABI-NEXT:    addl $24, %esp
+; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    leal -8(%ebp), %esp
 ; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    popq %rbp

>From 4b070311fcf5774f8435b1d9f09d42f2335a0631 Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Wed, 14 Feb 2024 01:01:48 +0000
Subject: [PATCH 2/6] Spill/restore FP/BP around instructions in which they are
 clobbered

Add a new test case to demonstrate save/restore of the FP register.
---
 llvm/test/CodeGen/X86/clobber_frame_ptr.ll | 49 ++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/clobber_frame_ptr.ll

diff --git a/llvm/test/CodeGen/X86/clobber_frame_ptr.ll b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
new file mode 100644
index 0000000000000..591676bf3f15c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=x86_64-pc-linux -stackrealign -spill-clobbered-fp=true -verify-machineinstrs < %s | FileCheck %s
+
+; Calling convention ghccc uses ebp to pass parameter, so calling a function
+; using ghccc clobbers ebp. We should save and restore ebp around such a call
+; if ebp is used as frame pointer.
+
+declare ghccc i32 @external(i32)
+
+define i32 @foo(i32 %0, i32 %1) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    andq $-16, %rsp
+; CHECK-NEXT:    subq $16, %rsp
+; CHECK-NEXT:    .cfi_offset %rbx, -56
+; CHECK-NEXT:    .cfi_offset %r12, -48
+; CHECK-NEXT:    .cfi_offset %r13, -40
+; CHECK-NEXT:    .cfi_offset %r14, -32
+; CHECK-NEXT:    .cfi_offset %r15, -24
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    movl %esi, %ebp
+; CHECK-NEXT:    movq %rdi, %r13
+; CHECK-NEXT:    callq external at PLT
+; CHECK-NEXT:    addq $8, %rsp
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    leaq -40(%rbp), %rsp
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+    %x = call ghccc i32 @external(i32 %0, i32 %1)
+    ret i32 %x
+}

>From 5d51df45a165ad8d924d3cbb7d9957df24def435 Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Mon, 6 May 2024 13:48:07 -0700
Subject: [PATCH 3/6] Add unwinding information when FP is changed.

---
 llvm/lib/CodeGen/CFIInstrInserter.cpp      | 29 ++++++++---
 llvm/lib/CodeGen/PrologEpilogInserter.cpp  | 35 +++++++-------
 llvm/lib/Target/X86/X86FrameLowering.cpp   | 56 +++++++++++++++++++---
 llvm/lib/Target/X86/X86FrameLowering.h     | 19 ++++----
 llvm/test/CodeGen/X86/clobber_frame_ptr.ll |  3 ++
 5 files changed, 102 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp
index 1ff01ad34b30e..d6fbb9c02ab97 100644
--- a/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -184,6 +184,10 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
   unsigned NumRegs = TRI.getNumSupportedRegs(*MF);
   BitVector CSRSaved(NumRegs), CSRRestored(NumRegs);
 
+#ifndef NDEBUG
+  int RememberState = 0;
+#endif
+
   // Determine cfa offset and register set by the block.
   for (MachineInstr &MI : *MBBInfo.MBB) {
     if (MI.isCFIInstruction()) {
@@ -228,17 +232,23 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
       case MCCFIInstruction::OpRememberState:
         // TODO: Add support for handling cfi_remember_state.
 #ifndef NDEBUG
-        report_fatal_error(
-            "Support for cfi_remember_state not implemented! Value of CFA "
-            "may be incorrect!\n");
+        // Currently we need cfi_remember_state and cfi_restore_state to be in
+        // the same BB, so it will not impact outgoing CFA.
+        ++RememberState;
+        if (RememberState != 1)
+          report_fatal_error(
+              "Support for cfi_remember_state not implemented! Value of CFA "
+              "may be incorrect!\n");
 #endif
         break;
       case MCCFIInstruction::OpRestoreState:
         // TODO: Add support for handling cfi_restore_state.
 #ifndef NDEBUG
-        report_fatal_error(
-            "Support for cfi_restore_state not implemented! Value of CFA may "
-            "be incorrect!\n");
+        --RememberState;
+        if (RememberState != 0)
+          report_fatal_error(
+              "Support for cfi_restore_state not implemented! Value of CFA may "
+              "be incorrect!\n");
 #endif
         break;
       // Other CFI directives do not affect CFA value.
@@ -264,6 +274,13 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
     }
   }
 
+#ifndef NDEBUG
+  if (RememberState != 0)
+    report_fatal_error(
+        "Support for cfi_remember_state not implemented! Value of CFA "
+        "may be incorrect!\n");
+#endif
+
   MBBInfo.Processed = true;
 
   // Update outgoing CFA info.
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 4b3e5860686b4..0698f9d5d8828 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -77,15 +77,15 @@ using MBBVector = SmallVector<MachineBasicBlock *, 4>;
 STATISTIC(NumLeafFuncWithSpills, "Number of leaf functions with CSRs");
 STATISTIC(NumFuncSeen, "Number of functions seen in PEI");
 
-static cl::opt<bool> SpillClobberedFP(
-    "spill-clobbered-fp",
-    cl::desc("Spill clobbered fp register to stack."),
-    cl::init(false), cl::Hidden);
+static cl::opt<bool>
+    SpillClobberedFP("spill-clobbered-fp",
+                     cl::desc("Spill clobbered fp register to stack."),
+                     cl::init(false), cl::Hidden);
 
-static cl::opt<bool> SpillClobberedBP(
-    "spill-clobbered-bp",
-    cl::desc("Spill clobbered bp register to stack."),
-    cl::init(true), cl::Hidden);
+static cl::opt<bool>
+    SpillClobberedBP("spill-clobbered-bp",
+                     cl::desc("Spill clobbered bp register to stack."),
+                     cl::init(true), cl::Hidden);
 
 namespace {
 
@@ -1590,13 +1590,13 @@ static bool accessFrameBasePointer(const MachineInstr &MI, Register FP,
                                    const TargetRegisterInfo *TRI) {
   AccessFP = AccessBP = false;
   if (FP) {
-    if (MI.findRegisterUseOperandIdx(FP, false, TRI) != -1 ||
-        MI.findRegisterDefOperandIdx(FP, false, true, TRI) != -1)
+    if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 ||
+        MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
       AccessFP = true;
   }
   if (BP) {
-    if (MI.findRegisterUseOperandIdx(BP, false, TRI) != -1 ||
-        MI.findRegisterDefOperandIdx(BP, false, true, TRI) != -1)
+    if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 ||
+        MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
       AccessBP = true;
   }
   return AccessFP || AccessBP;
@@ -1659,17 +1659,18 @@ void PEI::spillFrameBasePointer(MachineFunction &MF) {
         SpillBP |= AccessBP;
 
         // Maintain FPLive and BPLive.
-        if (FPLive && MI->findRegisterDefOperandIdx(FP, false, true, TRI) != -1)
+        if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
           FPLive = false;
-        if (FP && MI->findRegisterUseOperandIdx(FP, false, TRI) != -1)
+        if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1)
           FPLive = true;
-        if (BPLive && MI->findRegisterDefOperandIdx(BP, false, true, TRI) != -1)
+        if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
           BPLive = false;
-        if (BP && MI->findRegisterUseOperandIdx(BP, false, TRI) != -1)
+        if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1)
           BPLive = true;
 
         Start = MI++;
-      } while ((MI != ME) && (FPLive || BPLive ||
+      } while ((MI != ME) &&
+               (FPLive || BPLive ||
                 accessFrameBasePointer(*MI, FP, BP, AccessFP, AccessBP, TRI)));
 
       // If the bp is clobbered by a call, we should save and restore outside of
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index d82125069604b..afd6afd4630c0 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -4242,9 +4242,9 @@ static int computeSPAdjust4SpillFPBP(MachineFunction &MF,
   return AlignedSize - AllocSize;
 }
 
-void X86FrameLowering::spillFPBPUsingSP(
-    MachineFunction &MF, MachineBasicBlock::iterator BeforeMI,
-    bool SpillFP, bool SpillBP) const {
+void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
+                                        MachineBasicBlock::iterator BeforeMI,
+                                        bool SpillFP, bool SpillBP) const {
   const TargetRegisterClass *RC;
   unsigned RegNum = 0;
   MachineBasicBlock *MBB = BeforeMI->getParent();
@@ -4280,11 +4280,46 @@ void X86FrameLowering::spillFPBPUsingSP(
   int SPAdjust = computeSPAdjust4SpillFPBP(MF, RC, RegNum);
   if (SPAdjust)
     emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
+
+  // Emit unwinding information.
+  if (SpillFP && needsDwarfCFI(MF)) {
+    // Emit .cfi_remember_state to remember old frame.
+    unsigned CFIIndex =
+        MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
+    BuildMI(*MBB, BeforeMI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+
+    // Setup new CFA value with DW_CFA_def_cfa_expression:
+    //     DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
+    SmallString<64> CfaExpr;
+    uint8_t buffer[16];
+    int Offset = SPAdjust;
+    if (SpillBP)
+      Offset += TRI->getSpillSize(*RC);
+    Register StackPtr = TRI->getStackRegister();
+    if (STI.isTarget64BitILP32())
+      StackPtr = Register(getX86SubSuperRegister(StackPtr, 64));
+    unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackPtr, true);
+    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr));
+    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
+    CfaExpr.push_back(dwarf::DW_OP_deref);
+    CfaExpr.push_back(dwarf::DW_OP_consts);
+    CfaExpr.append(buffer, buffer + encodeSLEB128(SlotSize * 2, buffer));
+    CfaExpr.push_back((uint8_t)dwarf::DW_OP_plus);
+
+    SmallString<64> DefCfaExpr;
+    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
+    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
+    DefCfaExpr.append(CfaExpr.str());
+    BuildCFI(*MBB, BeforeMI, DL,
+             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
+             MachineInstr::FrameSetup);
+  }
 }
 
-void X86FrameLowering::restoreFPBPUsingSP(
-    MachineFunction &MF, MachineBasicBlock::iterator AfterMI,
-    bool SpillFP, bool SpillBP) const {
+void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
+                                          MachineBasicBlock::iterator AfterMI,
+                                          bool SpillFP, bool SpillBP) const {
   Register FP, BP;
   const TargetRegisterClass *RC;
   unsigned RegNum = 0;
@@ -4321,6 +4356,15 @@ void X86FrameLowering::restoreFPBPUsingSP(
   if (SpillFP) {
     BuildMI(*MBB, Pos, DL,
             TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), FP);
+
+    // Emit unwinding information.
+    if (needsDwarfCFI(MF)) {
+      // Restore original frame with .cfi_restore_state.
+      unsigned CFIIndex =
+          MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr));
+      BuildMI(*MBB, Pos, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex);
+    }
   }
 }
 
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index 0d77933363ce4..0c83fa43f265d 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -103,19 +103,16 @@ class X86FrameLowering : public TargetFrameLowering {
                               MutableArrayRef<CalleeSavedInfo> CSI,
                               const TargetRegisterInfo *TRI) const override;
 
-  void
-  spillFPBPUsingSP(MachineFunction &MF,
-                   const MachineBasicBlock::iterator BeforeMI,
-                   bool SpillFP, bool SpillBP) const override;
+  void spillFPBPUsingSP(MachineFunction &MF,
+                        const MachineBasicBlock::iterator BeforeMI,
+                        bool SpillFP, bool SpillBP) const override;
 
-  void
-  restoreFPBPUsingSP(MachineFunction &MF,
-                     const MachineBasicBlock::iterator AfterMI,
-                     bool SpillFP, bool SpillBP) const override;
+  void restoreFPBPUsingSP(MachineFunction &MF,
+                          const MachineBasicBlock::iterator AfterMI,
+                          bool SpillFP, bool SpillBP) const override;
 
-  bool
-  skipSpillFPBP(MachineFunction &MF,
-                MachineBasicBlock::reverse_iterator &MI) const override;
+  bool skipSpillFPBP(MachineFunction &MF,
+                     MachineBasicBlock::reverse_iterator &MI) const override;
 
   bool hasFP(const MachineFunction &MF) const override;
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
diff --git a/llvm/test/CodeGen/X86/clobber_frame_ptr.ll b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
index 591676bf3f15c..81403e5a64549 100644
--- a/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
+++ b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
@@ -30,11 +30,14 @@ define i32 @foo(i32 %0, i32 %1) {
 ; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_remember_state
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 #
 ; CHECK-NEXT:    movl %esi, %ebp
 ; CHECK-NEXT:    movq %rdi, %r13
 ; CHECK-NEXT:    callq external at PLT
 ; CHECK-NEXT:    addq $8, %rsp
 ; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_restore_state
 ; CHECK-NEXT:    leaq -40(%rbp), %rsp
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r12

>From 9a1d2d37e6f6be4b33079455aed5b15f75c5e51a Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Mon, 15 Jul 2024 10:34:59 -0700
Subject: [PATCH 4/6] Move the implementation to X86 backend.

---
 llvm/include/llvm/CodeGen/MachineFrameInfo.h  |   1 -
 .../llvm/CodeGen/TargetFrameLowering.h        |  26 +--
 .../include/llvm/CodeGen/TargetRegisterInfo.h |  10 -
 llvm/lib/CodeGen/PrologEpilogInserter.cpp     | 125 +-----------
 llvm/lib/Target/X86/X86FrameLowering.cpp      | 193 ++++++++++++++++--
 llvm/lib/Target/X86/X86FrameLowering.h        |  29 ++-
 llvm/lib/Target/X86/X86RegisterInfo.h         |   6 -
 .../X86/apx/push2-pop2-vector-register.ll     |   4 +
 llvm/test/CodeGen/X86/apx/push2-pop2.ll       |  24 +++
 llvm/test/CodeGen/X86/apx/pushp-popp.ll       |   4 +
 llvm/test/CodeGen/X86/avx512-intel-ocl.ll     |   8 +
 llvm/test/CodeGen/X86/clobber_base_ptr.ll     |  63 +++++-
 llvm/test/CodeGen/X86/clobber_frame_ptr.ll    |   2 +-
 .../test/CodeGen/X86/clobber_frame_ptr_x32.ll |  53 +++++
 .../X86/inline-asm-function-call-pic.ll       |   4 +
 llvm/test/CodeGen/X86/x86-32-intrcc.ll        |   4 +
 .../CodeGen/X86/x86-64-flags-intrinsics.ll    |   8 +
 17 files changed, 374 insertions(+), 190 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll

diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 3534e50253af6..466fed7fb3a29 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -601,7 +601,6 @@ class MachineFrameInfo {
   /// Return the alignment in bytes that this function must be aligned to,
   /// which is greater than the default stack alignment provided by the target.
   Align getMaxAlign() const { return MaxAlignment; }
-  Align getStackAlignment() const { return StackAlignment; }
 
   /// Make sure the function is at least Align bytes aligned.
   void ensureMaxAlignment(Align Alignment);
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index e8ff1d69b4eb1..3cc0c7269148b 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -467,29 +467,9 @@ class TargetFrameLowering {
   /// debug info.
   virtual DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const;
 
-  /// Issue instructions to allocate stack space and spill FP and/or BP to stack
-  /// using SP register.
-  virtual void spillFPBPUsingSP(MachineFunction &MF,
-                                const MachineBasicBlock::iterator BeforeMI,
-                                bool SpillFP, bool SpillBP) const {
-    report_fatal_error("spillFPBPUsingSP is not implemented for this target!");
-  }
-
-  /// Issue instructions to restore FP and/or BP from stack using SP register,
-  /// and free stack space.
-  virtual void restoreFPBPUsingSP(MachineFunction &MF,
-                                  const MachineBasicBlock::iterator AfterMI,
-                                  bool SpillFP, bool SpillBP) const {
-    report_fatal_error("restoreFPBPUsingSP is not implemented for this "
-                       "target!");
-  }
-
-  // If MI uses fp/bp, but target can handle it, and doesn't want PEI to spill
-  // it, this function should return true, and update MI so PEI will not check
-  // any instructions from it and later.
-  virtual bool skipSpillFPBP(MachineFunction &MF,
-                             MachineBasicBlock::reverse_iterator &MI) const {
-    return false;
+  /// If the frame pointer or base pointer is clobbered by an instruction, we
+  /// should spill/restore it around that instruction. The default is a no-op.
+  virtual void spillFPBP(MachineFunction &MF) const {
   }
 };
 
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index c29901d5ffb7a..33c4c745c3416 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -988,16 +988,6 @@ class TargetRegisterInfo : public MCRegisterInfo {
     return false;
   }
 
-  /// Returns true if the target needs a base register.
-  virtual bool hasBaseRegister(const MachineFunction &MF) const {
-    return false;
-  }
-
-  /// Returns the physical base register used to access stack object.
-  virtual Register getBaseRegister(const MachineFunction &MF) const {
-    return 0;
-  }
-
   /// Return true if target has reserved a spill slot in the stack frame of
   /// the given function for the specified register. e.g. On x86, if the frame
   /// register is required, the first fixed stack object is reserved as its
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 0698f9d5d8828..db16f01bf0a7a 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -77,15 +77,6 @@ using MBBVector = SmallVector<MachineBasicBlock *, 4>;
 STATISTIC(NumLeafFuncWithSpills, "Number of leaf functions with CSRs");
 STATISTIC(NumFuncSeen, "Number of functions seen in PEI");
 
-static cl::opt<bool>
-    SpillClobberedFP("spill-clobbered-fp",
-                     cl::desc("Spill clobbered fp register to stack."),
-                     cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
-    SpillClobberedBP("spill-clobbered-bp",
-                     cl::desc("Spill clobbered bp register to stack."),
-                     cl::init(true), cl::Hidden);
 
 namespace {
 
@@ -131,7 +122,6 @@ class PEI : public MachineFunctionPass {
   void calculateCallFrameInfo(MachineFunction &MF);
   void calculateSaveRestoreBlocks(MachineFunction &MF);
   void spillCalleeSavedRegs(MachineFunction &MF);
-  void spillFrameBasePointer(MachineFunction &MF);
 
   void calculateFrameObjectOffsets(MachineFunction &MF);
   void replaceFrameIndices(MachineFunction &MF);
@@ -238,8 +228,7 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
   FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF);
   ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
 
-  // Handle FP and BP spilling and restoring, for instructions that need it.
-  spillFrameBasePointer(MF);
+  TFI->spillFPBP(MF);
 
   // Calculate the MaxCallFrameSize value for the function's frame
   // information. Also eliminates call frame pseudo instructions.
@@ -1584,115 +1573,3 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
       ++I;
   }
 }
-
-static bool accessFrameBasePointer(const MachineInstr &MI, Register FP,
-                                   Register BP, bool &AccessFP, bool &AccessBP,
-                                   const TargetRegisterInfo *TRI) {
-  AccessFP = AccessBP = false;
-  if (FP) {
-    if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 ||
-        MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
-      AccessFP = true;
-  }
-  if (BP) {
-    if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 ||
-        MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
-      AccessBP = true;
-  }
-  return AccessFP || AccessBP;
-}
-
-/// If a function uses base pointer and the base pointer is clobbered by inline
-/// asm, RA doesn't detect this case, and after the inline asm, the base pointer
-/// cotains garbage value.
-/// For example if a 32b x86 function uses base pointer esi, and esi is
-/// clobbered by following inline asm
-///     asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
-/// We need to save esi before the asm and restore it after the asm.
-///
-/// The problem can also occur to frame pointer if there is a function call, and
-/// the callee uses a different calling convention and clobbers the fp.
-///
-/// Because normal frame objects (spill slots) are accessed through fp/bp
-/// register, so we can't spill fp/bp to normal spill slots.
-///
-/// FIXME: There are 2 possible enhancements:
-/// 1. In many cases there are different physical registers not clobbered by
-/// inline asm, we can use one of them as base pointer. Or use a virtual
-/// register as base pointer and let RA allocate a physical register to it.
-/// 2. If there is no other instructions access stack with fp/bp from the
-/// inline asm to the epilog, we can skip the save and restore operations.
-void PEI::spillFrameBasePointer(MachineFunction &MF) {
-  Register FP, BP;
-  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-  if (TFI.hasFP(MF) && SpillClobberedFP)
-    FP = TRI->getFrameRegister(MF);
-  if (TRI->hasBaseRegister(MF) && SpillClobberedBP)
-    BP = TRI->getBaseRegister(MF);
-
-  for (MachineBasicBlock &MBB : MF) {
-    auto MI = MBB.rbegin(), ME = MBB.rend();
-    while (MI != ME) {
-      // Skip frame setup/destroy instructions.
-      // Skip instructions handled by target.
-      if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
-          MI->getFlag(MachineInstr::MIFlag::FrameDestroy) ||
-          TFI.skipSpillFPBP(MF, MI)) {
-        ++MI;
-        continue;
-      }
-
-      bool AccessFP, AccessBP;
-      // Check if fp or bp is used in MI.
-      if (!accessFrameBasePointer(*MI, FP, BP, AccessFP, AccessBP, TRI)) {
-        ++MI;
-        continue;
-      }
-
-      // Look for the range [Start, Stop] in which fp or bp is defined and used.
-      bool FPLive = false, BPLive = false;
-      bool SpillFP = false, SpillBP = false;
-      auto Start = MI, Stop = MI;
-      do {
-        SpillFP |= AccessFP;
-        SpillBP |= AccessBP;
-
-        // Maintain FPLive and BPLive.
-        if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
-          FPLive = false;
-        if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1)
-          FPLive = true;
-        if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
-          BPLive = false;
-        if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1)
-          BPLive = true;
-
-        Start = MI++;
-      } while ((MI != ME) &&
-               (FPLive || BPLive ||
-                accessFrameBasePointer(*MI, FP, BP, AccessFP, AccessBP, TRI)));
-
-      // If the bp is clobbered by a call, we should save and restore outside of
-      // the frame setup instructions.
-      if (Stop->isCall()) {
-        const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
-        auto FrameSetup = std::next(Start);
-        while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
-               !FrameSetup->isCall())
-          ++FrameSetup;
-        if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup)) {
-          while (!TII.isFrameInstr(*Stop))
-            --Stop;
-          Start = FrameSetup;
-          MI = Start;
-          ++MI;
-        }
-      }
-
-      // Call target functions to spill and restore FP and BP registers.
-      TFI.spillFPBPUsingSP(MF, &(*Start), SpillFP, SpillBP);
-      TFI.restoreFPBPUsingSP(MF, &(*Stop), SpillFP, SpillBP);
-    }
-  }
-}
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index afd6afd4630c0..7d07a037b0c36 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -4234,10 +4234,10 @@ void X86FrameLowering::restoreWinEHStackPointersInParent(
 
 static int computeSPAdjust4SpillFPBP(MachineFunction &MF,
                                      const TargetRegisterClass *RC,
-                                     unsigned SpillRegNum) {
+                                     unsigned NumSpilledRegs) {
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-  unsigned AllocSize = TRI->getSpillSize(*RC) * SpillRegNum;
-  Align StackAlign = MF.getFrameInfo().getStackAlignment();
+  unsigned AllocSize = TRI->getSpillSize(*RC) * NumSpilledRegs;
+  Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign();
   unsigned AlignedSize = alignTo(AllocSize, StackAlign);
   return AlignedSize - AllocSize;
 }
@@ -4245,8 +4245,10 @@ static int computeSPAdjust4SpillFPBP(MachineFunction &MF,
 void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
                                         MachineBasicBlock::iterator BeforeMI,
                                         bool SpillFP, bool SpillBP) const {
+  assert(SpillFP || SpillBP);
+
   const TargetRegisterClass *RC;
-  unsigned RegNum = 0;
+  unsigned NumRegs = 0;
   MachineBasicBlock *MBB = BeforeMI->getParent();
   DebugLoc DL = BeforeMI->getDebugLoc();
 
@@ -4256,7 +4258,7 @@ void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
     if (STI.isTarget64BitILP32())
       FP = Register(getX86SubSuperRegister(FP, 64));
     RC = TRI->getMinimalPhysRegClass(FP);
-    ++RegNum;
+    ++NumRegs;
 
     BuildMI(*MBB, BeforeMI, DL,
             TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
@@ -4265,11 +4267,11 @@ void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
 
   // Spill BP.
   if (SpillBP) {
-    Register BP = TRI->getBaseRegister(MF);
+    Register BP = TRI->getBaseRegister();
     if (STI.isTarget64BitILP32())
       BP = Register(getX86SubSuperRegister(BP, 64));
     RC = TRI->getMinimalPhysRegClass(BP);
-    ++RegNum;
+    ++NumRegs;
 
     BuildMI(*MBB, BeforeMI, DL,
             TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
@@ -4277,7 +4279,7 @@ void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
   }
 
   // Make sure SP is aligned.
-  int SPAdjust = computeSPAdjust4SpillFPBP(MF, RC, RegNum);
+  int SPAdjust = computeSPAdjust4SpillFPBP(MF, RC, NumRegs);
   if (SPAdjust)
     emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
 
@@ -4296,6 +4298,12 @@ void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
     int Offset = SPAdjust;
     if (SpillBP)
       Offset += TRI->getSpillSize(*RC);
+    // If BeforeMI is a frame setup instruction, we need to adjust the position
+    // and offset of the new cfi instruction.
+    if (TII.isFrameSetup(*BeforeMI)) {
+      Offset += alignTo(TII.getFrameSize(*BeforeMI), getStackAlign());
+      BeforeMI = std::next(BeforeMI);
+    }
     Register StackPtr = TRI->getStackRegister();
     if (STI.isTarget64BitILP32())
       StackPtr = Register(getX86SubSuperRegister(StackPtr, 64));
@@ -4320,29 +4328,31 @@ void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
 void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
                                           MachineBasicBlock::iterator AfterMI,
                                           bool SpillFP, bool SpillBP) const {
+  assert(SpillFP || SpillBP);
+
   Register FP, BP;
   const TargetRegisterClass *RC;
-  unsigned RegNum = 0;
+  unsigned NumRegs = 0;
   if (SpillFP) {
     FP = TRI->getFrameRegister(MF);
     if (STI.isTarget64BitILP32())
       FP = Register(getX86SubSuperRegister(FP, 64));
     RC = TRI->getMinimalPhysRegClass(FP);
-    ++RegNum;
+    ++NumRegs;
   }
   if (SpillBP) {
-    BP = TRI->getBaseRegister(MF);
+    BP = TRI->getBaseRegister();
     if (STI.isTarget64BitILP32())
       BP = Register(getX86SubSuperRegister(BP, 64));
     RC = TRI->getMinimalPhysRegClass(BP);
-    ++RegNum;
+    ++NumRegs;
   }
 
   // Adjust SP so it points to spilled FP or BP.
   MachineBasicBlock *MBB = AfterMI->getParent();
   MachineBasicBlock::iterator Pos = std::next(AfterMI);
   DebugLoc DL = AfterMI->getDebugLoc();
-  int SPAdjust = computeSPAdjust4SpillFPBP(MF, RC, RegNum);
+  int SPAdjust = computeSPAdjust4SpillFPBP(MF, RC, NumRegs);
   if (SPAdjust)
     emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);
 
@@ -4386,3 +4396,160 @@ bool X86FrameLowering::skipSpillFPBP(
   }
   return false;
 }
+
+static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP,
+                         const TargetRegisterInfo *TRI, bool &AccessFP,
+                         bool &AccessBP) {
+  AccessFP = AccessBP = false;
+  if (FP) {
+    if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 ||
+        MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
+      AccessFP = true;
+  }
+  if (BP) {
+    if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 ||
+        MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
+      AccessBP = true;
+  }
+  return AccessFP || AccessBP;
+}
+
+// An invoke instruction has been lowered to a normal function call. We try to
+// figure out if MI comes from an invoke.
+// FIXME: Is there a better way to detect this?
+static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) {
+  if (!MI.isCall())
+    return false;
+  if (InsideEHLabels)
+    return true;
+
+  const MachineBasicBlock *MBB = MI.getParent();
+  if (!MBB->hasEHPadSuccessor())
+    return false;
+
+  // Check if there is another call instruction from MI to the end of MBB.
+  MachineBasicBlock::const_iterator MBBI = MI, ME = MBB->end();
+  for (++MBBI; MBBI != ME; ++MBBI)
+    if (MBBI->isCall())
+      return false;
+  return true;
+}
+
+/// If a function uses base pointer and the base pointer is clobbered by inline
+/// asm, RA doesn't detect this case, and after the inline asm, the base pointer
+/// contains garbage value.
+/// For example if a 32b x86 function uses base pointer esi, and esi is
+/// clobbered by following inline asm
+///     asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
+/// We need to save esi before the asm and restore it after the asm.
+///
+/// The problem can also occur to frame pointer if there is a function call, and
+/// the callee uses a different calling convention and clobbers the fp.
+///
+/// Because normal frame objects (spill slots) are accessed through the fp/bp
+/// registers, we can't spill fp/bp to normal spill slots.
+///
+/// FIXME: There are 2 possible enhancements:
+/// 1. In many cases there are different physical registers not clobbered by
+/// inline asm, we can use one of them as base pointer. Or use a virtual
+/// register as base pointer and let RA allocate a physical register to it.
+/// 2. If no other instruction accesses the stack through fp/bp between the
+/// inline asm and the epilog, and no cfi requires a correct fp, we can skip
+/// the save and restore operations.
+void X86FrameLowering::spillFPBP(MachineFunction &MF) const {
+  Register FP, BP;
+  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+  if (TFI.hasFP(MF))
+    FP = TRI->getFrameRegister(MF);
+  if (TRI->hasBasePointer(MF))
+    BP = TRI->getBaseRegister();
+  if (!FP && !BP)
+    return;
+
+  for (MachineBasicBlock &MBB : MF) {
+    bool InsideEHLabels = false;
+    auto MI = MBB.rbegin(), ME = MBB.rend();
+    auto TermMI = MBB.getFirstTerminator();
+    if (TermMI != MBB.begin())
+      MI = *(std::prev(TermMI));
+
+    while (MI != ME) {
+      // Skip frame setup/destroy instructions.
+      // Skip Invoke (call inside try block) instructions.
+      // Skip instructions handled by target.
+      if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
+          MI->getFlag(MachineInstr::MIFlag::FrameDestroy) ||
+          isInvoke(*MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) {
+        ++MI;
+        continue;
+      }
+
+      if (MI->getOpcode() == TargetOpcode::EH_LABEL) {
+        InsideEHLabels = !InsideEHLabels;
+        ++MI;
+        continue;
+      }
+
+      bool AccessFP, AccessBP;
+      // Check if fp or bp is used in MI.
+      if (!isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)) {
+        ++MI;
+        continue;
+      }
+
+      // Look for the range [DefMI, KillMI] in which fp or bp is defined and
+      // used.
+      bool FPLive = false, BPLive = false;
+      bool SpillFP = false, SpillBP = false;
+      auto DefMI = MI, KillMI = MI;
+      do {
+        SpillFP |= AccessFP;
+        SpillBP |= AccessBP;
+
+        // Maintain FPLive and BPLive.
+        if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
+          FPLive = false;
+        if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1)
+          FPLive = true;
+        if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
+          BPLive = false;
+        if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1)
+          BPLive = true;
+
+        DefMI = MI++;
+      } while ((MI != ME) &&
+               (FPLive || BPLive ||
+                isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)));
+
+      // Don't need to save/restore if FP is accessed through llvm.frameaddress.
+      if (FPLive && !SpillBP)
+        continue;
+
+      // If the bp is clobbered by a call, we should save and restore outside of
+      // the frame setup instructions.
+      if (KillMI->isCall() && DefMI != ME) {
+        const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+        auto FrameSetup = std::next(DefMI);
+        // Look for frame setup instruction toward the start of the BB.
+        // If we reach another call instruction, it means no frame setup
+        // instruction for the current call instruction.
+        while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
+               !FrameSetup->isCall())
+          ++FrameSetup;
+        // If a frame setup instruction is found, we need to find out the
+        // corresponding frame destroy instruction.
+        if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup)) {
+          while (!TII.isFrameInstr(*KillMI))
+            --KillMI;
+          DefMI = FrameSetup;
+          MI = DefMI;
+          ++MI;
+        }
+      }
+
+      // Call target functions to spill and restore FP and BP registers.
+      spillFPBPUsingSP(MF, &(*DefMI), SpillFP, SpillBP);
+      restoreFPBPUsingSP(MF, &(*KillMI), SpillFP, SpillBP);
+    }
+  }
+}
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index 0c83fa43f265d..2d1e35adc589b 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -103,16 +103,7 @@ class X86FrameLowering : public TargetFrameLowering {
                               MutableArrayRef<CalleeSavedInfo> CSI,
                               const TargetRegisterInfo *TRI) const override;
 
-  void spillFPBPUsingSP(MachineFunction &MF,
-                        const MachineBasicBlock::iterator BeforeMI,
-                        bool SpillFP, bool SpillBP) const override;
-
-  void restoreFPBPUsingSP(MachineFunction &MF,
-                          const MachineBasicBlock::iterator AfterMI,
-                          bool SpillFP, bool SpillBP) const override;
-
-  bool skipSpillFPBP(MachineFunction &MF,
-                     MachineBasicBlock::reverse_iterator &MI) const override;
+  void spillFPBP(MachineFunction &MF) const override;
 
   bool hasFP(const MachineFunction &MF) const override;
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
@@ -278,6 +269,24 @@ class X86FrameLowering : public TargetFrameLowering {
   void emitCatchRetReturnValue(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                MachineInstr *CatchRet) const;
+
+  /// Issue instructions to allocate stack space and spill frame pointer and/or
+  /// base pointer to stack using stack pointer register.
+  void spillFPBPUsingSP(MachineFunction &MF,
+                        const MachineBasicBlock::iterator BeforeMI,
+                        bool SpillFP, bool SpillBP) const;
+
+  /// Issue instructions to restore frame pointer and/or base pointer from stack
+  /// using stack pointer register, and free stack space.
+  void restoreFPBPUsingSP(MachineFunction &MF,
+                          const MachineBasicBlock::iterator AfterMI,
+                          bool SpillFP, bool SpillBP) const;
+
+  // If MI uses fp/bp but the target can handle it and does not want fp/bp
+  // spilled again, this function should return true and update MI so that we
+  // do not check any other instruction of the related sequence.
+  bool skipSpillFPBP(MachineFunction &MF,
+                     MachineBasicBlock::reverse_iterator &MI) const;
 };
 
 } // End llvm namespace
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index cccb731d24354..7296a5f021e4a 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -133,9 +133,6 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
   void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
 
   bool hasBasePointer(const MachineFunction &MF) const;
-  bool hasBaseRegister(const MachineFunction &MF) const override {
-    return hasBasePointer(MF);
-  }
 
   bool canRealignStack(const MachineFunction &MF) const override;
 
@@ -167,9 +164,6 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
   unsigned getPtrSizedStackRegister(const MachineFunction &MF) const;
   Register getStackRegister() const { return StackPtr; }
   Register getBaseRegister() const { return BasePtr; }
-  Register getBaseRegister(const MachineFunction &MF) const override {
-    return BasePtr;
-  }
   /// Returns physical register used as frame pointer.
   /// This will always returns the frame pointer register, contrary to
   /// getFrameRegister() which returns the "base pointer" in situations
diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll b/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
index aa5c54d30e3bc..f20c4c1ae2786 100644
--- a/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
+++ b/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
@@ -43,8 +43,12 @@ define void @widget(float %arg) nounwind {
 ; FRAME-NEXT:    xorl %r8d, %r8d
 ; FRAME-NEXT:    callq *%rsi
 ; FRAME-NEXT:    movss %xmm6, 0
+; FRAME-NEXT:    pushq %rbp
+; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
+; FRAME-NEXT:    popq %rax
+; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
 ; FRAME-NEXT:    addq $48, %rsp
 ; FRAME-NEXT:    pop2 %r15, %rsi
diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2.ll b/llvm/test/CodeGen/X86/apx/push2-pop2.ll
index 25139f1da8272..6bd9f525090ee 100644
--- a/llvm/test/CodeGen/X86/apx/push2-pop2.ll
+++ b/llvm/test/CodeGen/X86/apx/push2-pop2.ll
@@ -24,8 +24,12 @@ define void @csr1() nounwind {
 ; FRAME:       # %bb.0: # %entry
 ; FRAME-NEXT:    pushq %rbp
 ; FRAME-NEXT:    movq %rsp, %rbp
+; FRAME-NEXT:    pushq %rbp
+; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
+; FRAME-NEXT:    popq %rax
+; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    retq
 entry:
@@ -59,8 +63,12 @@ define void @csr2() nounwind {
 ; FRAME-NEXT:    pushq %rbp
 ; FRAME-NEXT:    movq %rsp, %rbp
 ; FRAME-NEXT:    pushq %r15
+; FRAME-NEXT:    pushq %rbp
+; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
+; FRAME-NEXT:    popq %rax
+; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    popq %r15
 ; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    retq
@@ -95,8 +103,12 @@ define void @csr3() nounwind {
 ; FRAME-NEXT:    pushq %rbp
 ; FRAME-NEXT:    movq %rsp, %rbp
 ; FRAME-NEXT:    push2 %r14, %r15
+; FRAME-NEXT:    pushq %rbp
+; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
+; FRAME-NEXT:    popq %rax
+; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    pop2 %r15, %r14
 ; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    retq
@@ -136,8 +148,12 @@ define void @csr4() nounwind {
 ; FRAME-NEXT:    movq %rsp, %rbp
 ; FRAME-NEXT:    push2 %r14, %r15
 ; FRAME-NEXT:    pushq %r13
+; FRAME-NEXT:    pushq %rbp
+; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
+; FRAME-NEXT:    popq %rax
+; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    popq %r13
 ; FRAME-NEXT:    pop2 %r15, %r14
 ; FRAME-NEXT:    popq %rbp
@@ -178,8 +194,12 @@ define void @csr5() nounwind {
 ; FRAME-NEXT:    movq %rsp, %rbp
 ; FRAME-NEXT:    push2 %r14, %r15
 ; FRAME-NEXT:    push2 %r12, %r13
+; FRAME-NEXT:    pushq %rbp
+; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
+; FRAME-NEXT:    popq %rax
+; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    pop2 %r13, %r12
 ; FRAME-NEXT:    pop2 %r15, %r14
 ; FRAME-NEXT:    popq %rbp
@@ -225,8 +245,12 @@ define void @csr6() nounwind {
 ; FRAME-NEXT:    push2 %r14, %r15
 ; FRAME-NEXT:    push2 %r12, %r13
 ; FRAME-NEXT:    pushq %rbx
+; FRAME-NEXT:    pushq %rbp
+; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
+; FRAME-NEXT:    popq %rax
+; FRAME-NEXT:    popq %rbp
 ; FRAME-NEXT:    popq %rbx
 ; FRAME-NEXT:    pop2 %r13, %r12
 ; FRAME-NEXT:    pop2 %r15, %r14
diff --git a/llvm/test/CodeGen/X86/apx/pushp-popp.ll b/llvm/test/CodeGen/X86/apx/pushp-popp.ll
index ad4306fccce66..625e70b07198e 100644
--- a/llvm/test/CodeGen/X86/apx/pushp-popp.ll
+++ b/llvm/test/CodeGen/X86/apx/pushp-popp.ll
@@ -18,8 +18,12 @@ define void @csr2() nounwind {
 ; FRAME-NEXT:    pushp %rbp
 ; FRAME-NEXT:    movq %rsp, %rbp
 ; FRAME-NEXT:    pushp %r15
+; FRAME-NEXT:    pushp %rbp
+; FRAME-NEXT:    pushq %rax
 ; FRAME-NEXT:    #APP
 ; FRAME-NEXT:    #NO_APP
+; FRAME-NEXT:    popq %rax
+; FRAME-NEXT:    popp %rbp
 ; FRAME-NEXT:    popp %r15
 ; FRAME-NEXT:    popp %rbp
 ; FRAME-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
index 25d182afd66e7..78870278eeace 100644
--- a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
+++ b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
@@ -69,8 +69,12 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
 ; X64-NEXT:    andq $-64, %rsp
 ; X64-NEXT:    subq $128, %rsp
 ; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    movq %rsp, %rdi
 ; X64-NEXT:    callq _func_float16_ptr
+; X64-NEXT:    addq $8, %rsp
+; X64-NEXT:    popq %rbp
 ; X64-NEXT:    vaddps (%rsp), %zmm0, %zmm0
 ; X64-NEXT:    leaq -16(%rbp), %rsp
 ; X64-NEXT:    popq %r12
@@ -149,8 +153,12 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
 ; X64-NEXT:    subq $128, %rsp
 ; X64-NEXT:    vmovaps %zmm1, %zmm16
 ; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    movq %rsp, %rdi
 ; X64-NEXT:    callq _func_float16_ptr
+; X64-NEXT:    addq $8, %rsp
+; X64-NEXT:    popq %rbp
 ; X64-NEXT:    vaddps %zmm16, %zmm0, %zmm0
 ; X64-NEXT:    vaddps (%rsp), %zmm0, %zmm0
 ; X64-NEXT:    leaq -16(%rbp), %rsp
diff --git a/llvm/test/CodeGen/X86/clobber_base_ptr.ll b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
index 29894b6d38906..2c39560f02d16 100644
--- a/llvm/test/CodeGen/X86/clobber_base_ptr.ll
+++ b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
@@ -8,8 +8,8 @@ target triple = "i386-pc-windows-gnu"
 ; should save esi using esp before the inline asm, and restore esi after the
 ; inline asm.
 
-define i32 @foo() {
-; CHECK-LABEL: foo:
+define i32 @clober_bp() {
+; CHECK-LABEL: clober_bp:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushl %ebp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
@@ -57,3 +57,62 @@ entry:
   %retval = load i32, ptr %g, align 4
   ret i32 %retval
 }
+
+; This function has the same code, except that the inline asm also clobbers
+; the frame pointer.
+
+define i32 @clobber_bpfp() {
+; CHECK-LABEL: clobber_bpfp:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    .cfi_offset %ebp, -8
+; CHECK-NEXT:    movl %esp, %ebp
+; CHECK-NEXT:    .cfi_def_cfa_register %ebp
+; CHECK-NEXT:    pushl %edi
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    andl $-16, %esp
+; CHECK-NEXT:    subl $16, %esp
+; CHECK-NEXT:    movl %esp, %esi
+; CHECK-NEXT:    .cfi_offset %esi, -16
+; CHECK-NEXT:    .cfi_offset %edi, -12
+; CHECK-NEXT:    movl $4, 12(%esi)
+; CHECK-NEXT:    movl 12(%esi), %eax
+; CHECK-NEXT:    addl $3, %eax
+; CHECK-NEXT:    andl $-4, %eax
+; CHECK-NEXT:    calll __alloca
+; CHECK-NEXT:    movl %esp, %eax
+; CHECK-NEXT:    andl $-16, %eax
+; CHECK-NEXT:    movl %eax, %esp
+; CHECK-NEXT:    movl $1, (%eax)
+; CHECK-NEXT:    leal 8(%esi), %edi
+; CHECK-NEXT:    movl $4, %ecx
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    .cfi_remember_state
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x06, 0x74, 0x04, 0x06, 0x11, 0x08, 0x22 #
+; CHECK-NEXT:    movl %eax, %esi
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    rep movsb (%esi), %es:(%edi)
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    .cfi_restore_state
+; CHECK-NEXT:    movl 8(%esi), %eax
+; CHECK-NEXT:    leal -8(%ebp), %esp
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    popl %edi
+; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    retl
+entry:
+  %size = alloca i32, align 4
+  %g = alloca i32, align 4
+  store volatile i32 4, ptr %size, align 4
+  %len = load volatile i32, ptr %size, align 4
+  %var_array = alloca i8, i32 %len, align 16
+  store i32 1, ptr %var_array, align 16
+  %nil = call { ptr, ptr, i32 } asm "rep movsb", "={di},={si},={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags},~{ebp}"(ptr %g, ptr %var_array, i32 4)
+  %retval = load i32, ptr %g, align 4
+  ret i32 %retval
+}
+
diff --git a/llvm/test/CodeGen/X86/clobber_frame_ptr.ll b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
index 81403e5a64549..ba4fa42362809 100644
--- a/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
+++ b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=x86_64-pc-linux -stackrealign -spill-clobbered-fp=true -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux -stackrealign -verify-machineinstrs < %s | FileCheck %s
 
 ; Calling convention ghccc uses ebp to pass parameter, so calling a function
 ; using ghccc clobbers ebp. We should save and restore ebp around such a call
diff --git a/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll b/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll
new file mode 100644
index 0000000000000..25c951d8b1a10
--- /dev/null
+++ b/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s | FileCheck %s
+
+target triple = "x86_64-linux-gnux32"
+
+define i32 @foo() {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    subq $16, %rsp
+; CHECK-NEXT:    movl $4, -8(%rbp)
+; CHECK-NEXT:    movl $5, -4(%rbp)
+; CHECK-NEXT:    movl -8(%rbp), %eax
+; CHECK-NEXT:    movq %rsp, %rcx
+; CHECK-NEXT:    addq $15, %rax
+; CHECK-NEXT:    andq $-16, %rax
+; CHECK-NEXT:    movq %rcx, %rdx
+; CHECK-NEXT:    subq %rax, %rdx
+; CHECK-NEXT:    movq %rdx, %rsp
+; CHECK-NEXT:    negq %rax
+; CHECK-NEXT:    movl $1, (%rcx,%rax)
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_remember_state
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 #
+; CHECK-NEXT:    movl $123, %ebp
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    addq $8, %rsp
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_restore_state
+; CHECK-NEXT:    movl -4(%rbp), %eax
+; CHECK-NEXT:    movq %rbp, %rsp
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+entry:
+  %size = alloca i32, align 4
+  %g = alloca i32, align 4
+  store volatile i32 4, ptr %size, align 4
+  store volatile i32 5, ptr %g, align 4
+  %len = load volatile i32, ptr %size, align 4
+  %var_array = alloca i8, i32 %len, align 16
+  store i32 1, ptr %var_array, align 16
+  call void asm "nop", "{ebp},~{memory}"(i32 123)
+  %retval = load i32, ptr %g, align 4
+  ret i32 %retval
+}
diff --git a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
index 3c98eead8d18f..d3ca872509ad5 100644
--- a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
@@ -37,6 +37,8 @@ define void @func() local_unnamed_addr #0 {
 ; CHECK-NEXT:  .Ltmp0:
 ; CHECK-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx
 ; CHECK-NEXT:    calll static_func
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    subl $12, %esp
 ; CHECK-NEXT:    #APP
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    calll static_func
@@ -52,6 +54,8 @@ define void @func() local_unnamed_addr #0 {
 ; CHECK-NEXT:    shrl $0, %esp
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    addl $12, %esp
+; CHECK-NEXT:    popl %ebp
 entry:
   %call = tail call i32 @static_func()
 ;; We test call, CALL, and jmp.
diff --git a/llvm/test/CodeGen/X86/x86-32-intrcc.ll b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
index 3c3944c2082bd..a0f937e2c323b 100644
--- a/llvm/test/CodeGen/X86/x86-32-intrcc.ll
+++ b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
@@ -108,8 +108,10 @@ define x86_intrcc void @test_isr_clobbers(ptr byval(%struct.interrupt_frame) %fr
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    andl $-16, %esp
 ; CHECK-NEXT:    cld
+; CHECK-NEXT:    pushl %ebp
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    leal -12(%ebp), %esp
 ; CHECK-NEXT:    popl %eax
 ; CHECK-NEXT:    popl %ebx
@@ -127,8 +129,10 @@ define x86_intrcc void @test_isr_clobbers(ptr byval(%struct.interrupt_frame) %fr
 ; CHECK0-NEXT:    pushl %eax
 ; CHECK0-NEXT:    andl $-16, %esp
 ; CHECK0-NEXT:    cld
+; CHECK0-NEXT:    pushl %ebp
 ; CHECK0-NEXT:    #APP
 ; CHECK0-NEXT:    #NO_APP
+; CHECK0-NEXT:    popl %ebp
 ; CHECK0-NEXT:    leal -12(%ebp), %esp
 ; CHECK0-NEXT:    popl %eax
 ; CHECK0-NEXT:    popl %ebx
diff --git a/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll b/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
index 47aefdbf0e466..b4c18dd7f4573 100644
--- a/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
@@ -94,6 +94,8 @@ define i64 @read_flags_reg_pressure() nounwind {
 ; WIN64-NEXT:    pushq %rbx
 ; WIN64-NEXT:    subq $16, %rsp
 ; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
+; WIN64-NEXT:    pushq %rbp
+; WIN64-NEXT:    pushq %rax
 ; WIN64-NEXT:    #APP
 ; WIN64-NEXT:    #NO_APP
 ; WIN64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -103,6 +105,8 @@ define i64 @read_flags_reg_pressure() nounwind {
 ; WIN64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
 ; WIN64-NEXT:    #APP
 ; WIN64-NEXT:    #NO_APP
+; WIN64-NEXT:    addq $8, %rsp
+; WIN64-NEXT:    popq %rbp
 ; WIN64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; WIN64-NEXT:    addq $16, %rsp
 ; WIN64-NEXT:    popq %rbx
@@ -177,6 +181,8 @@ define void @write_flags_reg_pressure(i64 noundef %0) nounwind {
 ; WIN64-NEXT:    subq $16, %rsp
 ; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
 ; WIN64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; WIN64-NEXT:    pushq %rbp
+; WIN64-NEXT:    pushq %rax
 ; WIN64-NEXT:    #APP
 ; WIN64-NEXT:    #NO_APP
 ; WIN64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -186,6 +192,8 @@ define void @write_flags_reg_pressure(i64 noundef %0) nounwind {
 ; WIN64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
 ; WIN64-NEXT:    #APP
 ; WIN64-NEXT:    #NO_APP
+; WIN64-NEXT:    popq %rax
+; WIN64-NEXT:    popq %rbp
 ; WIN64-NEXT:    addq $16, %rsp
 ; WIN64-NEXT:    popq %rbx
 ; WIN64-NEXT:    popq %rdi

>From 8097e82daf03c8aca79d10c1c03310c3e9968c4c Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Fri, 2 Aug 2024 14:24:58 -0700
Subject: [PATCH 5/6] Refactor the code

Move the duplicated computation of physical register, register class and
stack adjustment to new function saveAndRestoreFPBPUsingSP.
---
 .../llvm/CodeGen/TargetFrameLowering.h        |   3 +-
 llvm/lib/CodeGen/PrologEpilogInserter.cpp     |   3 +
 llvm/lib/Target/X86/X86FrameLowering.cpp      | 101 ++++++++--------
 llvm/lib/Target/X86/X86FrameLowering.h        |   9 +-
 llvm/test/CodeGen/X86/clobber_frame_ptr.ll    | 111 +++++++++++++++++-
 5 files changed, 170 insertions(+), 57 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 3cc0c7269148b..38260758189b1 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -469,8 +469,7 @@ class TargetFrameLowering {
 
   /// If frame pointer or base pointer is clobbered by an instruction, we should
   /// spill/restore it around that instruction.
-  virtual void spillFPBP(MachineFunction &MF) const {
-  }
+  virtual void spillFPBP(MachineFunction &MF) const {}
 };
 
 } // End llvm namespace
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index db16f01bf0a7a..13b5ca941efac 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -228,6 +228,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
   FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF);
   ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
 
+  // Spill frame pointer and/or base pointer registers if they are clobbered.
+  // This is done before call frame instruction elimination so that the spill
+  // code does not interfere with stack arguments.
   TFI->spillFPBP(MF);
 
   // Calculate the MaxCallFrameSize value for the function's frame
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 7d07a037b0c36..9202fffaccbbe 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -4232,9 +4232,11 @@ void X86FrameLowering::restoreWinEHStackPointersInParent(
   }
 }
 
-static int computeSPAdjust4SpillFPBP(MachineFunction &MF,
-                                     const TargetRegisterClass *RC,
-                                     unsigned NumSpilledRegs) {
+// Compute the alignment gap between current SP after spilling FP/BP and the
+// next properly aligned stack offset.
+static int computeFPBPAlignmentGap(MachineFunction &MF,
+                                   const TargetRegisterClass *RC,
+                                   unsigned NumSpilledRegs) {
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   unsigned AllocSize = TRI->getSpillSize(*RC) * NumSpilledRegs;
   Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign();
@@ -4244,47 +4246,33 @@ static int computeSPAdjust4SpillFPBP(MachineFunction &MF,
 
 void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
                                         MachineBasicBlock::iterator BeforeMI,
-                                        bool SpillFP, bool SpillBP) const {
-  assert(SpillFP || SpillBP);
+                                        Register FP, Register BP,
+                                        int SPAdjust) const {
+  assert(FP.isValid() || BP.isValid());
 
-  const TargetRegisterClass *RC;
-  unsigned NumRegs = 0;
   MachineBasicBlock *MBB = BeforeMI->getParent();
   DebugLoc DL = BeforeMI->getDebugLoc();
 
   // Spill FP.
-  if (SpillFP) {
-    Register FP = TRI->getFrameRegister(MF);
-    if (STI.isTarget64BitILP32())
-      FP = Register(getX86SubSuperRegister(FP, 64));
-    RC = TRI->getMinimalPhysRegClass(FP);
-    ++NumRegs;
-
+  if (FP.isValid()) {
     BuildMI(*MBB, BeforeMI, DL,
             TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
         .addReg(FP);
   }
 
   // Spill BP.
-  if (SpillBP) {
-    Register BP = TRI->getBaseRegister();
-    if (STI.isTarget64BitILP32())
-      BP = Register(getX86SubSuperRegister(BP, 64));
-    RC = TRI->getMinimalPhysRegClass(BP);
-    ++NumRegs;
-
+  if (BP.isValid()) {
     BuildMI(*MBB, BeforeMI, DL,
             TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
         .addReg(BP);
   }
 
   // Make sure SP is aligned.
-  int SPAdjust = computeSPAdjust4SpillFPBP(MF, RC, NumRegs);
   if (SPAdjust)
     emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
 
   // Emit unwinding information.
-  if (SpillFP && needsDwarfCFI(MF)) {
+  if (FP.isValid() && needsDwarfCFI(MF)) {
     // Emit .cfi_remember_state to remember old frame.
     unsigned CFIIndex =
         MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
@@ -4296,8 +4284,8 @@ void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
     SmallString<64> CfaExpr;
     uint8_t buffer[16];
     int Offset = SPAdjust;
-    if (SpillBP)
-      Offset += TRI->getSpillSize(*RC);
+    if (BP.isValid())
+      Offset += TRI->getSpillSize(*TRI->getMinimalPhysRegClass(BP));
     // If BeforeMI is a frame setup instruction, we need to adjust the position
     // and offset of the new cfi instruction.
     if (TII.isFrameSetup(*BeforeMI)) {
@@ -4327,43 +4315,25 @@ void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
 
 void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
                                           MachineBasicBlock::iterator AfterMI,
-                                          bool SpillFP, bool SpillBP) const {
-  assert(SpillFP || SpillBP);
-
-  Register FP, BP;
-  const TargetRegisterClass *RC;
-  unsigned NumRegs = 0;
-  if (SpillFP) {
-    FP = TRI->getFrameRegister(MF);
-    if (STI.isTarget64BitILP32())
-      FP = Register(getX86SubSuperRegister(FP, 64));
-    RC = TRI->getMinimalPhysRegClass(FP);
-    ++NumRegs;
-  }
-  if (SpillBP) {
-    BP = TRI->getBaseRegister();
-    if (STI.isTarget64BitILP32())
-      BP = Register(getX86SubSuperRegister(BP, 64));
-    RC = TRI->getMinimalPhysRegClass(BP);
-    ++NumRegs;
-  }
+                                          Register FP, Register BP,
+                                          int SPAdjust) const {
+  assert(FP.isValid() || BP.isValid());
 
   // Adjust SP so it points to spilled FP or BP.
   MachineBasicBlock *MBB = AfterMI->getParent();
   MachineBasicBlock::iterator Pos = std::next(AfterMI);
   DebugLoc DL = AfterMI->getDebugLoc();
-  int SPAdjust = computeSPAdjust4SpillFPBP(MF, RC, NumRegs);
   if (SPAdjust)
     emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);
 
   // Restore BP.
-  if (SpillBP) {
+  if (BP.isValid()) {
     BuildMI(*MBB, Pos, DL,
             TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), BP);
   }
 
   // Restore FP.
-  if (SpillFP) {
+  if (FP.isValid()) {
     BuildMI(*MBB, Pos, DL,
             TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), FP);
 
@@ -4378,6 +4348,36 @@ void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
   }
 }
 
+void X86FrameLowering::saveAndRestoreFPBPUsingSP(MachineFunction &MF,
+                                        MachineBasicBlock::iterator BeforeMI,
+                                        MachineBasicBlock::iterator AfterMI,
+                                        bool SpillFP, bool SpillBP) const {
+  assert(SpillFP || SpillBP);
+
+  Register FP, BP;
+  const TargetRegisterClass *RC;
+  unsigned NumRegs = 0;
+
+  if (SpillFP) {
+    FP = TRI->getFrameRegister(MF);
+    if (STI.isTarget64BitILP32())
+      FP = Register(getX86SubSuperRegister(FP, 64));
+    RC = TRI->getMinimalPhysRegClass(FP);
+    ++NumRegs;
+  }
+  if (SpillBP) {
+    BP = TRI->getBaseRegister();
+    if (STI.isTarget64BitILP32())
+      BP = Register(getX86SubSuperRegister(BP, 64));
+    RC = TRI->getMinimalPhysRegClass(BP);
+    ++NumRegs;
+  }
+  int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumRegs);
+
+  spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust);
+  restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust);
+}
+
 bool X86FrameLowering::skipSpillFPBP(
     MachineFunction &MF, MachineBasicBlock::reverse_iterator &MI) const {
   if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
@@ -4547,9 +4547,8 @@ void X86FrameLowering::spillFPBP(MachineFunction &MF) const {
         }
       }
 
-      // Call target functions to spill and restore FP and BP registers.
-      spillFPBPUsingSP(MF, &(*DefMI), SpillFP, SpillBP);
-      restoreFPBPUsingSP(MF, &(*KillMI), SpillFP, SpillBP);
+      // Call target function to spill and restore FP and BP registers.
+      saveAndRestoreFPBPUsingSP(MF, &(*DefMI), &(*KillMI), SpillFP, SpillBP);
     }
   }
 }
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index 2d1e35adc589b..31988a2d80c8b 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -274,13 +274,18 @@ class X86FrameLowering : public TargetFrameLowering {
   /// base pointer to stack using stack pointer register.
   void spillFPBPUsingSP(MachineFunction &MF,
                         const MachineBasicBlock::iterator BeforeMI,
-                        bool SpillFP, bool SpillBP) const;
+                        Register FP, Register BP, int SPAdjust) const;
 
   /// Issue instructions to restore frame pointer and/or base pointer from stack
   /// using stack pointer register, and free stack space.
   void restoreFPBPUsingSP(MachineFunction &MF,
                           const MachineBasicBlock::iterator AfterMI,
-                          bool SpillFP, bool SpillBP) const;
+                          Register FP, Register BP, int SPAdjust) const;
+
+  void saveAndRestoreFPBPUsingSP(MachineFunction &MF,
+                                 MachineBasicBlock::iterator BeforeMI,
+                                 MachineBasicBlock::iterator AfterMI,
+                                 bool SpillFP, bool SpillBP) const;
 
   // If MI uses fp/bp, but target can handle it, and doesn't want to be spilled
   // again, this function should return true, and update MI so we will not check
diff --git a/llvm/test/CodeGen/X86/clobber_frame_ptr.ll b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
index ba4fa42362809..6209e1a85e9e1 100644
--- a/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
+++ b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
@@ -7,8 +7,9 @@
 
 declare ghccc i32 @external(i32)
 
-define i32 @foo(i32 %0, i32 %1) {
-; CHECK-LABEL: foo:
+; Basic test with ghccc calling convention.
+define i32 @test1(i32 %0, i32 %1) {
+; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
@@ -50,3 +51,109 @@ define i32 @foo(i32 %0, i32 %1) {
     %x = call ghccc i32 @external(i32 %0, i32 %1)
     ret i32 %x
 }
+
+; Calling convention hipe has similar behavior. It clobbers rbp but not rbx.
+
+declare cc 11 i64 @hipe1(i64)
+declare cc 11 i64 @hipe2(i64, i64, i64, i64, i64, i64, i64)
+
+; Basic test with hipe calling convention.
+define i64 @test2(i64 %a0, i64 %a1) {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    andq $-16, %rsp
+; CHECK-NEXT:    subq $16, %rsp
+; CHECK-NEXT:    .cfi_offset %rbx, -56
+; CHECK-NEXT:    .cfi_offset %r12, -48
+; CHECK-NEXT:    .cfi_offset %r13, -40
+; CHECK-NEXT:    .cfi_offset %r14, -32
+; CHECK-NEXT:    .cfi_offset %r15, -24
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_remember_state
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 #
+; CHECK-NEXT:    movq %rsi, %rbp
+; CHECK-NEXT:    movq %rdi, %r15
+; CHECK-NEXT:    callq hipe1@PLT
+; CHECK-NEXT:    addq $8, %rsp
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_restore_state
+; CHECK-NEXT:    movq %r15, %rax
+; CHECK-NEXT:    leaq -40(%rbp), %rsp
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+  %x = call cc 11 i64 @hipe1(i64 %a0, i64 %a1)
+  ret i64 %x
+}
+
+; Test with more arguments, so that some of them are passed on the stack. The
+; spilling of rbp should not disturb the stack arguments.
+; FIXME: The currently generated code is wrong: rbp is used to load incoming
+;        stack arguments after it has already been overwritten with an outgoing
+;        call argument. This is caused by x86-cf-opt.
+define i64 @test3(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7) {
+; CHECK-LABEL: test3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    andq $-16, %rsp
+; CHECK-NEXT:    subq $16, %rsp
+; CHECK-NEXT:    .cfi_offset %rbx, -56
+; CHECK-NEXT:    .cfi_offset %r12, -48
+; CHECK-NEXT:    .cfi_offset %r13, -40
+; CHECK-NEXT:    .cfi_offset %r14, -32
+; CHECK-NEXT:    .cfi_offset %r15, -24
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_remember_state
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 #
+; CHECK-NEXT:    movq %rsi, %rbp
+; CHECK-NEXT:    movq %rdi, %r15
+; CHECK-NEXT:    movq %rdx, %rsi
+; CHECK-NEXT:    movq %rcx, %rdx
+; CHECK-NEXT:    movq %r8, %rcx
+; CHECK-NEXT:    movq %r9, %r8
+; CHECK-NEXT:    pushq 24(%rbp)
+; CHECK-NEXT:    pushq 16(%rbp)
+; CHECK-NEXT:    callq hipe2@PLT
+; CHECK-NEXT:    addq $8, %rsp
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_restore_state
+; CHECK-NEXT:    addq $16, %rsp
+; CHECK-NEXT:    movq %r15, %rax
+; CHECK-NEXT:    leaq -40(%rbp), %rsp
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+  %x = call cc 11 i64 @hipe2(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7)
+  ret i64 %x
+}

>From d271d0c65d3b40611b3fb932c38257429eb06b4c Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Sun, 4 Aug 2024 20:10:41 -0700
Subject: [PATCH 6/6] Replace report_fatal_error with MCContext::reportError

---
 llvm/lib/CodeGen/CFIInstrInserter.cpp    | 7 ++++---
 llvm/lib/Target/X86/X86FrameLowering.cpp | 7 +++----
 llvm/lib/Target/X86/X86FrameLowering.h   | 4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp
index d6fbb9c02ab97..6c84e5eeacb58 100644
--- a/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -25,6 +25,7 @@
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
 using namespace llvm;
 
@@ -236,7 +237,7 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
         // the same BB, so it will not impact outgoing CFA.
         ++RememberState;
         if (RememberState != 1)
-          report_fatal_error(
+          MF->getContext().reportError(SMLoc(),
               "Support for cfi_remember_state not implemented! Value of CFA "
               "may be incorrect!\n");
 #endif
@@ -246,7 +247,7 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
 #ifndef NDEBUG
         --RememberState;
         if (RememberState != 0)
-          report_fatal_error(
+          MF->getContext().reportError(SMLoc(),
               "Support for cfi_restore_state not implemented! Value of CFA may "
               "be incorrect!\n");
 #endif
@@ -276,7 +277,7 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
 
 #ifndef NDEBUG
   if (RememberState != 0)
-    report_fatal_error(
+    MF->getContext().reportError(SMLoc(),
         "Support for cfi_remember_state not implemented! Value of CFA "
         "may be incorrect!\n");
 #endif
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 9202fffaccbbe..78c6935bb567e 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -4348,10 +4348,9 @@ void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
   }
 }
 
-void X86FrameLowering::saveAndRestoreFPBPUsingSP(MachineFunction &MF,
-                                        MachineBasicBlock::iterator BeforeMI,
-                                        MachineBasicBlock::iterator AfterMI,
-                                        bool SpillFP, bool SpillBP) const {
+void X86FrameLowering::saveAndRestoreFPBPUsingSP(
+    MachineFunction &MF, MachineBasicBlock::iterator BeforeMI,
+    MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const {
   assert(SpillFP || SpillBP);
 
   Register FP, BP;
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index 31988a2d80c8b..f400da0ee7e74 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -279,8 +279,8 @@ class X86FrameLowering : public TargetFrameLowering {
   /// Issue instructions to restore frame pointer and/or base pointer from stack
   /// using stack pointer register, and free stack space.
   void restoreFPBPUsingSP(MachineFunction &MF,
-                          const MachineBasicBlock::iterator AfterMI,
-                          Register FP, Register BP, int SPAdjust) const;
+                          const MachineBasicBlock::iterator AfterMI, Register FP,
+                          Register BP, int SPAdjust) const;
 
   void saveAndRestoreFPBPUsingSP(MachineFunction &MF,
                                  MachineBasicBlock::iterator BeforeMI,