[llvm] Spill/restore FP/BP around instructions in which they are clobbered (PR #81048)

via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 13 17:02:58 PST 2024


https://github.com/weiguozhi updated https://github.com/llvm/llvm-project/pull/81048

>From 619b9e24202d50f120a8319d36a4980d017611aa Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Wed, 7 Feb 2024 22:23:45 +0000
Subject: [PATCH 1/2] Spill/restore FP/BP around instructions in which they are
 clobbered

This patch fixes https://github.com/llvm/llvm-project/issues/17204.

If a function uses a base pointer and it is clobbered by an instruction
(typically an inline asm), the current register allocator cannot handle
this situation, so BP contains garbage after that instruction. In theory
the same can happen to FP.

We can spill and reload FP/BP around such instructions. However, normal
spill/reload instructions themselves address their spill slots through
FP/BP, so we cannot use normal spill slots; instead we spill FP/BP to the
top of the stack, addressed via the SP register.
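
For illustration only (not part of the patch), here is a minimal C++
sketch of the problematic pattern, modeled on the new clobber_base_ptr.ll
test: a dynamically sized stack allocation makes the i386 backend pick
esi as the base pointer, and the inline asm then clobbers that same
register. Names and the exact codegen configuration are assumptions.

    // Assumed reproducer sketch, mirroring clobber_base_ptr.ll; names are
    // illustrative. Compile for i386: the dynamic alloca plus the inline
    // asm below exercise the base-pointer clobber this patch guards against.
    int foo(int Len) {
      int G = 0;
      char *Buf = (char *)__builtin_alloca(Len); // dynamic alloc -> base ptr
      Buf[0] = 1;
      void *Dst = &G;
      const void *Src = Buf;
      int Cnt = sizeof(int);
      // "rep movsb" implicitly uses and updates esi/edi/ecx, so the base
      // pointer (esi) holds garbage afterwards unless it is saved/restored.
      asm volatile("rep movsb"
                   : "+D"(Dst), "+S"(Src), "+c"(Cnt)
                   :
                   : "memory");
      return G;
    }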
---
 llvm/include/llvm/CodeGen/MachineFrameInfo.h  |   1 +
 .../llvm/CodeGen/TargetFrameLowering.h        |  25 ++++
 .../include/llvm/CodeGen/TargetRegisterInfo.h |  10 ++
 llvm/lib/CodeGen/PrologEpilogInserter.cpp     | 124 ++++++++++++++++++
 llvm/lib/Target/X86/X86FrameLowering.cpp      | 111 ++++++++++++++++
 llvm/lib/Target/X86/X86FrameLowering.h        |  14 ++
 llvm/lib/Target/X86/X86RegisterInfo.h         |   6 +
 llvm/test/CodeGen/X86/clobber_base_ptr.ll     |  59 +++++++++
 llvm/test/CodeGen/X86/i386-baseptr.ll         |   4 +
 llvm/test/CodeGen/X86/x86-64-baseptr.ll       |  12 ++
 10 files changed, 366 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/clobber_base_ptr.ll

diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 7d11d63d4066f4..f4578a6ca6a119 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -599,6 +599,7 @@ class MachineFrameInfo {
   /// Return the alignment in bytes that this function must be aligned to,
   /// which is greater than the default stack alignment provided by the target.
   Align getMaxAlign() const { return MaxAlignment; }
+  Align getStackAlignment() const { return StackAlignment; }
 
   /// Make sure the function is at least Align bytes aligned.
   void ensureMaxAlignment(Align Alignment);
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 0b9cacecc7cbe1..e8ff1d69b4eb18 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -466,6 +466,31 @@ class TargetFrameLowering {
   /// Return the frame base information to be encoded in the DWARF subprogram
   /// debug info.
   virtual DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const;
+
+  /// Issue instructions to allocate stack space and spill FP and/or BP to the
+  /// stack using the SP register.
+  virtual void spillFPBPUsingSP(MachineFunction &MF,
+                                const MachineBasicBlock::iterator BeforeMI,
+                                bool SpillFP, bool SpillBP) const {
+    report_fatal_error("spillFPBPUsingSP is not implemented for this target!");
+  }
+
+  /// Issue instructions to restore FP and/or BP from the stack using the SP
+  /// register, and free the stack space.
+  virtual void restoreFPBPUsingSP(MachineFunction &MF,
+                                  const MachineBasicBlock::iterator AfterMI,
+                                  bool SpillFP, bool SpillBP) const {
+    report_fatal_error("restoreFPBPUsingSP is not implemented for this "
+                       "target!");
+  }
+
+  // If MI uses FP/BP but the target can handle it itself and does not want PEI
+  // to spill it, this function should return true; it may also advance MI so
+  // that PEI does not re-check the instructions it skipped over.
+  virtual bool skipSpillFPBP(MachineFunction &MF,
+                             MachineBasicBlock::reverse_iterator &MI) const {
+    return false;
+  }
 };
 
 } // End llvm namespace
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 5098fc68df3b20..f6de9c40335e85 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -963,6 +963,16 @@ class TargetRegisterInfo : public MCRegisterInfo {
     return false;
   }
 
+  /// Returns true if the given function needs a base register.
+  virtual bool hasBaseRegister(const MachineFunction &MF) const {
+    return false;
+  }
+
+  /// Returns the physical base register used to access stack objects.
+  virtual Register getBaseRegister(const MachineFunction &MF) const {
+    return 0;
+  }
+
   /// Return true if target has reserved a spill slot in the stack frame of
   /// the given function for the specified register. e.g. On x86, if the frame
   /// register is required, the first fixed stack object is reserved as its
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 8af17e63e25c75..040a3f4f951863 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -77,6 +77,15 @@ using MBBVector = SmallVector<MachineBasicBlock *, 4>;
 STATISTIC(NumLeafFuncWithSpills, "Number of leaf functions with CSRs");
 STATISTIC(NumFuncSeen, "Number of functions seen in PEI");
 
+static cl::opt<bool> SpillClobberedFP(
+    "spill-clobbered-fp",
+    cl::desc("Spill clobbered fp register to stack."),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<bool> SpillClobberedBP(
+    "spill-clobbered-bp",
+    cl::desc("Spill clobbered bp register to stack."),
+    cl::init(true), cl::Hidden);
 
 namespace {
 
@@ -122,6 +131,7 @@ class PEI : public MachineFunctionPass {
   void calculateCallFrameInfo(MachineFunction &MF);
   void calculateSaveRestoreBlocks(MachineFunction &MF);
   void spillCalleeSavedRegs(MachineFunction &MF);
+  void spillFrameBasePointer(MachineFunction &MF);
 
   void calculateFrameObjectOffsets(MachineFunction &MF);
   void replaceFrameIndices(MachineFunction &MF);
@@ -228,6 +238,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
   FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF);
   ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
 
+  // Spill and restore FP/BP around instructions that clobber them.
+  spillFrameBasePointer(MF);
+
   // Calculate the MaxCallFrameSize and AdjustsStack variables for the
   // function's frame information. Also eliminates call frame pseudo
   // instructions.
@@ -1587,3 +1600,114 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
       ++I;
   }
 }
+
+static bool accessFrameBasePointer(const MachineInstr &MI, Register FP,
+                                   Register BP, bool &AccessFP, bool &AccessBP,
+                                   const TargetRegisterInfo *TRI) {
+  AccessFP = AccessBP = false;
+  if (FP) {
+    if (MI.findRegisterUseOperandIdx(FP, false, TRI) != -1 ||
+        MI.findRegisterDefOperandIdx(FP, false, true, TRI) != -1)
+      AccessFP = true;
+  }
+  if (BP) {
+    if (MI.findRegisterUseOperandIdx(BP, false, TRI) != -1 ||
+        MI.findRegisterDefOperandIdx(BP, false, true, TRI) != -1)
+      AccessBP = true;
+  }
+  return AccessFP || AccessBP;
+}
+
+/// If a function uses a base pointer and the base pointer is clobbered by
+/// inline asm, RA does not detect this case, so after the inline asm the base
+/// pointer contains a garbage value.
+/// For example, if a 32-bit x86 function uses esi as its base pointer and esi
+/// is clobbered by the following inline asm
+///     asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
+/// we need to save esi before the asm and restore it afterwards.
+///
+/// The same problem can occur with the frame pointer if there is a function
+/// call and the callee uses a different calling convention that clobbers FP.
+///
+/// Because normal frame objects (spill slots) are accessed through the FP/BP
+/// registers, we cannot spill FP/BP to normal spill slots.
+///
+/// FIXME: There are two possible enhancements:
+/// 1. In many cases there are other physical registers not clobbered by the
+/// inline asm; we could use one of them as the base pointer, or use a virtual
+/// register as the base pointer and let RA assign a physical register to it.
+/// 2. If no other instruction accesses the stack through FP/BP between the
+/// inline asm and the epilog, we could skip the save and restore.
+void PEI::spillFrameBasePointer(MachineFunction &MF) {
+  Register FP, BP;
+  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  if (TFI.hasFP(MF) && SpillClobberedFP)
+    FP = TRI->getFrameRegister(MF);
+  if (TRI->hasBaseRegister(MF) && SpillClobberedBP)
+    BP = TRI->getBaseRegister(MF);
+
+  for (MachineBasicBlock &MBB : MF) {
+    auto MI = MBB.rbegin(), ME = MBB.rend();
+    while (MI != ME) {
+      // Skip frame setup/destroy instructions.
+      // Skip instructions handled by target.
+      if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
+          MI->getFlag(MachineInstr::MIFlag::FrameDestroy) ||
+          TFI.skipSpillFPBP(MF, MI)) {
+        ++MI;
+        continue;
+      }
+
+      bool AccessFP, AccessBP;
+      // Check if fp or bp is used in MI.
+      if (!accessFrameBasePointer(*MI, FP, BP, AccessFP, AccessBP, TRI)) {
+        ++MI;
+        continue;
+      }
+
+      // Look for the range [Start, Stop] in which fp or bp is defined and used.
+      bool FPLive = false, BPLive = false;
+      bool SpillFP = false, SpillBP = false;
+      auto Start = MI, Stop = MI;
+      do {
+        SpillFP |= AccessFP;
+        SpillBP |= AccessBP;
+
+        // Maintain FPLive and BPLive.
+        if (FPLive && MI->findRegisterDefOperandIdx(FP, false, true, TRI) != -1)
+          FPLive = false;
+        if (FP && MI->findRegisterUseOperandIdx(FP, false, TRI) != -1)
+          FPLive = true;
+        if (BPLive && MI->findRegisterDefOperandIdx(BP, false, true, TRI) != -1)
+          BPLive = false;
+        if (BP && MI->findRegisterUseOperandIdx(BP, false, TRI) != -1)
+          BPLive = true;
+
+        Start = MI++;
+      } while ((MI != ME) && (FPLive || BPLive ||
+                accessFrameBasePointer(*MI, FP, BP, AccessFP, AccessBP, TRI)));
+
+      // If the bp is clobbered by a call, we should save and restore outside of
+      // the frame setup instructions.
+      if (Stop->isCall()) {
+        const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+        auto FrameSetup = std::next(Start);
+        while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
+               !FrameSetup->isCall())
+          ++FrameSetup;
+        if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup)) {
+          while (!TII.isFrameInstr(*Stop))
+            --Stop;
+          Start = FrameSetup;
+          MI = Start;
+          ++MI;
+        }
+      }
+
+      // Call target functions to spill and restore FP and BP registers.
+      TFI.spillFPBPUsingSP(MF, &(*Start), SpillFP, SpillBP);
+      TFI.restoreFPBPUsingSP(MF, &(*Stop), SpillFP, SpillBP);
+    }
+  }
+}
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index be416fb0db0695..1fe874623bede6 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -4178,3 +4178,114 @@ void X86FrameLowering::restoreWinEHStackPointersInParent(
                                   /*RestoreSP=*/IsSEH);
   }
 }
+
+static int computeSPAdjust4SpillFPBP(MachineFunction &MF,
+                                     const TargetRegisterClass *RC,
+                                     unsigned SpillRegNum) {
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  unsigned AllocSize = TRI->getSpillSize(*RC) * SpillRegNum;
+  Align StackAlign = MF.getFrameInfo().getStackAlignment();
+  unsigned AlignedSize = alignTo(AllocSize, StackAlign);
+  return AlignedSize - AllocSize;
+}
+
+void X86FrameLowering::spillFPBPUsingSP(
+    MachineFunction &MF, MachineBasicBlock::iterator BeforeMI,
+    bool SpillFP, bool SpillBP) const {
+  const TargetRegisterClass *RC;
+  unsigned RegNum = 0;
+  MachineBasicBlock *MBB = BeforeMI->getParent();
+  DebugLoc DL = BeforeMI->getDebugLoc();
+
+  // Spill FP.
+  if (SpillFP) {
+    Register FP = TRI->getFrameRegister(MF);
+    if (STI.isTarget64BitILP32())
+      FP = Register(getX86SubSuperRegister(FP, 64));
+    RC = TRI->getMinimalPhysRegClass(FP);
+    ++RegNum;
+
+    BuildMI(*MBB, BeforeMI, DL,
+            TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
+        .addReg(FP);
+  }
+
+  // Spill BP.
+  if (SpillBP) {
+    Register BP = TRI->getBaseRegister(MF);
+    if (STI.isTarget64BitILP32())
+      BP = Register(getX86SubSuperRegister(BP, 64));
+    RC = TRI->getMinimalPhysRegClass(BP);
+    ++RegNum;
+
+    BuildMI(*MBB, BeforeMI, DL,
+            TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
+        .addReg(BP);
+  }
+
+  // Make sure SP is aligned.
+  int SPAdjust = computeSPAdjust4SpillFPBP(MF, RC, RegNum);
+  if (SPAdjust)
+    emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
+}
+
+void X86FrameLowering::restoreFPBPUsingSP(
+    MachineFunction &MF, MachineBasicBlock::iterator AfterMI,
+    bool SpillFP, bool SpillBP) const {
+  Register FP, BP;
+  const TargetRegisterClass *RC;
+  unsigned RegNum = 0;
+  if (SpillFP) {
+    FP = TRI->getFrameRegister(MF);
+    if (STI.isTarget64BitILP32())
+      FP = Register(getX86SubSuperRegister(FP, 64));
+    RC = TRI->getMinimalPhysRegClass(FP);
+    ++RegNum;
+  }
+  if (SpillBP) {
+    BP = TRI->getBaseRegister(MF);
+    if (STI.isTarget64BitILP32())
+      BP = Register(getX86SubSuperRegister(BP, 64));
+    RC = TRI->getMinimalPhysRegClass(BP);
+    ++RegNum;
+  }
+
+  // Adjust SP so it points to spilled FP or BP.
+  MachineBasicBlock *MBB = AfterMI->getParent();
+  MachineBasicBlock::iterator Pos = std::next(AfterMI);
+  DebugLoc DL = AfterMI->getDebugLoc();
+  int SPAdjust = computeSPAdjust4SpillFPBP(MF, RC, RegNum);
+  if (SPAdjust)
+    emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);
+
+  // Restore BP.
+  if (SpillBP) {
+    BuildMI(*MBB, Pos, DL,
+            TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), BP);
+  }
+
+  // Restore FP.
+  if (SpillFP) {
+    BuildMI(*MBB, Pos, DL,
+            TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), FP);
+  }
+}
+
+bool X86FrameLowering::skipSpillFPBP(
+    MachineFunction &MF, MachineBasicBlock::reverse_iterator &MI) const {
+  if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
+    // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form
+    //     SaveRbx = COPY RBX
+    //     SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx
+    // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx.
+    // We should skip this instruction sequence.
+    int FI;
+    unsigned Reg;
+    while (!(MI->getOpcode() == TargetOpcode::COPY &&
+             MI->getOperand(1).getReg() == X86::RBX) &&
+           !((Reg = TII.isStoreToStackSlot(*MI, FI)) && Reg == X86::RBX))
+      ++MI;
+    return true;
+  }
+  return false;
+}
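
Side note, not part of the patch: a worked example of the padding computed
by the computeSPAdjust4SpillFPBP helper above, assuming 8-byte GPR spills
and a 16-byte stack alignment; the local alignTo below is a stand-in for
llvm::alignTo.

    // Illustrative only: the adjustment is the rounded-up pushed size minus
    // the pushed size itself, i.e. the padding needed to keep SP aligned.
    #include <cassert>

    static unsigned alignTo(unsigned Size, unsigned Align) {
      return (Size + Align - 1) / Align * Align; // round up to a multiple
    }

    int main() {
      const unsigned SpillSize = 8;   // one 64-bit GPR push
      const unsigned StackAlign = 16; // assumed stack alignment
      // Only FP or only BP is pushed: 8 bytes of padding are needed.
      assert(alignTo(1 * SpillSize, StackAlign) - 1 * SpillSize == 8);
      // Both FP and BP are pushed: 16 bytes, already aligned, no padding.
      assert(alignTo(2 * SpillSize, StackAlign) - 2 * SpillSize == 0);
      return 0;
    }
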
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index 2dc9ecc6109d78..0d77933363ce4f 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -103,6 +103,20 @@ class X86FrameLowering : public TargetFrameLowering {
                               MutableArrayRef<CalleeSavedInfo> CSI,
                               const TargetRegisterInfo *TRI) const override;
 
+  void
+  spillFPBPUsingSP(MachineFunction &MF,
+                   const MachineBasicBlock::iterator BeforeMI,
+                   bool SpillFP, bool SpillBP) const override;
+
+  void
+  restoreFPBPUsingSP(MachineFunction &MF,
+                     const MachineBasicBlock::iterator AfterMI,
+                     bool SpillFP, bool SpillBP) const override;
+
+  bool
+  skipSpillFPBP(MachineFunction &MF,
+                MachineBasicBlock::reverse_iterator &MI) const override;
+
   bool hasFP(const MachineFunction &MF) const override;
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
   bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index 7296a5f021e4ad..cccb731d243540 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -133,6 +133,9 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
   void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
 
   bool hasBasePointer(const MachineFunction &MF) const;
+  bool hasBaseRegister(const MachineFunction &MF) const override {
+    return hasBasePointer(MF);
+  }
 
   bool canRealignStack(const MachineFunction &MF) const override;
 
@@ -164,6 +167,9 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
   unsigned getPtrSizedStackRegister(const MachineFunction &MF) const;
   Register getStackRegister() const { return StackPtr; }
   Register getBaseRegister() const { return BasePtr; }
+  Register getBaseRegister(const MachineFunction &MF) const override {
+    return BasePtr;
+  }
   /// Returns physical register used as frame pointer.
   /// This will always returns the frame pointer register, contrary to
   /// getFrameRegister() which returns the "base pointer" in situations
diff --git a/llvm/test/CodeGen/X86/clobber_base_ptr.ll b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
new file mode 100644
index 00000000000000..29894b6d389068
--- /dev/null
+++ b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i386-pc-windows-gnu"
+
+; This function uses esi as its base pointer and the inline asm clobbers esi,
+; so we should save esi (relative to esp) before the inline asm and restore it
+; afterwards.
+
+define i32 @foo() {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    .cfi_offset %ebp, -8
+; CHECK-NEXT:    movl %esp, %ebp
+; CHECK-NEXT:    .cfi_def_cfa_register %ebp
+; CHECK-NEXT:    pushl %edi
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    andl $-16, %esp
+; CHECK-NEXT:    subl $16, %esp
+; CHECK-NEXT:    movl %esp, %esi
+; CHECK-NEXT:    .cfi_offset %esi, -16
+; CHECK-NEXT:    .cfi_offset %edi, -12
+; CHECK-NEXT:    movl $4, 12(%esi)
+; CHECK-NEXT:    movl 12(%esi), %eax
+; CHECK-NEXT:    addl $3, %eax
+; CHECK-NEXT:    andl $-4, %eax
+; CHECK-NEXT:    calll __alloca
+; CHECK-NEXT:    movl %esp, %eax
+; CHECK-NEXT:    andl $-16, %eax
+; CHECK-NEXT:    movl %eax, %esp
+; CHECK-NEXT:    movl $1, (%eax)
+; CHECK-NEXT:    leal 8(%esi), %edi
+; CHECK-NEXT:    movl $4, %ecx
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    movl %eax, %esi
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    rep movsb (%esi), %es:(%edi)
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    movl 8(%esi), %eax
+; CHECK-NEXT:    leal -8(%ebp), %esp
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    popl %edi
+; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    retl
+entry:
+  %size = alloca i32, align 4
+  %g = alloca i32, align 4
+  store volatile i32 4, ptr %size, align 4
+  %len = load volatile i32, ptr %size, align 4
+  %var_array = alloca i8, i32 %len, align 16
+  store i32 1, ptr %var_array, align 16
+  %nil = call { ptr, ptr, i32 } asm "rep movsb", "={di},={si},={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags}"(ptr %g, ptr %var_array, i32 4)
+  %retval = load i32, ptr %g, align 4
+  ret i32 %retval
+}
diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll
index 08e4bde7353a42..777eb838b84cc7 100644
--- a/llvm/test/CodeGen/X86/i386-baseptr.ll
+++ b/llvm/test/CodeGen/X86/i386-baseptr.ll
@@ -109,10 +109,14 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32
 ; CHECK-NEXT:    subl %eax, %edx
 ; CHECK-NEXT:    movl %edx, %esp
 ; CHECK-NEXT:    negl %eax
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    subl $28, %esp
 ; CHECK-NEXT:    movl $405, %esi # imm = 0x195
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    addl $28, %esp
+; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    movl $405, %ebx # imm = 0x195
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
diff --git a/llvm/test/CodeGen/X86/x86-64-baseptr.ll b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
index 8cda4ba2814bba..020004def6e7ad 100644
--- a/llvm/test/CodeGen/X86/x86-64-baseptr.ll
+++ b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
@@ -136,10 +136,14 @@ define void @clobber_base() #0 {
 ; X32ABI-NEXT:    subl %eax, %edx
 ; X32ABI-NEXT:    negl %eax
 ; X32ABI-NEXT:    movl %edx, %esp
+; X32ABI-NEXT:    pushq %rbx
+; X32ABI-NEXT:    subl $24, %esp
 ; X32ABI-NEXT:    movl $405, %ebx # imm = 0x195
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    nop
 ; X32ABI-NEXT:    #NO_APP
+; X32ABI-NEXT:    addl $24, %esp
+; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    movl $8, %edx
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    movl %edx, (%ebx)
@@ -268,6 +272,8 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32
 ; X32ABI-NEXT:    subl %eax, %edx
 ; X32ABI-NEXT:    negl %eax
 ; X32ABI-NEXT:    movl %edx, %esp
+; X32ABI-NEXT:    pushq %rbx
+; X32ABI-NEXT:    subl $24, %esp
 ; X32ABI-NEXT:    movl $405, %ebx # imm = 0x195
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    nop
@@ -275,6 +281,8 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    nop
 ; X32ABI-NEXT:    #NO_APP
+; X32ABI-NEXT:    addl $24, %esp
+; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    movl $8, %edx
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    movl %edx, (%ebx)
@@ -385,10 +393,14 @@ define void @vmw_host_printf(ptr %fmt, ...) nounwind {
 ; X32ABI-NEXT:    movl $48, (%eax)
 ; X32ABI-NEXT:    movl $8, (%eax)
 ; X32ABI-NEXT:    xorl %eax, %eax
+; X32ABI-NEXT:    pushq %rbx
+; X32ABI-NEXT:    subl $24, %esp
 ; X32ABI-NEXT:    xorl %ebx, %ebx
 ; X32ABI-NEXT:    xorl %ecx, %ecx
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    #NO_APP
+; X32ABI-NEXT:    addl $24, %esp
+; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    leal -8(%ebp), %esp
 ; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    popq %rbp

>From c70ab81cda57d5ea7131c569e99053b219495a26 Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Wed, 14 Feb 2024 01:01:48 +0000
Subject: [PATCH 2/2] Spill/restore FP/BP around instructions in which they are
 clobbered

Add a new test case to demonstrate the save/restore of the FP register.
---
 llvm/test/CodeGen/X86/clobber_frame_ptr.ll | 49 ++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/clobber_frame_ptr.ll

diff --git a/llvm/test/CodeGen/X86/clobber_frame_ptr.ll b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
new file mode 100644
index 00000000000000..591676bf3f15cd
--- /dev/null
+++ b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=x86_64-pc-linux -stackrealign -spill-clobbered-fp=true -verify-machineinstrs < %s | FileCheck %s
+
+; The ghccc calling convention uses ebp to pass parameters, so calling a
+; function with ghccc clobbers ebp. We should save and restore ebp around such
+; a call if ebp is used as the frame pointer.
+
+declare ghccc i32 @external(i32)
+
+define i32 @foo(i32 %0, i32 %1) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    andq $-16, %rsp
+; CHECK-NEXT:    subq $16, %rsp
+; CHECK-NEXT:    .cfi_offset %rbx, -56
+; CHECK-NEXT:    .cfi_offset %r12, -48
+; CHECK-NEXT:    .cfi_offset %r13, -40
+; CHECK-NEXT:    .cfi_offset %r14, -32
+; CHECK-NEXT:    .cfi_offset %r15, -24
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    movl %esi, %ebp
+; CHECK-NEXT:    movq %rdi, %r13
+; CHECK-NEXT:    callq external at PLT
+; CHECK-NEXT:    addq $8, %rsp
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    leaq -40(%rbp), %rsp
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+    %x = call ghccc i32 @external(i32 %0, i32 %1)
+    ret i32 %x
+}


