[llvm] [ARM] Optimise non-ABI frame pointers (PR #110286)

via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 27 08:31:10 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-arm

Author: Oliver Stannard (ostannard)

<details>
<summary>Changes</summary>

With -fomit-frame-pointer, even if we set up a frame pointer for other
reasons (e.g. variable-sized or over-aligned stack allocations), we
don't need to create an ABI-compliant frame record. This means that we
can save all of the general-purpose registers in one push, instead of
splitting it to ensure that the frame pointer and link register are
adjacent on the stack, saving two instructions per function.

The first 7 patches here are also included in #<!-- -->110283 and #<!-- -->110285, so the NFC and bug-fix parts can be reviewed separately.

---

Patch is 107.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/110286.diff


19 Files Affected:

- (modified) llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp (+18-11) 
- (modified) llvm/lib/Target/ARM/ARMBaseRegisterInfo.h (-74) 
- (modified) llvm/lib/Target/ARM/ARMCallingConv.td (+11-8) 
- (modified) llvm/lib/Target/ARM/ARMFrameLowering.cpp (+361-238) 
- (modified) llvm/lib/Target/ARM/ARMFrameLowering.h (+3-4) 
- (modified) llvm/lib/Target/ARM/ARMSubtarget.cpp (+35-5) 
- (modified) llvm/lib/Target/ARM/ARMSubtarget.h (+44-13) 
- (modified) llvm/lib/Target/ARM/Thumb1FrameLowering.cpp (+5-8) 
- (modified) llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll (+27-20) 
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll (+97-63) 
- (added) llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll (+390) 
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll (+21-15) 
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll (+67-59) 
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll (+106-92) 
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll (+38-26) 
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll (+3-6) 
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll (+45-28) 
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll (+51-34) 
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll (+84-26) 


``````````diff
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index c149db3144c7c2..d2e91b02e55b50 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -62,27 +62,30 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo()
 const MCPhysReg*
 ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
-  bool UseSplitPush = STI.splitFramePushPop(*MF);
+  ARMSubtarget::PushPopSplitVariation PushPopSplit =
+                                      STI.getPushPopSplitVariation(*MF);
   const Function &F = MF->getFunction();
 
   if (F.getCallingConv() == CallingConv::GHC) {
     // GHC set of callee saved regs is empty as all those regs are
     // used for passing STG regs around
     return CSR_NoRegs_SaveList;
-  } else if (STI.splitFramePointerPush(*MF)) {
+  } else if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
     return CSR_Win_SplitFP_SaveList;
   } else if (F.getCallingConv() == CallingConv::CFGuard_Check) {
     return CSR_Win_AAPCS_CFGuard_Check_SaveList;
   } else if (F.getCallingConv() == CallingConv::SwiftTail) {
-    return STI.isTargetDarwin()
-               ? CSR_iOS_SwiftTail_SaveList
-               : (UseSplitPush ? CSR_ATPCS_SplitPush_SwiftTail_SaveList
-                               : CSR_AAPCS_SwiftTail_SaveList);
+    return STI.isTargetDarwin() ? CSR_iOS_SwiftTail_SaveList
+                                : (PushPopSplit == ARMSubtarget::SplitR7
+                                       ? CSR_ATPCS_SplitPush_SwiftTail_SaveList
+                                       : CSR_AAPCS_SwiftTail_SaveList);
   } else if (F.hasFnAttribute("interrupt")) {
     if (STI.isMClass()) {
       // M-class CPUs have hardware which saves the registers needed to allow a
       // function conforming to the AAPCS to function as a handler.
-      return UseSplitPush ? CSR_ATPCS_SplitPush_SaveList : CSR_AAPCS_SaveList;
+      return PushPopSplit == ARMSubtarget::SplitR7
+                 ? CSR_ATPCS_SplitPush_SaveList
+                 : CSR_AAPCS_SaveList;
     } else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") {
       // Fast interrupt mode gives the handler a private copy of R8-R14, so less
       // need to be saved to restore user-mode state.
@@ -99,8 +102,9 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     if (STI.isTargetDarwin())
       return CSR_iOS_SwiftError_SaveList;
 
-    return UseSplitPush ? CSR_ATPCS_SplitPush_SwiftError_SaveList :
-      CSR_AAPCS_SwiftError_SaveList;
+    return PushPopSplit == ARMSubtarget::SplitR7
+               ? CSR_ATPCS_SplitPush_SwiftError_SaveList
+               : CSR_AAPCS_SwiftError_SaveList;
   }
 
   if (STI.isTargetDarwin() && F.getCallingConv() == CallingConv::CXX_FAST_TLS)
@@ -111,10 +115,13 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   if (STI.isTargetDarwin())
     return CSR_iOS_SaveList;
 
-  if (UseSplitPush)
-    return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList
+  if (PushPopSplit == ARMSubtarget::SplitR7)
+    return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_R7_SaveList
                                        : CSR_ATPCS_SplitPush_SaveList;
 
+  if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
+    return CSR_AAPCS_SplitPush_R11_SaveList;
+
   return CSR_AAPCS_SaveList;
 }
 
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 926d702b4092a5..5d465f51ed1d9e 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -41,80 +41,6 @@ namespace ARMRI {
 
 } // end namespace ARMRI
 
-/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
-/// or a stack/pc register that we should push/pop.
-static inline bool isARMArea1Register(unsigned Reg, bool SplitFramePushPop) {
-  using namespace ARM;
-
-  switch (Reg) {
-    case R0:  case R1:  case R2:  case R3:
-    case R4:  case R5:  case R6:  case R7:
-    case LR:  case SP:  case PC:
-      return true;
-    case R8:  case R9:  case R10: case R11: case R12:
-      // For iOS we want r7 and lr to be next to each other.
-      return !SplitFramePushPop;
-    default:
-      return false;
-  }
-}
-
-static inline bool isARMArea2Register(unsigned Reg, bool SplitFramePushPop) {
-  using namespace ARM;
-
-  switch (Reg) {
-    case R8: case R9: case R10: case R11: case R12:
-      // iOS has this second area.
-      return SplitFramePushPop;
-    default:
-      return false;
-  }
-}
-
-static inline bool isSplitFPArea1Register(unsigned Reg,
-                                          bool SplitFramePushPop) {
-  using namespace ARM;
-
-  switch (Reg) {
-    case R0:  case R1:  case R2:  case R3:
-    case R4:  case R5:  case R6:  case R7:
-    case R8:  case R9:  case R10: case R12:
-    case SP:  case PC:
-      return true;
-    default:
-      return false;
-  }
-}
-
-static inline bool isSplitFPArea2Register(unsigned Reg,
-                                          bool SplitFramePushPop) {
-  using namespace ARM;
-
-  switch (Reg) {
-    case R11: case LR:
-      return true;
-    default:
-      return false;
-  }
-}
-
-static inline bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop) {
-  using namespace ARM;
-
-  switch (Reg) {
-    case D15: case D14: case D13: case D12:
-    case D11: case D10: case D9:  case D8:
-    case D7:  case D6:  case D5:  case D4:
-    case D3:  case D2:  case D1:  case D0:
-    case D31: case D30: case D29: case D28:
-    case D27: case D26: case D25: case D24:
-    case D23: case D22: case D21: case D20:
-    case D19: case D18: case D17: case D16:
-      return true;
-    default:
-      return false;
-  }
-}
 
 static inline bool isCalleeSavedRegister(unsigned Reg,
                                          const MCPhysReg *CSRegs) {
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td
index d14424c2decac3..27f175a7003366 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -301,14 +301,17 @@ def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
 def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
                                                      R10)>;
 
-// When enforcing an AAPCS compliant frame chain, R11 is used as the frame
-// pointer even for Thumb targets, where split pushes are necessary.
-// This AAPCS alternative makes sure the frame index slots match the push
-// order in that case.
-def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11,
-                                               R7, R6, R5, R4,
-                                               R10, R9, R8,
-                                               (sequence "D%u", 15, 8))>;
+// Sometimes we need to split the push of the callee-saved GPRs into two
+// regions, to ensure that the frame chain record is set up correctly. These
+// list the callee-saved registers in the order they end up on the stack, which
+// depends on whether the frame pointer is r7 or r11.
+def CSR_AAPCS_SplitPush_R11 : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
+                                                   LR, R11,
+                                                   (sequence "D%u", 15, 8))>;
+def CSR_AAPCS_SplitPush_R7 : CalleeSavedRegs<(add LR, R11,
+                                                  R7, R6, R5, R4,
+                                                  R10, R9, R8,
+                                                  (sequence "D%u", 15, 8))>;
 
 // Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
 // and the pointer return value are both passed in R0 in these cases, this can
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 40354f99559896..e02bb170fd3f54 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -173,6 +173,101 @@ static MachineBasicBlock::iterator
 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                         unsigned NumAlignedDPRCS2Regs);
 
+enum class SpillArea {
+  GPRCS1,
+  GPRCS2,
+  DPRCS1,
+  DPRCS2,
+  FPCXT,
+};
+
+/// Get the spill area that Reg should be saved into in the prologue.
+SpillArea getSpillArea(Register Reg,
+                       ARMSubtarget::PushPopSplitVariation Variation,
+                       unsigned NumAlignedDPRCS2Regs,
+                       const ARMBaseRegisterInfo *RegInfo) {
+  // NoSplit:
+  // push {r0-r12, lr}   GPRCS1
+  // vpush {r8-d15}      DPRCS1
+  //
+  // SplitR7:
+  // push {r0-r7, lr}    GPRCS1
+  // push {r8-r12}       GPRCS2
+  // vpush {r8-d15}      DPRCS1
+  //
+  // SplitR11WindowsSEH:
+  // push {r0-r10, r12}  GPRCS1
+  // vpush {r8-d15}      DPRCS1
+  // push {r11, lr}      GPRCS2
+  //
+  // SplitR11AAPCSSignRA:
+  // push {r0-r10, r12}  GPRSC1
+  // push {r11, lr}      GPRCS2
+  // vpush {r8-d15}      DPRCS1
+
+  // If FPCXTNS is spilled (for CMSE secure entryfunctions), it is always at
+  // the top of the stack frame.
+  // The DPRCS2 region is used for ABIs which only guarantee 4-byte alignment
+  // of SP. If used, it will be below the other save areas, after the stack has
+  // been re-aligned.
+
+  switch (Reg) {
+  default:
+    dbgs() << "Don't know where to spill " << printReg(Reg, RegInfo) << "\n";
+    llvm_unreachable("Don't know where to spill this register");
+    break;
+
+  case ARM::FPCXTNS:
+    return SpillArea::FPCXT;
+
+  case ARM::R0: case ARM::R1: case ARM::R2: case ARM::R3:
+  case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7:
+    return SpillArea::GPRCS1;
+
+  case ARM::R8: case ARM::R9: case ARM::R10:
+    if (Variation == ARMSubtarget::SplitR7)
+      return SpillArea::GPRCS2;
+    else
+      return SpillArea::GPRCS1;
+
+  case ARM::R11:
+    if (Variation == ARMSubtarget::NoSplit)
+      return SpillArea::GPRCS1;
+    else
+      return SpillArea::GPRCS2;
+
+  case ARM::R12:
+    if (Variation == ARMSubtarget::SplitR7)
+      return SpillArea::GPRCS2;
+    else
+      return SpillArea::GPRCS1;
+
+  case ARM::LR:
+    if (Variation == ARMSubtarget::SplitR11WindowsSEH ||
+        Variation == ARMSubtarget::SplitR11AAPCSSignRA)
+      return SpillArea::GPRCS2;
+    else
+      return SpillArea::GPRCS1;
+
+  case ARM::D0: case ARM::D1: case ARM::D2: case ARM::D3:
+  case ARM::D4: case ARM::D5: case ARM::D6: case ARM::D7:
+    return SpillArea::DPRCS1;
+
+  case ARM::D8:  case ARM::D9:  case ARM::D10: case ARM::D11:
+  case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15:
+    if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+      return SpillArea::DPRCS2;
+    else
+      return SpillArea::DPRCS1;
+
+  case ARM::D16: case ARM::D17: case ARM::D18: case ARM::D19:
+  case ARM::D20: case ARM::D21: case ARM::D22: case ARM::D23:
+  case ARM::D24: case ARM::D25: case ARM::D26: case ARM::D27:
+  case ARM::D28: case ARM::D29: case ARM::D30: case ARM::D31:
+    return SpillArea::DPRCS1;
+  }
+}
+
 ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
     : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
       STI(sti) {}
@@ -600,6 +695,14 @@ struct StackAdjustingInsts {
     MachineBasicBlock::iterator I;
     unsigned SPAdjust;
     bool BeforeFPSet;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+    void dump() {
+      dbgs() << "  " << (BeforeFPSet ? "before-fp " : "          ")
+             << "sp-adjust=" << SPAdjust;
+      I->dump();
+    }
+#endif
   };
 
   SmallVector<InstInfo, 4> Insts;
@@ -634,6 +737,14 @@ struct StackAdjustingInsts {
               .setMIFlags(MachineInstr::FrameSetup);
     }
   }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  void dump() {
+    dbgs() << "StackAdjustingInsts:\n";
+    for (auto &Info : Insts)
+      Info.dump();
+  }
+#endif
 };
 
 } // end anonymous namespace
@@ -713,6 +824,8 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
 /// this to produce a conservative estimate that we check in an assert() later.
 static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
                           const MachineFunction &MF) {
+  ARMSubtarget::PushPopSplitVariation PushPopSplit =
+                                      STI.getPushPopSplitVariation(MF);
   // For Thumb1, push.w isn't available, so the first push will always push
   // r7 and lr onto the stack first.
   if (AFI.isThumb1OnlyFunction())
@@ -720,9 +833,11 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
   // This is a conservative estimation: Assume the frame pointer being r7 and
   // pc("r15") up to r8 getting spilled before (= 8 registers).
   int MaxRegBytes = 8 * 4;
-  if (STI.splitFramePointerPush(MF)) {
-    // Here, r11 can be stored below all of r4-r15 (3 registers more than
-    // above), plus d8-d15.
+  if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
+    // Here, r11 can be stored below all of r4-r15.
+    MaxRegBytes = 11 * 4;
+  if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
+    // Here, r11 can be stored below all of r4-r15 plus d8-d15.
     MaxRegBytes = 11 * 4 + 8 * 8;
   }
   int FPCXTSaveSize =
@@ -749,6 +864,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
   int FPCXTSaveSize = 0;
   bool NeedsWinCFI = needsWinCFI(MF);
+  ARMSubtarget::PushPopSplitVariation PushPopSplit =
+                                      STI.getPushPopSplitVariation(MF);
+
+  LLVM_DEBUG(dbgs() << "Emitting prologue for " << MF.getName() << "\n");
 
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
@@ -788,81 +907,38 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
     return;
   }
 
-  // Determine spill area sizes.
-  if (STI.splitFramePointerPush(MF)) {
-    for (const CalleeSavedInfo &I : CSI) {
-      Register Reg = I.getReg();
-      int FI = I.getFrameIdx();
-      switch (Reg) {
-      case ARM::R11:
-      case ARM::LR:
-        if (Reg == FramePtr)
-          FramePtrSpillFI = FI;
-        GPRCS2Size += 4;
-        break;
-      case ARM::R0:
-      case ARM::R1:
-      case ARM::R2:
-      case ARM::R3:
-      case ARM::R4:
-      case ARM::R5:
-      case ARM::R6:
-      case ARM::R7:
-      case ARM::R8:
-      case ARM::R9:
-      case ARM::R10:
-      case ARM::R12:
-        GPRCS1Size += 4;
-        break;
-      case ARM::FPCXTNS:
-        FPCXTSaveSize = 4;
-        break;
-      default:
-        // This is a DPR. Exclude the aligned DPRCS2 spills.
-        if (Reg == ARM::D8)
-          D8SpillFI = FI;
-        if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
-          DPRCSSize += 8;
-      }
+  // Determine spill area sizes, and some important frame indices.
+  SpillArea FramePtrSpillArea;
+  bool BeforeFPPush = true;
+  for (const CalleeSavedInfo &I : CSI) {
+    Register Reg = I.getReg();
+    int FI = I.getFrameIdx();
+
+    SpillArea Area = getSpillArea(Reg, PushPopSplit,
+                                  AFI->getNumAlignedDPRCS2Regs(), RegInfo);
+
+    if (Reg == FramePtr) {
+      FramePtrSpillFI = FI;
+      FramePtrSpillArea = Area;
     }
-  } else {
-    for (const CalleeSavedInfo &I : CSI) {
-      Register Reg = I.getReg();
-      int FI = I.getFrameIdx();
-      switch (Reg) {
-      case ARM::R8:
-      case ARM::R9:
-      case ARM::R10:
-      case ARM::R11:
-      case ARM::R12:
-        if (STI.splitFramePushPop(MF)) {
-          GPRCS2Size += 4;
-          break;
-        }
-        [[fallthrough]];
-      case ARM::R0:
-      case ARM::R1:
-      case ARM::R2:
-      case ARM::R3:
-      case ARM::R4:
-      case ARM::R5:
-      case ARM::R6:
-      case ARM::R7:
-      case ARM::LR:
-        if (Reg == FramePtr)
-          FramePtrSpillFI = FI;
-        GPRCS1Size += 4;
-        break;
-      case ARM::FPCXTNS:
-        FPCXTSaveSize = 4;
-        break;
-      default:
-        // This is a DPR. Exclude the aligned DPRCS2 spills.
-        if (Reg == ARM::D8)
-          D8SpillFI = FI;
-        if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
-          DPRCSSize += 8;
-      }
+    if (Reg == ARM::D8)
+      D8SpillFI = FI;
+
+    switch (Area) {
+    case SpillArea::FPCXT:
+      FPCXTSaveSize += 4;
+      break;
+    case SpillArea::GPRCS1:
+      GPRCS1Size += 4;
+      break;
+    case SpillArea::GPRCS2:
+      GPRCS2Size += 4;
+      break;
+    case SpillArea::DPRCS1:
+      DPRCSSize += 8;
+      break;
+    case SpillArea::DPRCS2:
+      break;
     }
   }
 
@@ -875,7 +951,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   // Move past FPCXT area.
   if (FPCXTSaveSize > 0) {
     LastPush = MBBI++;
-    DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
+    DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, BeforeFPPush);
   }
 
   // Allocate the vararg register save area.
@@ -883,39 +959,45 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
     emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
                  MachineInstr::FrameSetup);
     LastPush = std::prev(MBBI);
-    DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
+    DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, BeforeFPPush);
   }
 
   // Move past area 1.
   if (GPRCS1Size > 0) {
     GPRCS1Push = LastPush = MBBI++;
-    DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
+    DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, BeforeFPPush);
+    if (FramePtrSpillArea == SpillArea::GPRCS1)
+      BeforeFPPush = false;
   }
 
-  // Determine starting offsets of spill areas.
+  // Determine starting offsets of spill areas. These offsets are all positive
+  // offsets from the bottom of the lowest-addressed callee-save area
+  // (excluding DPRCS2, which is th the re-aligned stack region) to the bottom
+  // of the spill area in question.
   unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
   unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
   unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
   Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
   unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
-  if (!STI.splitFramePointerPush(MF)) {
+  if (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
     DPRGapSize += GPRCS2Size;
   }
   DPRGapSize %= DPRAlign.value();
 
   unsigned DPRCSOffset;
-  if (STI.splitFramePointerPush(MF)) {
+  if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
     DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
     GPRCS2Offset = DPRCSOffset - GPRCS2Size;
   } else {
     DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
   }
-  int FramePtrOffsetInPush = 0;
   if (HasFP) {
+    // Offset from the CFA to the saved frame pointer, will be negative.
     int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
+    LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
+                      << ", FPOffset: " << FPOffset << "\n");
     assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
            "Max FP estimation is wrong");
-    FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
     AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
                                 NumBytes);
   }
@@ -923,10 +1005,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
   AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
 
-  // Move past area 2.
-  if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
+  // Move GPRCS2, unless using SplitR11WindowsSEH, in which case it will be
+  // after DPRCS1.
+  if (GPRCS2Size > 0 && PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
     GPRCS2Push = LastPush = MBBI++;
-    DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
+    DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
+    if (FramePtrSpillArea == SpillArea::GPRCS2)
+      BeforeFPPush = false;
   }
 
   // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
@@ -939,16 +1024,16 @@ void ARMFrameLowering::e...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/110286


More information about the llvm-commits mailing list