[llvm] [ARM] R11 not pushed adjacent to link register with PAC-M and AAPCS frame chain fix (PR #82801)

James Westwood via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 4 01:51:35 PST 2024


https://github.com/jwestwood921 updated https://github.com/llvm/llvm-project/pull/82801

>From d77e7ac07f9ba74a8604ba82bed50a1610f3f156 Mon Sep 17 00:00:00 2001
From: James Westwood <james.westwood at arm.com>
Date: Fri, 23 Feb 2024 16:40:27 +0000
Subject: [PATCH 1/3] R11 not pushed adjacent to lr with PAC-M and AAPCS frame
 chain fix

When code for an M-class architecture was compiled with AAPCS and PAC enabled, the frame pointer, r11, was not pushed to the stack adjacent
to the link register. Due to PAC being enabled, r12 was placed between r11 and lr. This patch fixes this by adding an extra case to the
already existing code that splits the GPR push in two when R11 is the frame pointer and certain conditions are met. The differential
revision for this previous change can be found here: https://reviews.llvm.org/D125649. This now ensures that r11 and lr are pushed in a
separate push instruction from the other GPRs when PAC and AAPCS are enabled, meaning the frame pointer and link register are now pushed
onto the stack adjacent to each other.
---
 llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp   |   4 +-
 llvm/lib/Target/ARM/ARMFrameLowering.cpp      | 186 ++++++++++++------
 llvm/lib/Target/ARM/ARMSubtarget.cpp          |  38 +++-
 llvm/lib/Target/ARM/ARMSubtarget.h            |  22 ++-
 llvm/lib/Target/ARM/Thumb1FrameLowering.cpp   |   4 +-
 .../CodeGen/Thumb2/pacbti-m-frame-chain.ll    |  82 ++++++++
 6 files changed, 263 insertions(+), 73 deletions(-)
 create mode 100644 llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll

diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 9adf758b46c481..42e2b89260e16a 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -62,14 +62,14 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo()
 const MCPhysReg*
 ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
-  bool UseSplitPush = STI.splitFramePushPop(*MF);
+  bool UseSplitPush = STI.splitFramePushPopR7(*MF);
   const Function &F = MF->getFunction();
 
   if (F.getCallingConv() == CallingConv::GHC) {
     // GHC set of callee saved regs is empty as all those regs are
     // used for passing STG regs around
     return CSR_NoRegs_SaveList;
-  } else if (STI.splitFramePointerPush(*MF)) {
+  } else if (STI.framePointerRequiredForSEHUnwind(*MF)) {
     return CSR_Win_SplitFP_SaveList;
   } else if (F.getCallingConv() == CallingConv::CFGuard_Check) {
     return CSR_Win_AAPCS_CFGuard_Check_SaveList;
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index eeb7f64aa5810e..f288e8be7d3816 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -718,9 +718,13 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
   // This is a conservative estimation: Assume the frame pointer being r7 and
   // pc("r15") up to r8 getting spilled before (= 8 registers).
   int MaxRegBytes = 8 * 4;
-  if (STI.splitFramePointerPush(MF)) {
+  if (STI.r11AndLRNotAdjacent(MF) &&
+      STI.getRegisterInfo()->getFrameRegister(MF) == ARM::R11)
     // Here, r11 can be stored below all of r4-r15 (3 registers more than
-    // above), plus d8-d15.
+    // above).
+    MaxRegBytes = 11 * 4;
+  if (STI.framePointerRequiredForSEHUnwind(MF)) {
+    // Here, r11 can be stored below all of r4-r15 plus d8-d15.
     MaxRegBytes = 11 * 4 + 8 * 8;
   }
   int FPCXTSaveSize =
@@ -788,7 +792,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   }
 
   // Determine spill area sizes.
-  if (STI.splitFramePointerPush(MF)) {
+  if (STI.splitFramePushPopR11(MF)) {
     for (const CalleeSavedInfo &I : CSI) {
       Register Reg = I.getReg();
       int FI = I.getFrameIdx();
@@ -834,7 +838,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
       case ARM::R10:
       case ARM::R11:
       case ARM::R12:
-        if (STI.splitFramePushPop(MF)) {
+        if (STI.splitFramePushPopR7(MF)) {
           GPRCS2Size += 4;
           break;
         }
@@ -897,13 +901,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
   Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
   unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
-  if (!STI.splitFramePointerPush(MF)) {
+  if (!STI.framePointerRequiredForSEHUnwind(MF)) {
     DPRGapSize += GPRCS2Size;
   }
   DPRGapSize %= DPRAlign.value();
 
   unsigned DPRCSOffset;
-  if (STI.splitFramePointerPush(MF)) {
+  if (STI.framePointerRequiredForSEHUnwind(MF)) {
     DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
     GPRCS2Offset = DPRCSOffset - GPRCS2Size;
   } else {
@@ -922,8 +926,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
   AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
 
-  // Move past area 2.
-  if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
+  // Move past area 2, unless following Win_AAPCS_CFGuard calling convention.
+  if (GPRCS2Size > 0 && !STI.framePointerRequiredForSEHUnwind(MF)) {
     GPRCS2Push = LastPush = MBBI++;
     DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
   }
@@ -963,13 +967,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   } else
     NumBytes = DPRCSOffset;
 
-  if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
+  // Move past area 2 if following Win_AAPCS_CFGuard calling convention.
+  if (GPRCS2Size > 0 && STI.framePointerRequiredForSEHUnwind(MF)) {
     GPRCS2Push = LastPush = MBBI++;
     DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
   }
 
   bool NeedsWinCFIStackAlloc = NeedsWinCFI;
-  if (STI.splitFramePointerPush(MF) && HasFP)
+  if (STI.framePointerRequiredForSEHUnwind(MF) && HasFP)
     NeedsWinCFIStackAlloc = false;
 
   if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
@@ -1074,7 +1079,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
     AfterPush = std::next(GPRCS1Push);
     unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
     int FPOffset = PushSize + FramePtrOffsetInPush;
-    if (STI.splitFramePointerPush(MF)) {
+    if (STI.splitFramePushPopR11(MF)) {
       AfterPush = std::next(GPRCS2Push);
       emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
                            FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
@@ -1106,7 +1111,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   // instructions below don't need to be replayed to unwind the stack.
   if (NeedsWinCFI && MBBI != MBB.begin()) {
     MachineBasicBlock::iterator End = MBBI;
-    if (HasFP && STI.splitFramePointerPush(MF))
+    if (HasFP && STI.framePointerRequiredForSEHUnwind(MF))
       End = AfterPush;
     insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup);
     BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
@@ -1118,61 +1123,114 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   // the necessary DWARF cf instructions to describe the situation. Start by
   // recording where each register ended up:
   if (GPRCS1Size > 0 && !NeedsWinCFI) {
-    MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
-    int CFIIndex;
-    for (const auto &Entry : CSI) {
-      Register Reg = Entry.getReg();
-      int FI = Entry.getFrameIdx();
-      switch (Reg) {
-      case ARM::R8:
-      case ARM::R9:
-      case ARM::R10:
-      case ARM::R11:
-      case ARM::R12:
-        if (STI.splitFramePushPop(MF))
+    if (STI.r11AndLRNotAdjacent(MF) &&
+        RegInfo->getFrameRegister(MF) == ARM::R11) {
+      MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
+      int CFIIndex;
+      for (const auto &Entry : CSI) {
+        Register Reg = Entry.getReg();
+        int FI = Entry.getFrameIdx();
+        switch (Reg) {
+        case ARM::R0:
+        case ARM::R1:
+        case ARM::R2:
+        case ARM::R3:
+        case ARM::R4:
+        case ARM::R5:
+        case ARM::R6:
+        case ARM::R7:
+        case ARM::R8:
+        case ARM::R9:
+        case ARM::R10:
+        case ARM::R12:
+          CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+              nullptr, MRI->getDwarfRegNum(Reg, true),
+              MFI.getObjectOffset(FI)));
+          BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+              .addCFIIndex(CFIIndex)
+              .setMIFlags(MachineInstr::FrameSetup);
           break;
-        [[fallthrough]];
-      case ARM::R0:
-      case ARM::R1:
-      case ARM::R2:
-      case ARM::R3:
-      case ARM::R4:
-      case ARM::R5:
-      case ARM::R6:
-      case ARM::R7:
-      case ARM::LR:
-        CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
-            nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
-        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
-            .addCFIIndex(CFIIndex)
-            .setMIFlags(MachineInstr::FrameSetup);
-        break;
+        }
+      }
+    } else {
+      MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
+      int CFIIndex;
+      for (const auto &Entry : CSI) {
+        Register Reg = Entry.getReg();
+        int FI = Entry.getFrameIdx();
+        switch (Reg) {
+        case ARM::R8:
+        case ARM::R9:
+        case ARM::R10:
+        case ARM::R11:
+        case ARM::R12:
+          if (STI.splitFramePushPopR7(MF))
+            break;
+          [[fallthrough]];
+        case ARM::R0:
+        case ARM::R1:
+        case ARM::R2:
+        case ARM::R3:
+        case ARM::R4:
+        case ARM::R5:
+        case ARM::R6:
+        case ARM::R7:
+        case ARM::LR:
+          CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+              nullptr, MRI->getDwarfRegNum(Reg, true),
+              MFI.getObjectOffset(FI)));
+          BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+              .addCFIIndex(CFIIndex)
+              .setMIFlags(MachineInstr::FrameSetup);
+          break;
+        }
       }
     }
   }
 
   if (GPRCS2Size > 0 && !NeedsWinCFI) {
     MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
-    for (const auto &Entry : CSI) {
-      Register Reg = Entry.getReg();
-      int FI = Entry.getFrameIdx();
-      switch (Reg) {
-      case ARM::R8:
-      case ARM::R9:
-      case ARM::R10:
-      case ARM::R11:
-      case ARM::R12:
-        if (STI.splitFramePushPop(MF)) {
-          unsigned DwarfReg = MRI->getDwarfRegNum(
-              Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
+    if (STI.r11AndLRNotAdjacent(MF) &&
+        RegInfo->getFrameRegister(MF) == ARM::R11) {
+      for (const auto &Entry : CSI) {
+        Register Reg = Entry.getReg();
+        int FI = Entry.getFrameIdx();
+        switch (Reg) {
+        case ARM::R11:
+        case ARM::LR:
+          unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
           unsigned Offset = MFI.getObjectOffset(FI);
           unsigned CFIIndex = MF.addFrameInst(
               MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
           BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
               .addCFIIndex(CFIIndex)
               .setMIFlags(MachineInstr::FrameSetup);
+          break;
+        }
+      }
+    } else {
+      MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
+      for (const auto &Entry : CSI) {
+        Register Reg = Entry.getReg();
+        int FI = Entry.getFrameIdx();
+        switch (Reg) {
+        case ARM::R8:
+        case ARM::R9:
+        case ARM::R10:
+        case ARM::R11:
+        case ARM::R12:
+          if (STI.splitFramePushPopR7(MF)) {
+            unsigned DwarfReg = MRI->getDwarfRegNum(
+                Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
+            unsigned Offset = MFI.getObjectOffset(FI);
+            unsigned CFIIndex = MF.addFrameInst(
+                MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+            BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+                .addCFIIndex(CFIIndex)
+                .setMIFlags(MachineInstr::FrameSetup);
+          }
+          break;
         }
-        break;
       }
     }
   }
@@ -1382,7 +1440,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                    MachineInstr::FrameDestroy);
 
     // Increment past our save areas.
-    if (AFI->getGPRCalleeSavedArea2Size() && STI.splitFramePointerPush(MF))
+    if (AFI->getGPRCalleeSavedArea2Size() &&
+        STI.framePointerRequiredForSEHUnwind(MF))
       MBBI++;
 
     if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
@@ -1399,7 +1458,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                    MachineInstr::FrameDestroy);
     }
 
-    if (AFI->getGPRCalleeSavedArea2Size() && !STI.splitFramePointerPush(MF))
+    if (AFI->getGPRCalleeSavedArea2Size() &&
+        !STI.framePointerRequiredForSEHUnwind(MF))
       MBBI++;
     if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
 
@@ -1539,7 +1599,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
     unsigned LastReg = 0;
     for (; i != 0; --i) {
       Register Reg = CSI[i-1].getReg();
-      if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
+      if (!(Func)(Reg, STI.splitFramePushPopR7(MF)))
+        continue;
 
       // D-registers in the aligned area DPRCS2 are NOT spilled here.
       if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -1632,7 +1693,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
     for (; i != 0; --i) {
       CalleeSavedInfo &Info = CSI[i-1];
       Register Reg = Info.getReg();
-      if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
+      if (!(Func)(Reg, STI.splitFramePushPopR7(MF)))
+        continue;
 
       // The aligned reloads from area DPRCS2 are not inserted here.
       if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -1640,7 +1702,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
       if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
           !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
           STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
-          !STI.splitFramePointerPush(MF)) {
+          !STI.splitFramePushPopR11(MF)) {
         Reg = ARM::PC;
         // Fold the return instruction into the LDM.
         DeleteRet = true;
@@ -2001,7 +2063,7 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
         .addImm(-4)
         .add(predOps(ARMCC::AL));
   }
-  if (STI.splitFramePointerPush(MF)) {
+  if (STI.splitFramePushPopR11(MF)) {
     emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
                  &isSplitFPArea1Register, 0, MachineInstr::FrameSetup);
     emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
@@ -2046,7 +2108,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
   unsigned LdrOpc =
       AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
   unsigned FltOpc = ARM::VLDMDIA_UPD;
-  if (STI.splitFramePointerPush(MF)) {
+  if (STI.splitFramePushPopR11(MF)) {
     emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
                 &isSplitFPArea2Register, 0);
     emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
@@ -2362,7 +2424,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
     if (Spilled) {
       NumGPRSpills++;
 
-      if (!STI.splitFramePushPop(MF)) {
+      if (!STI.splitFramePushPopR7(MF)) {
         if (Reg == ARM::LR)
           LRSpilled = true;
         CS1Spilled = true;
@@ -2384,7 +2446,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
         break;
       }
     } else {
-      if (!STI.splitFramePushPop(MF)) {
+      if (!STI.splitFramePushPopR7(MF)) {
         UnspilledCS1GPRs.push_back(Reg);
         continue;
       }
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 922fa93226f298..cc86afb835d0c4 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -494,11 +494,47 @@ bool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF,
          ARM::GPRRegClass.contains(PhysReg);
 }
 
-bool ARMSubtarget::splitFramePointerPush(const MachineFunction &MF) const {
+bool ARMSubtarget::r11AndLRNotAdjacent(const MachineFunction &MF) const {
+  const std::vector<CalleeSavedInfo> CSI =
+      MF.getFrameInfo().getCalleeSavedInfo();
+
+  if (CSI.size() > 1 &&
+      MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress()) {
+
+    bool r11InCSI = false;
+    bool lrInCSI = false;
+    unsigned long r11Idx = 0;
+    unsigned long lrIdx = 0;
+
+    for (unsigned long i = 0; i < CSI.size(); i++) {
+      if (CSI[i].getReg() == ARM::LR) {
+        lrIdx = i;
+        lrInCSI = true;
+      } else if (CSI[i].getReg() == ARM::R11) {
+        r11Idx = i;
+        r11InCSI = true;
+      }
+    }
+    if (lrIdx + 1 != r11Idx && r11InCSI && lrInCSI)
+      return true;
+  }
+  return false;
+}
+
+bool ARMSubtarget::framePointerRequiredForSEHUnwind(
+    const MachineFunction &MF) const {
   const Function &F = MF.getFunction();
+  const std::vector<CalleeSavedInfo> CSI =
+      MF.getFrameInfo().getCalleeSavedInfo();
+
   if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI() ||
       !F.needsUnwindTableEntry())
     return false;
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   return MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF);
 }
+
+bool ARMSubtarget::splitFramePushPopR11(const MachineFunction &MF) const {
+  return (r11AndLRNotAdjacent(MF) && getFramePointerReg() == ARM::R11) ||
+         framePointerRequiredForSEHUnwind(MF);
+}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 91f3978b041a3a..210d8fdb1440ee 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -441,18 +441,28 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   }
 
   /// Returns true if the frame setup is split into two separate pushes (first
-  /// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent
-  /// to lr. This is always required on Thumb1-only targets, as the push and
-  /// pop instructions can't access the high registers.
-  bool splitFramePushPop(const MachineFunction &MF) const {
-    if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress())
+  /// r0-r7,lr then r8-r11), principally so that the frame pointer r7 is
+  /// adjacent to lr. This is always required on Thumb1-only targets, as the
+  /// push and pop instructions can't access the high registers.
+  bool splitFramePushPopR7(const MachineFunction &MF) const {
+    if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
+        !createAAPCSFrameChain())
       return true;
     return (getFramePointerReg() == ARM::R7 &&
             MF.getTarget().Options.DisableFramePointerElim(MF)) ||
            isThumb1Only();
   }
 
-  bool splitFramePointerPush(const MachineFunction &MF) const;
+  bool framePointerRequiredForSEHUnwind(const MachineFunction &MF) const;
+
+  // Returns true if R11 and lr are not adjacent to each other in the list of
+  // callee saved registers in a frame.
+  bool r11AndLRNotAdjacent(const MachineFunction &MF) const;
+
+  // Returns true if the frame setup is split into two separate pushes (first
+  // r0-r10,r12 then r11,lr), principally so that the frame pointer r11 is
+  // adjacent to lr.
+  bool splitFramePushPopR11(const MachineFunction &MF) const;
 
   bool useStride4VFPs() const;
 
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 0f4ece64bff532..0ad7b96453d1c5 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -222,7 +222,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
     case ARM::R8:
     case ARM::R9:
     case ARM::R10:
-      if (STI.splitFramePushPop(MF)) {
+      if (STI.splitFramePushPopR7(MF)) {
         GPRCS2Size += 4;
         break;
       }
@@ -366,7 +366,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
       case ARM::R10:
       case ARM::R11:
       case ARM::R12:
-        if (STI.splitFramePushPop(MF))
+        if (STI.splitFramePushPopR7(MF))
           break;
         [[fallthrough]];
       case ARM::R0:
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll
new file mode 100644
index 00000000000000..77759355e576a2
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll
@@ -0,0 +1,82 @@
+; RUN: llc -filetype asm -o - %s --frame-pointer=all -mattr=+aapcs-frame-chain -mattr=+aapcs-frame-chain-leaf -force-dwarf-frame-section | FileCheck %s
+target triple = "thumbv8m.main-none-none-eabi"
+
+; int f() {
+;     return 0;
+; }
+;
+; int x(int, char *);
+; int y(int n) {
+; char a[n];
+; return 1 + x(n, a);
+; }
+
+define hidden i32 @f() local_unnamed_addr {
+entry:
+    ret i32 0;
+}
+
+define hidden i32 @x(i32 noundef %n) local_unnamed_addr {
+entry:
+  %vla = alloca i8, i32 %n, align 1
+  %call = call i32 @y(i32 noundef %n, ptr noundef nonnull %vla)
+  %add = add nsw i32 %call, 1
+  ret i32 %add
+}
+
+declare dso_local i32 @y(i32 noundef, ptr noundef) local_unnamed_addr
+
+; CHECK-LABEL: f:
+; CHECK:       pac     r12, lr, sp
+; CHECK-NEXT:  .save   {ra_auth_code}
+; CHECK-NEXT:  str     r12, [sp, #-4]!
+; CHECK-NEXT:  .cfi_def_cfa_offset 4
+; CHECK-NEXT:  .cfi_offset r12, -8
+; CHECK-NEXT:  .save   {r11, lr}
+; CHECK-NEXT:  push.w  {r11, lr}
+; CHECK-NEXT:  .cfi_offset lr, -4
+; CHECK-NEXT:  .cfi_offset r11, -12
+; CHECK-NEXT:  .setfp  r11, sp
+; CHECK-NEXT:  mov     r11, sp
+; CHECK-NEXT:  .cfi_def_cfa r11, 12
+; CHECK-NEXT:  movs    r0, #0
+; CHECK-NEXT:  pop.w   {r11, lr}
+; CHECK-NEXT:  ldr     r12, [sp], #4
+; CHECK-NEXT:  aut     r12, lr, sp
+; CHECK-NEXT:  bx      lr
+
+; CHECK-LABEL: x:
+; CHECK:       pac     r12, lr, sp
+; CHECK-NEXT:  .save   {r4, r7, ra_auth_code}
+; CHECK-NEXT:  push.w  {r4, r7, r12}
+; CHECK-NEXT:  .cfi_def_cfa_offset 12
+; CHECK-NEXT:  .cfi_offset r12, -8
+; CHECK-NEXT:  .cfi_offset r7, -16
+; CHECK-NEXT:  .cfi_offset r4, -20
+; CHECK-NEXT:  .save   {r11, lr}
+; CHECK-NEXT:  push.w  {r11, lr}
+; CHECK-NEXT:  .cfi_offset lr, -4
+; CHECK-NEXT:  .cfi_offset r11, -12
+; CHECK-NEXT:  .setfp  r11, sp
+; CHECK-NEXT:  mov     r11, sp
+; CHECK-NEXT:  .cfi_def_cfa_register r11
+; CHECK-NEXT:  .pad    #4
+; CHECK-NEXT:  sub     sp, #4
+; CHECK-NEXT:  adds    r1, r0, #7
+; CHECK-NEXT:  bic     r1, r1, #7
+; CHECK-NEXT:  sub.w   r1, sp, r1
+; CHECK-NEXT:  mov     sp, r1
+; CHECK-NEXT:  bl      y
+; CHECK-NEXT:  sub.w   r4, r11, #8
+; CHECK-NEXT:  adds    r0, #1
+; CHECK-NEXT:  mov     sp, r4
+; CHECK-NEXT:  pop.w   {r11, lr}
+; CHECK-NEXT:  pop.w   {r4, r7, r12}
+; CHECK-NEXT:  aut     r12, lr, sp
+; CHECK-NEXT:  bx      lr
+
+!llvm.module.flags = !{!0, !1, !2}
+
+!0 = !{i32 8, !"sign-return-address", i32 1}
+!1 = !{i32 8, !"sign-return-address-all", i32 0}
+!2 = !{i32 8, !"branch-target-enforcement", i32 0}

>From b0438c97ba87797f967c9c85e82c25bb54d06015 Mon Sep 17 00:00:00 2001
From: James Westwood <james.westwood at arm.com>
Date: Thu, 29 Feb 2024 16:37:55 +0000
Subject: [PATCH 2/3] Replaced individual boolean methods to determine how the
 register pushes should be split with a single enum.

---
 llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp |  6 +-
 llvm/lib/Target/ARM/ARMFrameLowering.cpp    | 90 ++++++++++++++-------
 llvm/lib/Target/ARM/ARMSubtarget.cpp        | 56 +++++++------
 llvm/lib/Target/ARM/ARMSubtarget.h          | 42 +++++-----
 llvm/lib/Target/ARM/Thumb1FrameLowering.cpp |  6 +-
 5 files changed, 122 insertions(+), 78 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 42e2b89260e16a..4971281ec2388b 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -62,14 +62,16 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo()
 const MCPhysReg*
 ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
-  bool UseSplitPush = STI.splitFramePushPopR7(*MF);
+  bool UseSplitPush = (STI.getPushPopSplitVariation(*MF) ==
+                       ARMSubtarget::PushPopSplitVariation::R7Split);
   const Function &F = MF->getFunction();
 
   if (F.getCallingConv() == CallingConv::GHC) {
     // GHC set of callee saved regs is empty as all those regs are
     // used for passing STG regs around
     return CSR_NoRegs_SaveList;
-  } else if (STI.framePointerRequiredForSEHUnwind(*MF)) {
+  } else if (STI.getPushPopSplitVariation(*MF) ==
+             ARMSubtarget::PushPopSplitVariation::R11SplitWindowsSEHUnwind) {
     return CSR_Win_SplitFP_SaveList;
   } else if (F.getCallingConv() == CallingConv::CFGuard_Check) {
     return CSR_Win_AAPCS_CFGuard_Check_SaveList;
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index f288e8be7d3816..d7fc3c7ba4cbf6 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -718,12 +718,13 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
   // This is a conservative estimation: Assume the frame pointer being r7 and
   // pc("r15") up to r8 getting spilled before (= 8 registers).
   int MaxRegBytes = 8 * 4;
-  if (STI.r11AndLRNotAdjacent(MF) &&
-      STI.getRegisterInfo()->getFrameRegister(MF) == ARM::R11)
+  if (STI.getPushPopSplitVariation(MF) ==
+      ARMSubtarget::PushPopSplitVariation::R11SplitAAPCSBranchSigning)
     // Here, r11 can be stored below all of r4-r15 (3 registers more than
     // above).
     MaxRegBytes = 11 * 4;
-  if (STI.framePointerRequiredForSEHUnwind(MF)) {
+  if (STI.getPushPopSplitVariation(MF) ==
+      ARMSubtarget::PushPopSplitVariation::R11SplitWindowsSEHUnwind) {
     // Here, r11 can be stored below all of r4-r15 plus d8-d15.
     MaxRegBytes = 11 * 4 + 8 * 8;
   }
@@ -792,7 +793,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   }
 
   // Determine spill area sizes.
-  if (STI.splitFramePushPopR11(MF)) {
+  if (STI.getPushPopSplitVariation(MF) ==
+          ARMSubtarget::PushPopSplitVariation::R11SplitAAPCSBranchSigning ||
+      STI.getPushPopSplitVariation(MF) ==
+          ARMSubtarget::PushPopSplitVariation::R11SplitWindowsSEHUnwind) {
     for (const CalleeSavedInfo &I : CSI) {
       Register Reg = I.getReg();
       int FI = I.getFrameIdx();
@@ -838,7 +842,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
       case ARM::R10:
       case ARM::R11:
       case ARM::R12:
-        if (STI.splitFramePushPopR7(MF)) {
+        if (STI.getPushPopSplitVariation(MF) ==
+            ARMSubtarget::PushPopSplitVariation::R7Split) {
           GPRCS2Size += 4;
           break;
         }
@@ -901,13 +906,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
   Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
   unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
-  if (!STI.framePointerRequiredForSEHUnwind(MF)) {
+  if (STI.getPushPopSplitVariation(MF) !=
+      ARMSubtarget::PushPopSplitVariation::R11SplitWindowsSEHUnwind) {
     DPRGapSize += GPRCS2Size;
   }
   DPRGapSize %= DPRAlign.value();
 
   unsigned DPRCSOffset;
-  if (STI.framePointerRequiredForSEHUnwind(MF)) {
+  if (STI.getPushPopSplitVariation(MF) ==
+      ARMSubtarget::PushPopSplitVariation::R11SplitWindowsSEHUnwind) {
     DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
     GPRCS2Offset = DPRCSOffset - GPRCS2Size;
   } else {
@@ -927,7 +934,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
 
   // Move past area 2, unless following Win_AAPCS_CFGuard calling convention.
-  if (GPRCS2Size > 0 && !STI.framePointerRequiredForSEHUnwind(MF)) {
+  if (GPRCS2Size > 0 &&
+      STI.getPushPopSplitVariation(MF) !=
+          ARMSubtarget::PushPopSplitVariation::R11SplitWindowsSEHUnwind) {
     GPRCS2Push = LastPush = MBBI++;
     DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
   }
@@ -968,13 +977,17 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
     NumBytes = DPRCSOffset;
 
   // Move past area 2 if following Win_AAPCS_CFGuard calling convention.
-  if (GPRCS2Size > 0 && STI.framePointerRequiredForSEHUnwind(MF)) {
+  if (GPRCS2Size > 0 &&
+      STI.getPushPopSplitVariation(MF) ==
+          ARMSubtarget::PushPopSplitVariation::R11SplitWindowsSEHUnwind) {
     GPRCS2Push = LastPush = MBBI++;
     DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
   }
 
   bool NeedsWinCFIStackAlloc = NeedsWinCFI;
-  if (STI.framePointerRequiredForSEHUnwind(MF) && HasFP)
+  if (STI.getPushPopSplitVariation(MF) ==
+          ARMSubtarget::PushPopSplitVariation::R11SplitWindowsSEHUnwind &&
+      HasFP)
     NeedsWinCFIStackAlloc = false;
 
   if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
@@ -1079,7 +1092,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
     AfterPush = std::next(GPRCS1Push);
     unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
     int FPOffset = PushSize + FramePtrOffsetInPush;
-    if (STI.splitFramePushPopR11(MF)) {
+    if (STI.getPushPopSplitVariation(MF) ==
+            ARMSubtarget::PushPopSplitVariation::R11SplitAAPCSBranchSigning ||
+        STI.getPushPopSplitVariation(MF) ==
+            ARMSubtarget::PushPopSplitVariation::R11SplitWindowsSEHUnwind) {
       AfterPush = std::next(GPRCS2Push);
       emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
                            FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
@@ -1111,7 +1127,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   // instructions below don't need to be replayed to unwind the stack.
   if (NeedsWinCFI && MBBI != MBB.begin()) {
     MachineBasicBlock::iterator End = MBBI;
-    if (HasFP && STI.framePointerRequiredForSEHUnwind(MF))
+    if (HasFP &&
+        STI.getPushPopSplitVariation(MF) ==
+            ARMSubtarget::PushPopSplitVariation::R11SplitWindowsSEHUnwind)
       End = AfterPush;
     insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup);
     BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
@@ -1123,8 +1141,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   // the necessary DWARF cf instructions to describe the situation. Start by
   // recording where each register ended up:
   if (GPRCS1Size > 0 && !NeedsWinCFI) {
-    if (STI.r11AndLRNotAdjacent(MF) &&
-        RegInfo->getFrameRegister(MF) == ARM::R11) {
+    if (STI.getPushPopSplitVariation(MF) ==
+        ARMSubtarget::PushPopSplitVariation::R11SplitAAPCSBranchSigning) {
       MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
       int CFIIndex;
       for (const auto &Entry : CSI) {
@@ -1164,7 +1182,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
         case ARM::R10:
         case ARM::R11:
         case ARM::R12:
-          if (STI.splitFramePushPopR7(MF))
+          if (STI.getPushPopSplitVariation(MF) ==
+              ARMSubtarget::PushPopSplitVariation::R7Split)
             break;
           [[fallthrough]];
         case ARM::R0:
@@ -1190,8 +1209,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
 
   if (GPRCS2Size > 0 && !NeedsWinCFI) {
     MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
-    if (STI.r11AndLRNotAdjacent(MF) &&
-        RegInfo->getFrameRegister(MF) == ARM::R11) {
+    if (STI.getPushPopSplitVariation(MF) ==
+        ARMSubtarget::PushPopSplitVariation::R11SplitAAPCSBranchSigning) {
       for (const auto &Entry : CSI) {
         Register Reg = Entry.getReg();
         int FI = Entry.getFrameIdx();
@@ -1219,7 +1238,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
         case ARM::R10:
         case ARM::R11:
         case ARM::R12:
-          if (STI.splitFramePushPopR7(MF)) {
+          if (STI.getPushPopSplitVariation(MF) ==
+              ARMSubtarget::PushPopSplitVariation::R7Split) {
             unsigned DwarfReg = MRI->getDwarfRegNum(
                 Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
             unsigned Offset = MFI.getObjectOffset(FI);
@@ -1441,7 +1461,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
 
     // Increment past our save areas.
     if (AFI->getGPRCalleeSavedArea2Size() &&
-        STI.framePointerRequiredForSEHUnwind(MF))
+        STI.getPushPopSplitVariation(MF) ==
+            ARMSubtarget::PushPopSplitVariation::R11SplitWindowsSEHUnwind)
       MBBI++;
 
     if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
@@ -1459,7 +1480,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
     }
 
     if (AFI->getGPRCalleeSavedArea2Size() &&
-        !STI.framePointerRequiredForSEHUnwind(MF))
+        STI.getPushPopSplitVariation(MF) !=
+            ARMSubtarget::PushPopSplitVariation::R11SplitWindowsSEHUnwind)
       MBBI++;
     if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
 
@@ -1599,7 +1621,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
     unsigned LastReg = 0;
     for (; i != 0; --i) {
       Register Reg = CSI[i-1].getReg();
-      if (!(Func)(Reg, STI.splitFramePushPopR7(MF)))
+      if (!(Func)(Reg, STI.getPushPopSplitVariation(MF) ==
+                           ARMSubtarget::PushPopSplitVariation::R7Split))
         continue;
 
       // D-registers in the aligned area DPRCS2 are NOT spilled here.
@@ -1693,7 +1716,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
     for (; i != 0; --i) {
       CalleeSavedInfo &Info = CSI[i-1];
       Register Reg = Info.getReg();
-      if (!(Func)(Reg, STI.splitFramePushPopR7(MF)))
+      if (!(Func)(Reg, STI.getPushPopSplitVariation(MF) ==
+                           ARMSubtarget::PushPopSplitVariation::R7Split))
         continue;
 
       // The aligned reloads from area DPRCS2 are not inserted here.
@@ -1702,7 +1726,11 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
       if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
           !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
           STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
-          !STI.splitFramePushPopR11(MF)) {
+          (STI.getPushPopSplitVariation(MF) !=
+               ARMSubtarget::PushPopSplitVariation::
+                   R11SplitAAPCSBranchSigning &&
+           STI.getPushPopSplitVariation(MF) !=
+               ARMSubtarget::PushPopSplitVariation::R11SplitWindowsSEHUnwind)) {
         Reg = ARM::PC;
         // Fold the return instruction into the LDM.
         DeleteRet = true;
@@ -2063,7 +2091,10 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
         .addImm(-4)
         .add(predOps(ARMCC::AL));
   }
-  if (STI.splitFramePushPopR11(MF)) {
+  if (STI.getPushPopSplitVariation(MF) ==
+          ARMSubtarget::PushPopSplitVariation::R11SplitAAPCSBranchSigning ||
+      STI.getPushPopSplitVariation(MF) ==
+          ARMSubtarget::PushPopSplitVariation::R11SplitWindowsSEHUnwind) {
     emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
                  &isSplitFPArea1Register, 0, MachineInstr::FrameSetup);
     emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
@@ -2108,7 +2139,10 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
   unsigned LdrOpc =
       AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
   unsigned FltOpc = ARM::VLDMDIA_UPD;
-  if (STI.splitFramePushPopR11(MF)) {
+  if (STI.getPushPopSplitVariation(MF) ==
+          ARMSubtarget::PushPopSplitVariation::R11SplitAAPCSBranchSigning ||
+      STI.getPushPopSplitVariation(MF) ==
+          ARMSubtarget::PushPopSplitVariation::R11SplitWindowsSEHUnwind) {
     emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
                 &isSplitFPArea2Register, 0);
     emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
@@ -2424,7 +2458,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
     if (Spilled) {
       NumGPRSpills++;
 
-      if (!STI.splitFramePushPopR7(MF)) {
+      if (STI.getPushPopSplitVariation(MF) !=
+          ARMSubtarget::PushPopSplitVariation::R7Split) {
         if (Reg == ARM::LR)
           LRSpilled = true;
         CS1Spilled = true;
@@ -2446,7 +2481,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
         break;
       }
     } else {
-      if (!STI.splitFramePushPopR7(MF)) {
+      if (STI.getPushPopSplitVariation(MF) !=
+          ARMSubtarget::PushPopSplitVariation::R7Split) {
         UnspilledCS1GPRs.push_back(Reg);
         continue;
       }
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index cc86afb835d0c4..5e350608d8fab7 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -494,13 +494,39 @@ bool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF,
          ARM::GPRRegClass.contains(PhysReg);
 }
 
-bool ARMSubtarget::r11AndLRNotAdjacent(const MachineFunction &MF) const {
+ARMSubtarget::PushPopSplitVariation
+ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
+  const Function &F = MF.getFunction();
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
   const std::vector<CalleeSavedInfo> CSI =
       MF.getFrameInfo().getCalleeSavedInfo();
-
+  // Returns R7Split if the frame setup must be split into two pushes, one of
+  // r0-r7,lr and another containing r8-r11 (+r12 if necessary). This is
+  // always required on Thumb1-only targets, as the push and pop instructions
+  // can't access the high registers. This is also required when R7 is the frame
+  // pointer and frame pointer elimination is disabled, or branch signing is
+  // enabled and AAPCS is disabled.
+  if ((MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
+       !createAAPCSFrameChain()) ||
+      ((getFramePointerReg() == ARM::R7 &&
+        MF.getTarget().Options.DisableFramePointerElim(MF)) ||
+       isThumb1Only()))
+    return R7Split;
+  // Returns R11SplitWindowsSEHUnwind when the stack pointer needs to be
+  // restored from the frame pointer r11 + an offset and Windows CFI is enabled.
+  // This stack unwinding cannot be expressed with SEH unwind opcodes when done
+  // with a single push, making it necessary to split the push into r4-r10, and
+  // another containing r11+lr.
+  if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+      F.needsUnwindTableEntry() &&
+      (MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
+    return R11SplitWindowsSEHUnwind;
+  // Returns R11SplitAAPCSBranchSigning if R11 and lr are not adjacent to each
+  // other in the list of callee saved registers in a frame, and branch
+  // signing is enabled.
   if (CSI.size() > 1 &&
-      MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress()) {
-
+      MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
+      getFramePointerReg() == ARM::R11) {
     bool r11InCSI = false;
     bool lrInCSI = false;
     unsigned long r11Idx = 0;
@@ -516,25 +542,7 @@ bool ARMSubtarget::r11AndLRNotAdjacent(const MachineFunction &MF) const {
       }
     }
     if (lrIdx + 1 != r11Idx && r11InCSI && lrInCSI)
-      return true;
+      return R11SplitAAPCSBranchSigning;
   }
-  return false;
-}
-
-bool ARMSubtarget::framePointerRequiredForSEHUnwind(
-    const MachineFunction &MF) const {
-  const Function &F = MF.getFunction();
-  const std::vector<CalleeSavedInfo> CSI =
-      MF.getFrameInfo().getCalleeSavedInfo();
-
-  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI() ||
-      !F.needsUnwindTableEntry())
-    return false;
-  const MachineFrameInfo &MFI = MF.getFrameInfo();
-  return MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF);
-}
-
-bool ARMSubtarget::splitFramePushPopR11(const MachineFunction &MF) const {
-  return (r11AndLRNotAdjacent(MF) && getFramePointerReg() == ARM::R11) ||
-         framePointerRequiredForSEHUnwind(MF);
+  return NoSplit;
 }
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 210d8fdb1440ee..3d58994859b6e9 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -150,6 +150,23 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
     SingleIssuePlusExtras,
   };
 
+  /// How the pushing and popping of callee saved registers to and from the
+  /// stack should be split.
+  enum PushPopSplitVariation {
+    /// r4-r11+lr (+r12 if necessary) can be pushed in a single instruction.
+    NoSplit,
+    /// The registers need to be split into a push of r4-r7+lr and another
+    /// containing r8-r11 (+r12 if necessary).
+    R7Split,
+    /// The registers need to be split into a push containing r4-r10+r12 and
+    /// another containing r11 + lr.
+    R11SplitAAPCSBranchSigning,
+    /// The registers need to be split into a push containing r4-r10 and another
+    /// containing r11 + lr. In this case, the floating point registers are
+    /// pushed between these two pushes.
+    R11SplitWindowsSEHUnwind
+  };
+
 protected:
 // Bool members corresponding to the SubtargetFeatures defined in tablegen
 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
@@ -440,29 +457,8 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
     return ARM::R11;
   }
 
-  /// Returns true if the frame setup is split into two separate pushes (first
-  /// r0-r7,lr then r8-r11), principally so that the frame pointer r7 is
-  /// adjacent to lr. This is always required on Thumb1-only targets, as the
-  /// push and pop instructions can't access the high registers.
-  bool splitFramePushPopR7(const MachineFunction &MF) const {
-    if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
-        !createAAPCSFrameChain())
-      return true;
-    return (getFramePointerReg() == ARM::R7 &&
-            MF.getTarget().Options.DisableFramePointerElim(MF)) ||
-           isThumb1Only();
-  }
-
-  bool framePointerRequiredForSEHUnwind(const MachineFunction &MF) const;
-
-  // Returns true if R11 and lr are not adjacent to each other in the list of
-  // callee saved registers in a frame.
-  bool r11AndLRNotAdjacent(const MachineFunction &MF) const;
-
-  // Returns true if the frame setup is split into two separate pushes (first
-  // r0-r10,r12 then r11,lr), principally so that the frame pointer r11 is
-  // adjacent to lr.
-  bool splitFramePushPopR11(const MachineFunction &MF) const;
+  enum PushPopSplitVariation
+  getPushPopSplitVariation(const MachineFunction &MF) const;
 
   bool useStride4VFPs() const;
 
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 0ad7b96453d1c5..d4733fb26b0e1a 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -222,7 +222,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
     case ARM::R8:
     case ARM::R9:
     case ARM::R10:
-      if (STI.splitFramePushPopR7(MF)) {
+      if (STI.getPushPopSplitVariation(MF) ==
+          ARMSubtarget::PushPopSplitVariation::R7Split) {
         GPRCS2Size += 4;
         break;
       }
@@ -366,7 +367,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
       case ARM::R10:
       case ARM::R11:
       case ARM::R12:
-        if (STI.splitFramePushPopR7(MF))
+        if (STI.getPushPopSplitVariation(MF) ==
+            ARMSubtarget::PushPopSplitVariation::R7Split)
           break;
         [[fallthrough]];
       case ARM::R0:

>From a281e71ec152bbeec084c4e89c9959ac4c9d4511 Mon Sep 17 00:00:00 2001
From: James Westwood <james.westwood at arm.com>
Date: Fri, 1 Mar 2024 15:59:26 +0000
Subject: [PATCH 3/3] Fixes for review comments: removed unnecessary if
 statements and amended comments.

---
 llvm/lib/Target/ARM/ARMFrameLowering.cpp    | 23 +++++++---------
 llvm/lib/Target/ARM/ARMSubtarget.cpp        | 29 +++++----------------
 llvm/lib/Target/ARM/ARMSubtarget.h          |  5 +++-
 llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 17 ++----------
 4 files changed, 22 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index d7fc3c7ba4cbf6..9cafc2189116c2 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -933,7 +933,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
   AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
 
-  // Move past area 2, unless following Win_AAPCS_CFGuard calling convention.
+  // Move past area 2, unless following the CSR_Win_SplitFP calling convention.
   if (GPRCS2Size > 0 &&
       STI.getPushPopSplitVariation(MF) !=
           ARMSubtarget::PushPopSplitVariation::R11SplitWindowsSEHUnwind) {
@@ -976,7 +976,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   } else
     NumBytes = DPRCSOffset;
 
-  // Move past area 2 if following Win_AAPCS_CFGuard calling convention.
+  // Move past area 2 if following the CSR_Win_SplitFP calling convention.
   if (GPRCS2Size > 0 &&
       STI.getPushPopSplitVariation(MF) ==
           ARMSubtarget::PushPopSplitVariation::R11SplitWindowsSEHUnwind) {
@@ -1238,17 +1238,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
         case ARM::R10:
         case ARM::R11:
         case ARM::R12:
-          if (STI.getPushPopSplitVariation(MF) ==
-              ARMSubtarget::PushPopSplitVariation::R7Split) {
-            unsigned DwarfReg = MRI->getDwarfRegNum(
-                Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
-            unsigned Offset = MFI.getObjectOffset(FI);
-            unsigned CFIIndex = MF.addFrameInst(
-                MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
-            BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
-                .addCFIIndex(CFIIndex)
-                .setMIFlags(MachineInstr::FrameSetup);
-          }
+          unsigned DwarfReg = MRI->getDwarfRegNum(
+              Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
+          unsigned Offset = MFI.getObjectOffset(FI);
+          unsigned CFIIndex = MF.addFrameInst(
+              MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+          BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+              .addCFIIndex(CFIIndex)
+              .setMIFlags(MachineInstr::FrameSetup);
           break;
         }
       }
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 5e350608d8fab7..9da0415e244009 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -508,9 +508,9 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
   // enabled and AAPCS is disabled.
   if ((MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
        !createAAPCSFrameChain()) ||
-      ((getFramePointerReg() == ARM::R7 &&
-        MF.getTarget().Options.DisableFramePointerElim(MF)) ||
-       isThumb1Only()))
+      (getFramePointerReg() == ARM::R7 &&
+       MF.getTarget().Options.DisableFramePointerElim(MF)) ||
+      isThumb1Only())
     return R7Split;
   // Returns R11SplitWindowsSEHUnwind when the stack pointer needs to be
   // restored from the frame pointer r11 + an offset and Windows CFI is enabled.
@@ -524,25 +524,8 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
   // Returns R11SplitAAPCSBranchSigning if R11 and lr are not adjacent to each
   // other in the list of callee saved registers in a frame, and branch
   // signing is enabled.
-  if (CSI.size() > 1 &&
-      MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
-      getFramePointerReg() == ARM::R11) {
-    bool r11InCSI = false;
-    bool lrInCSI = false;
-    unsigned long r11Idx = 0;
-    unsigned long lrIdx = 0;
-
-    for (unsigned long i = 0; i < CSI.size(); i++) {
-      if (CSI[i].getReg() == ARM::LR) {
-        lrIdx = i;
-        lrInCSI = true;
-      } else if (CSI[i].getReg() == ARM::R11) {
-        r11Idx = i;
-        r11InCSI = true;
-      }
-    }
-    if (lrIdx + 1 != r11Idx && r11InCSI && lrInCSI)
-      return R11SplitAAPCSBranchSigning;
-  }
+  if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
+      getFramePointerReg() == ARM::R11)
+    return R11SplitAAPCSBranchSigning;
   return NoSplit;
 }
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 3d58994859b6e9..27dfa8e09d7400 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -156,7 +156,10 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
     /// r4-r11+lr (+r12 if necessary) can be pushed in a single instruction.
     NoSplit,
     /// The registers need to be split into a push of r4-r7+lr and another
-    /// containing r8-r11 (+r12 if necessary).
+    /// containing r8-r11 (+r12 if necessary). Thumb1FrameLowering has its own
+    /// handling for the case where AAPCS is enabled and r11+lr must be in a
+    /// separate push, so this value is also used as the push variation in
+    /// that case.
     R7Split,
     /// The registers need to be split into a push containing r4-r10+r12 and
     /// another containing r11 + lr.
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index d4733fb26b0e1a..f1558e64ed3eed 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -222,12 +222,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
     case ARM::R8:
     case ARM::R9:
     case ARM::R10:
-      if (STI.getPushPopSplitVariation(MF) ==
-          ARMSubtarget::PushPopSplitVariation::R7Split) {
-        GPRCS2Size += 4;
-        break;
-      }
-      [[fallthrough]];
+      GPRCS2Size += 4;
+      break;
     case ARM::LR:
       if (HasFrameRecordArea) {
         FRSize += 4;
@@ -362,15 +358,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
       Register Reg = I.getReg();
       int FI = I.getFrameIdx();
       switch (Reg) {
-      case ARM::R8:
-      case ARM::R9:
-      case ARM::R10:
-      case ARM::R11:
-      case ARM::R12:
-        if (STI.getPushPopSplitVariation(MF) ==
-            ARMSubtarget::PushPopSplitVariation::R7Split)
-          break;
-        [[fallthrough]];
       case ARM::R0:
       case ARM::R1:
       case ARM::R2:



More information about the llvm-commits mailing list