[llvm] [ARM] Refactor ARMFrameLowering (NFC) (PR #110283)

Oliver Stannard via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 7 03:46:39 PDT 2024


https://github.com/ostannard updated https://github.com/llvm/llvm-project/pull/110283

>From 02f008ae83e16131b58b75f4befef2be4bcedbc4 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Wed, 25 Sep 2024 17:14:18 +0100
Subject: [PATCH 1/9] [ARM] Tidy up stack frame strategy code (NFC)

We have two different ways of splitting the pushes of callee-saved
registers onto the stack, controlled by the confusingly similar names
STI.splitFramePushPop() and STI.splitFramePointerPush(). This removes
those functions and replaces them with a single function which returns
an enum. This is in preparation for adding another value to that enum.

The original work of this patch was done by James Westwood, reviewed as
 #82801 and #81249, with some tidy-ups done by Mark Murray and myself.
---
 llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 24 ++++---
 llvm/lib/Target/ARM/ARMFrameLowering.cpp    | 71 +++++++++++++--------
 llvm/lib/Target/ARM/ARMSubtarget.cpp        | 33 ++++++++--
 llvm/lib/Target/ARM/ARMSubtarget.h          | 41 ++++++++----
 llvm/lib/Target/ARM/Thumb1FrameLowering.cpp |  6 +-
 5 files changed, 120 insertions(+), 55 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index c149db3144c7c2..9bb1e9754217a1 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -62,27 +62,30 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo()
 const MCPhysReg*
 ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
-  bool UseSplitPush = STI.splitFramePushPop(*MF);
+  ARMSubtarget::PushPopSplitVariation PushPopSplit =
+                                      STI.getPushPopSplitVariation(*MF);
   const Function &F = MF->getFunction();
 
   if (F.getCallingConv() == CallingConv::GHC) {
     // GHC set of callee saved regs is empty as all those regs are
     // used for passing STG regs around
     return CSR_NoRegs_SaveList;
-  } else if (STI.splitFramePointerPush(*MF)) {
+  } else if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
     return CSR_Win_SplitFP_SaveList;
   } else if (F.getCallingConv() == CallingConv::CFGuard_Check) {
     return CSR_Win_AAPCS_CFGuard_Check_SaveList;
   } else if (F.getCallingConv() == CallingConv::SwiftTail) {
-    return STI.isTargetDarwin()
-               ? CSR_iOS_SwiftTail_SaveList
-               : (UseSplitPush ? CSR_ATPCS_SplitPush_SwiftTail_SaveList
-                               : CSR_AAPCS_SwiftTail_SaveList);
+    return STI.isTargetDarwin() ? CSR_iOS_SwiftTail_SaveList
+                                : (PushPopSplit == ARMSubtarget::SplitR7
+                                       ? CSR_ATPCS_SplitPush_SwiftTail_SaveList
+                                       : CSR_AAPCS_SwiftTail_SaveList);
   } else if (F.hasFnAttribute("interrupt")) {
     if (STI.isMClass()) {
       // M-class CPUs have hardware which saves the registers needed to allow a
       // function conforming to the AAPCS to function as a handler.
-      return UseSplitPush ? CSR_ATPCS_SplitPush_SaveList : CSR_AAPCS_SaveList;
+      return PushPopSplit == ARMSubtarget::SplitR7
+                 ? CSR_ATPCS_SplitPush_SaveList
+                 : CSR_AAPCS_SaveList;
     } else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") {
       // Fast interrupt mode gives the handler a private copy of R8-R14, so less
       // need to be saved to restore user-mode state.
@@ -99,8 +102,9 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     if (STI.isTargetDarwin())
       return CSR_iOS_SwiftError_SaveList;
 
-    return UseSplitPush ? CSR_ATPCS_SplitPush_SwiftError_SaveList :
-      CSR_AAPCS_SwiftError_SaveList;
+    return PushPopSplit == ARMSubtarget::SplitR7
+               ? CSR_ATPCS_SplitPush_SwiftError_SaveList
+               : CSR_AAPCS_SwiftError_SaveList;
   }
 
   if (STI.isTargetDarwin() && F.getCallingConv() == CallingConv::CXX_FAST_TLS)
@@ -111,7 +115,7 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   if (STI.isTargetDarwin())
     return CSR_iOS_SaveList;
 
-  if (UseSplitPush)
+  if (PushPopSplit == ARMSubtarget::SplitR7)
     return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList
                                        : CSR_ATPCS_SplitPush_SaveList;
 
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 40354f99559896..2df9756b9cd753 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -713,6 +713,8 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
 /// this to produce a conservative estimate that we check in an assert() later.
 static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
                           const MachineFunction &MF) {
+  ARMSubtarget::PushPopSplitVariation PushPopSplit =
+                                      STI.getPushPopSplitVariation(MF);
   // For Thumb1, push.w isn't available, so the first push will always push
   // r7 and lr onto the stack first.
   if (AFI.isThumb1OnlyFunction())
@@ -720,9 +722,8 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
   // This is a conservative estimation: Assume the frame pointer being r7 and
   // pc("r15") up to r8 getting spilled before (= 8 registers).
   int MaxRegBytes = 8 * 4;
-  if (STI.splitFramePointerPush(MF)) {
-    // Here, r11 can be stored below all of r4-r15 (3 registers more than
-    // above), plus d8-d15.
+  if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
+    // Here, r11 can be stored below all of r4-r15 plus d8-d15.
     MaxRegBytes = 11 * 4 + 8 * 8;
   }
   int FPCXTSaveSize =
@@ -749,6 +750,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
   int FPCXTSaveSize = 0;
   bool NeedsWinCFI = needsWinCFI(MF);
+  ARMSubtarget::PushPopSplitVariation PushPopSplit =
+                                      STI.getPushPopSplitVariation(MF);
 
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
@@ -789,7 +792,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   }
 
   // Determine spill area sizes.
-  if (STI.splitFramePointerPush(MF)) {
+  if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
     for (const CalleeSavedInfo &I : CSI) {
       Register Reg = I.getReg();
       int FI = I.getFrameIdx();
@@ -835,7 +838,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
       case ARM::R10:
       case ARM::R11:
       case ARM::R12:
-        if (STI.splitFramePushPop(MF)) {
+        if (PushPopSplit == ARMSubtarget::SplitR7) {
           GPRCS2Size += 4;
           break;
         }
@@ -898,13 +901,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
   Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
   unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
-  if (!STI.splitFramePointerPush(MF)) {
+  if (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
     DPRGapSize += GPRCS2Size;
   }
   DPRGapSize %= DPRAlign.value();
 
   unsigned DPRCSOffset;
-  if (STI.splitFramePointerPush(MF)) {
+  if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
     DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
     GPRCS2Offset = DPRCSOffset - GPRCS2Size;
   } else {
@@ -923,8 +926,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
   AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
 
-  // Move past area 2.
-  if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
+  // Move GPRCS2, unless using SplitR11WindowsSEH, in which case it will be
+  // after DPRCS1.
+  if (GPRCS2Size > 0 && PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
     GPRCS2Push = LastPush = MBBI++;
     DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
   }
@@ -943,7 +947,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
     }
   }
 
-  // Move past area 3.
+  // Move past DPRCS1.
   if (DPRCSSize > 0) {
     // Since vpush register list cannot have gaps, there may be multiple vpush
     // instructions in the prologue.
@@ -964,13 +968,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   } else
     NumBytes = DPRCSOffset;
 
-  if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
+  // Move GPRCS2, if using using SplitR11WindowsSEH.
+  if (GPRCS2Size > 0 && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
     GPRCS2Push = LastPush = MBBI++;
     DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
   }
 
   bool NeedsWinCFIStackAlloc = NeedsWinCFI;
-  if (STI.splitFramePointerPush(MF) && HasFP)
+  if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH && HasFP)
     NeedsWinCFIStackAlloc = false;
 
   if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
@@ -1075,7 +1080,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
     AfterPush = std::next(GPRCS1Push);
     unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
     int FPOffset = PushSize + FramePtrOffsetInPush;
-    if (STI.splitFramePointerPush(MF)) {
+    if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
       AfterPush = std::next(GPRCS2Push);
       emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
                            FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
@@ -1107,7 +1112,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   // instructions below don't need to be replayed to unwind the stack.
   if (NeedsWinCFI && MBBI != MBB.begin()) {
     MachineBasicBlock::iterator End = MBBI;
-    if (HasFP && STI.splitFramePointerPush(MF))
+    if (HasFP && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
       End = AfterPush;
     insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup);
     BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
@@ -1130,7 +1135,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
       case ARM::R10:
       case ARM::R11:
       case ARM::R12:
-        if (STI.splitFramePushPop(MF))
+        if (PushPopSplit == ARMSubtarget::SplitR7)
           break;
         [[fallthrough]];
       case ARM::R0:
@@ -1163,7 +1168,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
       case ARM::R10:
       case ARM::R11:
       case ARM::R12:
-        if (STI.splitFramePushPop(MF)) {
+        if (PushPopSplit == ARMSubtarget::SplitR7) {
           unsigned DwarfReg = MRI->getDwarfRegNum(
               Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
           int64_t Offset = MFI.getObjectOffset(FI);
@@ -1280,6 +1285,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
   assert(!AFI->isThumb1OnlyFunction() &&
          "This emitEpilogue does not support Thumb1!");
   bool isARM = !AFI->isThumbFunction();
+  ARMSubtarget::PushPopSplitVariation PushPopSplit =
+                                      STI.getPushPopSplitVariation(MF);
 
   // Amount of stack space we reserved next to incoming args for either
   // varargs registers or stack arguments in tail calls made by this function.
@@ -1383,7 +1390,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                    MachineInstr::FrameDestroy);
 
     // Increment past our save areas.
-    if (AFI->getGPRCalleeSavedArea2Size() && STI.splitFramePointerPush(MF))
+    if (AFI->getGPRCalleeSavedArea2Size() &&
+        PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
       MBBI++;
 
     if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
@@ -1400,7 +1408,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                    MachineInstr::FrameDestroy);
     }
 
-    if (AFI->getGPRCalleeSavedArea2Size() && !STI.splitFramePointerPush(MF))
+    if (AFI->getGPRCalleeSavedArea2Size() &&
+        PushPopSplit != ARMSubtarget::SplitR11WindowsSEH)
       MBBI++;
     if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
 
@@ -1529,6 +1538,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
   MachineFunction &MF = *MBB.getParent();
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+  ARMSubtarget::PushPopSplitVariation PushPopSplit =
+                                      STI.getPushPopSplitVariation(MF);
 
   DebugLoc DL;
 
@@ -1540,7 +1551,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
     unsigned LastReg = 0;
     for (; i != 0; --i) {
       Register Reg = CSI[i-1].getReg();
-      if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
+      if (!(Func)(Reg, PushPopSplit == ARMSubtarget::SplitR7))
+        continue;
 
       // D-registers in the aligned area DPRCS2 are NOT spilled here.
       if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -1613,6 +1625,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
   bool isInterrupt = false;
   bool isTrap = false;
   bool isCmseEntry = false;
+  ARMSubtarget::PushPopSplitVariation PushPopSplit =
+                                      STI.getPushPopSplitVariation(MF);
   if (MBB.end() != MI) {
     DL = MI->getDebugLoc();
     unsigned RetOpcode = MI->getOpcode();
@@ -1635,7 +1649,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
     for (; i != 0; --i) {
       CalleeSavedInfo &Info = CSI[i-1];
       Register Reg = Info.getReg();
-      if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
+      if (!(Func)(Reg, PushPopSplit == ARMSubtarget::SplitR7))
+        continue;
 
       // The aligned reloads from area DPRCS2 are not inserted here.
       if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -1643,7 +1658,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
       if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
           !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
           STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
-          !STI.splitFramePointerPush(MF)) {
+          PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
         Reg = ARM::PC;
         // Fold the return instruction into the LDM.
         DeleteRet = true;
@@ -1983,6 +1998,8 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
 
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  ARMSubtarget::PushPopSplitVariation PushPopSplit =
+                                      STI.getPushPopSplitVariation(MF);
 
   unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
   unsigned PushOneOpc = AFI->isThumbFunction() ?
@@ -2004,7 +2021,7 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
         .addImm(-4)
         .add(predOps(ARMCC::AL));
   }
-  if (STI.splitFramePointerPush(MF)) {
+  if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
     emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
                  &isSplitFPArea1Register, 0, MachineInstr::FrameSetup);
     emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
@@ -2039,6 +2056,8 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   bool isVarArg = AFI->getArgRegsSaveSize() > 0;
   unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+  ARMSubtarget::PushPopSplitVariation PushPopSplit =
+                                      STI.getPushPopSplitVariation(MF);
 
   // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
   // registers. Do that here instead.
@@ -2049,7 +2068,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
   unsigned LdrOpc =
       AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
   unsigned FltOpc = ARM::VLDMDIA_UPD;
-  if (STI.splitFramePointerPush(MF)) {
+  if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
     emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
                 &isSplitFPArea2Register, 0);
     emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
@@ -2287,6 +2306,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   (void)TRI;  // Silence unused warning in non-assert builds.
   Register FramePtr = RegInfo->getFrameRegister(MF);
+  ARMSubtarget::PushPopSplitVariation PushPopSplit =
+                                      STI.getPushPopSplitVariation(MF);
 
   // Spill R4 if Thumb2 function requires stack realignment - it will be used as
   // scratch register. Also spill R4 if Thumb2 function has varsized objects,
@@ -2365,7 +2386,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
     if (Spilled) {
       NumGPRSpills++;
 
-      if (!STI.splitFramePushPop(MF)) {
+      if (PushPopSplit != ARMSubtarget::SplitR7) {
         if (Reg == ARM::LR)
           LRSpilled = true;
         CS1Spilled = true;
@@ -2387,7 +2408,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
         break;
       }
     } else {
-      if (!STI.splitFramePushPop(MF)) {
+      if (PushPopSplit != ARMSubtarget::SplitR7) {
         UnspilledCS1GPRs.push_back(Reg);
         continue;
       }
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index f9d822873bb004..c4a782bc40910a 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -485,11 +485,34 @@ bool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF,
          ARM::GPRRegClass.contains(PhysReg);
 }
 
-bool ARMSubtarget::splitFramePointerPush(const MachineFunction &MF) const {
+ARMSubtarget::PushPopSplitVariation
+ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
   const Function &F = MF.getFunction();
-  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI() ||
-      !F.needsUnwindTableEntry())
-    return false;
   const MachineFrameInfo &MFI = MF.getFrameInfo();
-  return MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF);
+  const std::vector<CalleeSavedInfo> CSI =
+      MF.getFrameInfo().getCalleeSavedInfo();
+
+  // Returns SplitR7 if the frame setup must be split into two separate pushes
+  // of r0-r7,lr and another containing r8-r11 (+r12 if necessary). This is
+  // always required on Thumb1-only targets, as the push and pop instructions
+  // can't access the high registers. This is also required when R7 is the frame
+  // pointer and frame pointer elimiination is disabled, or branch signing is
+  // enabled and AAPCS is disabled.
+  if ((MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
+       !createAAPCSFrameChain()) ||
+      (getFramePointerReg() == ARM::R7 &&
+       MF.getTarget().Options.DisableFramePointerElim(MF)) ||
+      isThumb1Only())
+    return SplitR7;
+
+  // Returns SplitR11WindowsSEH when the stack pointer needs to be
+  // restored from the frame pointer r11 + an offset and Windows CFI is enabled.
+  // This stack unwinding cannot be expressed with SEH unwind opcodes when done
+  // with a single push, making it necessary to split the push into r4-r10, and
+  // another containing r11+lr.
+  if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+      F.needsUnwindTableEntry() &&
+      (MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
+    return SplitR11WindowsSEH;
+  return NoSplit;
 }
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 1ca5bd4a620c4c..bea3e1441eaaed 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -81,6 +81,32 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
     SingleIssuePlusExtras,
   };
 
+  /// How the push and pop instructions of callee saved general-purpose
+  /// registers should be split.
+  enum PushPopSplitVariation {
+    /// All GPRs can be pushed in a single instruction.
+    /// push {r0-r12, lr}
+    /// vpush {d8-d15}
+    NoSplit,
+
+    /// R7 and LR must be adjacent, because R7 is the frame pointer, and must
+    /// point to a frame record consisting of the previous frame pointer and the
+    /// retun address.
+    /// push {r0-r7, lr}
+    /// push {r8-r12}
+    /// vpush {d8-d15}
+    SplitR7,
+
+    /// When the stack frame size if now known (because of variable-sized
+    /// objects or realignment), Windows SEH requires the callee-saved registers
+    /// to be stored in three regions, with R11 and LR below the floating-point
+    /// registers.
+    /// push {r0-r10, r12}
+    /// vpush {d8-d15}
+    /// push {r11, lr}
+    SplitR11WindowsSEH,
+  };
+
 protected:
 // Bool members corresponding to the SubtargetFeatures defined in tablegen
 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
@@ -371,19 +397,8 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
     return ARM::R11;
   }
 
-  /// Returns true if the frame setup is split into two separate pushes (first
-  /// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent
-  /// to lr. This is always required on Thumb1-only targets, as the push and
-  /// pop instructions can't access the high registers.
-  bool splitFramePushPop(const MachineFunction &MF) const {
-    if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress())
-      return true;
-    return (getFramePointerReg() == ARM::R7 &&
-            MF.getTarget().Options.DisableFramePointerElim(MF)) ||
-           isThumb1Only();
-  }
-
-  bool splitFramePointerPush(const MachineFunction &MF) const;
+  enum PushPopSplitVariation
+  getPushPopSplitVariation(const MachineFunction &MF) const;
 
   bool useStride4VFPs() const;
 
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index cb9ded7dee57b9..ad39539232a8ca 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -160,6 +160,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
   assert(NumBytes >= ArgRegsSaveSize &&
          "ArgRegsSaveSize is included in NumBytes");
   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+  ARMSubtarget::PushPopSplitVariation PushPopSplit =
+                                      STI.getPushPopSplitVariation(MF);
 
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
@@ -221,7 +223,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
     case ARM::R8:
     case ARM::R9:
     case ARM::R10:
-      if (STI.splitFramePushPop(MF)) {
+      if (PushPopSplit == ARMSubtarget::SplitR7) {
         GPRCS2Size += 4;
         break;
       }
@@ -365,7 +367,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
       case ARM::R10:
       case ARM::R11:
       case ARM::R12:
-        if (STI.splitFramePushPop(MF))
+        if (PushPopSplit == ARMSubtarget::SplitR7)
           break;
         [[fallthrough]];
       case ARM::R0:

>From a48e68623503138ac88d3d0fb45e0000d663335a Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Wed, 25 Sep 2024 16:17:17 +0100
Subject: [PATCH 2/9] [ARM] Factor out code to determine spill areas (NFC)

There were multiple loops in ARMFrameLowering which sort the callee
saved registers into spill areas, which were hard to understand and
modify. This splits the information about which register is in which
save area into a separate function.
---
 llvm/lib/Target/ARM/ARMFrameLowering.cpp | 294 ++++++++++++-----------
 1 file changed, 150 insertions(+), 144 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 2df9756b9cd753..6307b4f4b3bb34 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -173,6 +173,95 @@ static MachineBasicBlock::iterator
 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                         unsigned NumAlignedDPRCS2Regs);
 
+enum class SpillArea {
+  GPRCS1,
+  GPRCS2,
+  DPRCS1,
+  DPRCS2,
+  FPCXT,
+};
+
+/// Get the spill area that Reg should be saved into in the prologue.
+SpillArea getSpillArea(Register Reg,
+                       ARMSubtarget::PushPopSplitVariation Variation,
+                       unsigned NumAlignedDPRCS2Regs,
+                       const ARMBaseRegisterInfo *RegInfo) {
+  // NoSplit:
+  // push {r0-r12, lr}   GPRCS1
+  // vpush {r8-d15}      DPRCS1
+  //
+  // SplitR7:
+  // push {r0-r7, lr}    GPRCS1
+  // push {r8-r12}       GPRCS2
+  // vpush {r8-d15}      DPRCS1
+  //
+  // SplitR11WindowsSEH:
+  // push {r0-r10, r12}  GPRCS1
+  // vpush {r8-d15}      DPRCS1
+  // push {r11, lr}      GPRCS2
+
+  // If FPCXTNS is spilled (for CMSE secure entryfunctions), it is always at
+  // the top of the stack frame.
+  // The DPRCS2 region is used for ABIs which only guarantee 4-byte alignment
+  // of SP. If used, it will be below the other save areas, after the stack has
+  // been re-aligned.
+
+  switch (Reg) {
+  default:
+    dbgs() << "Don't know where to spill " << printReg(Reg, RegInfo) << "\n";
+    llvm_unreachable("Don't know where to spill this register");
+    break;
+
+  case ARM::FPCXTNS:
+    return SpillArea::FPCXT;
+
+  case ARM::R0: case ARM::R1: case ARM::R2: case ARM::R3:
+  case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7:
+    return SpillArea::GPRCS1;
+
+  case ARM::R8: case ARM::R9: case ARM::R10:
+    if (Variation == ARMSubtarget::SplitR7)
+      return SpillArea::GPRCS2;
+    else
+      return SpillArea::GPRCS1;
+
+  case ARM::R11:
+    if (Variation == ARMSubtarget::NoSplit)
+      return SpillArea::GPRCS1;
+    else
+      return SpillArea::GPRCS2;
+
+  case ARM::R12:
+    if (Variation == ARMSubtarget::SplitR7)
+      return SpillArea::GPRCS2;
+    else
+      return SpillArea::GPRCS1;
+
+  case ARM::LR:
+    if (Variation == ARMSubtarget::SplitR11WindowsSEH)
+      return SpillArea::GPRCS2;
+    else
+      return SpillArea::GPRCS1;
+
+  case ARM::D0: case ARM::D1: case ARM::D2: case ARM::D3:
+  case ARM::D4: case ARM::D5: case ARM::D6: case ARM::D7:
+    return SpillArea::DPRCS1;
+
+  case ARM::D8:  case ARM::D9:  case ARM::D10: case ARM::D11:
+  case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15:
+    if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+      return SpillArea::DPRCS2;
+    else
+      return SpillArea::DPRCS1;
+
+  case ARM::D16: case ARM::D17: case ARM::D18: case ARM::D19:
+  case ARM::D20: case ARM::D21: case ARM::D22: case ARM::D23:
+  case ARM::D24: case ARM::D25: case ARM::D26: case ARM::D27:
+  case ARM::D28: case ARM::D29: case ARM::D30: case ARM::D31:
+    return SpillArea::DPRCS1;
+  }
+}
+
 ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
     : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
       STI(sti) {}
@@ -791,81 +880,32 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
     return;
   }
 
-  // Determine spill area sizes.
-  if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
-    for (const CalleeSavedInfo &I : CSI) {
-      Register Reg = I.getReg();
-      int FI = I.getFrameIdx();
-      switch (Reg) {
-      case ARM::R11:
-      case ARM::LR:
-        if (Reg == FramePtr)
-          FramePtrSpillFI = FI;
-        GPRCS2Size += 4;
-        break;
-      case ARM::R0:
-      case ARM::R1:
-      case ARM::R2:
-      case ARM::R3:
-      case ARM::R4:
-      case ARM::R5:
-      case ARM::R6:
-      case ARM::R7:
-      case ARM::R8:
-      case ARM::R9:
-      case ARM::R10:
-      case ARM::R12:
-        GPRCS1Size += 4;
-        break;
-      case ARM::FPCXTNS:
-        FPCXTSaveSize = 4;
-        break;
-      default:
-        // This is a DPR. Exclude the aligned DPRCS2 spills.
-        if (Reg == ARM::D8)
-          D8SpillFI = FI;
-        if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
-          DPRCSSize += 8;
-      }
-    }
-  } else {
-    for (const CalleeSavedInfo &I : CSI) {
-      Register Reg = I.getReg();
-      int FI = I.getFrameIdx();
-      switch (Reg) {
-      case ARM::R8:
-      case ARM::R9:
-      case ARM::R10:
-      case ARM::R11:
-      case ARM::R12:
-        if (PushPopSplit == ARMSubtarget::SplitR7) {
-          GPRCS2Size += 4;
-          break;
-        }
-        [[fallthrough]];
-      case ARM::R0:
-      case ARM::R1:
-      case ARM::R2:
-      case ARM::R3:
-      case ARM::R4:
-      case ARM::R5:
-      case ARM::R6:
-      case ARM::R7:
-      case ARM::LR:
-        if (Reg == FramePtr)
-          FramePtrSpillFI = FI;
-        GPRCS1Size += 4;
-        break;
-      case ARM::FPCXTNS:
-        FPCXTSaveSize = 4;
-        break;
-      default:
-        // This is a DPR. Exclude the aligned DPRCS2 spills.
-        if (Reg == ARM::D8)
-          D8SpillFI = FI;
-        if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
-          DPRCSSize += 8;
-      }
+  // Determine spill area sizes, and some important frame indices.
+  for (const CalleeSavedInfo &I : CSI) {
+    Register Reg = I.getReg();
+    int FI = I.getFrameIdx();
+
+    if (Reg == FramePtr)
+      FramePtrSpillFI = FI;
+    if (Reg == ARM::D8)
+      D8SpillFI = FI;
+
+    switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(),
+                         RegInfo)) {
+    case SpillArea::FPCXT:
+      FPCXTSaveSize += 4;
+      break;
+    case SpillArea::GPRCS1:
+      GPRCS1Size += 4;
+      break;
+    case SpillArea::GPRCS2:
+      GPRCS2Size += 4;
+      break;
+    case SpillArea::DPRCS1:
+      DPRCSSize += 8;
+      break;
+    case SpillArea::DPRCS2:
+      break;
     }
   }
 
@@ -895,7 +935,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
     DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
   }
 
-  // Determine starting offsets of spill areas.
+  // Determine starting offsets of spill areas. These offsets are all positive
+  // offsets from the bottom of the lowest-addressed callee-save area
+  // (excluding DPRCS2, which is th the re-aligned stack region) to the bottom
+  // of the spill area in question.
   unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
   unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
   unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
@@ -915,10 +958,19 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   }
   int FramePtrOffsetInPush = 0;
   if (HasFP) {
+    // Offset from the CFA to the saved frame pointer, will be negative.
     int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
+    LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
+                      << ", FPOffset: " << FPOffset << "\n");
     assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
            "Max FP estimation is wrong");
+    // Offset from the top of the GPRCS1 area to the saved frame pointer, will
+    // be negative.
     FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
+    LLVM_DEBUG(dbgs() << "FramePtrOffsetInPush=" << FramePtrOffsetInPush
+                      << ", FramePtrSpillOffset="
+                      << (MFI.getObjectOffset(FramePtrSpillFI) + NumBytes)
+                      << "\n");
     AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
                                 NumBytes);
   }
@@ -1123,80 +1175,34 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   // Now that the prologue's actual instructions are finalised, we can insert
   // the necessary DWARF cf instructions to describe the situation. Start by
   // recording where each register ended up:
-  if (GPRCS1Size > 0 && !NeedsWinCFI) {
-    MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
-    int CFIIndex;
-    for (const auto &Entry : CSI) {
+  if (!NeedsWinCFI) {
+    for (const auto &Entry : reverse(CSI)) {
       Register Reg = Entry.getReg();
       int FI = Entry.getFrameIdx();
-      switch (Reg) {
-      case ARM::R8:
-      case ARM::R9:
-      case ARM::R10:
-      case ARM::R11:
-      case ARM::R12:
-        if (PushPopSplit == ARMSubtarget::SplitR7)
-          break;
-        [[fallthrough]];
-      case ARM::R0:
-      case ARM::R1:
-      case ARM::R2:
-      case ARM::R3:
-      case ARM::R4:
-      case ARM::R5:
-      case ARM::R6:
-      case ARM::R7:
-      case ARM::LR:
-        CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
-            nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
-        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
-            .addCFIIndex(CFIIndex)
-            .setMIFlags(MachineInstr::FrameSetup);
+      MachineBasicBlock::iterator CFIPos;
+      switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(),
+                           RegInfo)) {
+      case SpillArea::GPRCS1:
+        CFIPos = std::next(GPRCS1Push);
         break;
-      }
-    }
-  }
-
-  if (GPRCS2Size > 0 && !NeedsWinCFI) {
-    MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
-    for (const auto &Entry : CSI) {
-      Register Reg = Entry.getReg();
-      int FI = Entry.getFrameIdx();
-      switch (Reg) {
-      case ARM::R8:
-      case ARM::R9:
-      case ARM::R10:
-      case ARM::R11:
-      case ARM::R12:
-        if (PushPopSplit == ARMSubtarget::SplitR7) {
-          unsigned DwarfReg = MRI->getDwarfRegNum(
-              Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
-          int64_t Offset = MFI.getObjectOffset(FI);
-          unsigned CFIIndex = MF.addFrameInst(
-              MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
-          BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
-              .addCFIIndex(CFIIndex)
-              .setMIFlags(MachineInstr::FrameSetup);
-        }
+      case SpillArea::GPRCS2:
+        CFIPos = std::next(GPRCS2Push);
+        break;
+      case SpillArea::DPRCS1:
+        CFIPos = std::next(LastPush);
+        break;
+      case SpillArea::FPCXT:
+      case SpillArea::DPRCS2:
+        // FPCXT and DPRCS2 are not represented in the DWARF info.
         break;
       }
-    }
-  }
 
-  if (DPRCSSize > 0 && !NeedsWinCFI) {
-    // Since vpush register list cannot have gaps, there may be multiple vpush
-    // instructions in the prologue.
-    MachineBasicBlock::iterator Pos = std::next(LastPush);
-    for (const auto &Entry : CSI) {
-      Register Reg = Entry.getReg();
-      int FI = Entry.getFrameIdx();
-      if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
-          (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
-        unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
-        int64_t Offset = MFI.getObjectOffset(FI);
-        unsigned CFIIndex = MF.addFrameInst(
-            MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
-        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+      if (CFIPos.isValid()) {
+        int CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+            nullptr,
+            MRI->getDwarfRegNum(Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true),
+            MFI.getObjectOffset(FI)));
+        BuildMI(MBB, CFIPos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
             .addCFIIndex(CFIIndex)
             .setMIFlags(MachineInstr::FrameSetup);
       }

>From 8eac8ccb448789319ed11d8b2f05c0f7b6d41424 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Fri, 27 Sep 2024 15:25:39 +0100
Subject: [PATCH 3/9] [ARM] Refactor generation of push/pop instructions (NFC)

These used a set of callback functions to check which callee-save area a
register is in, refactor them to use the same data as other parts of
ARMFrameLowering. This will make it easier to add a new variant to the
register splitting.
---
 llvm/lib/Target/ARM/ARMBaseRegisterInfo.h | 74 -----------------
 llvm/lib/Target/ARM/ARMFrameLowering.cpp  | 99 +++++++++++++----------
 llvm/lib/Target/ARM/ARMFrameLowering.h    |  7 +-
 3 files changed, 60 insertions(+), 120 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 926d702b4092a5..5d465f51ed1d9e 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -41,80 +41,6 @@ namespace ARMRI {
 
 } // end namespace ARMRI
 
-/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
-/// or a stack/pc register that we should push/pop.
-static inline bool isARMArea1Register(unsigned Reg, bool SplitFramePushPop) {
-  using namespace ARM;
-
-  switch (Reg) {
-    case R0:  case R1:  case R2:  case R3:
-    case R4:  case R5:  case R6:  case R7:
-    case LR:  case SP:  case PC:
-      return true;
-    case R8:  case R9:  case R10: case R11: case R12:
-      // For iOS we want r7 and lr to be next to each other.
-      return !SplitFramePushPop;
-    default:
-      return false;
-  }
-}
-
-static inline bool isARMArea2Register(unsigned Reg, bool SplitFramePushPop) {
-  using namespace ARM;
-
-  switch (Reg) {
-    case R8: case R9: case R10: case R11: case R12:
-      // iOS has this second area.
-      return SplitFramePushPop;
-    default:
-      return false;
-  }
-}
-
-static inline bool isSplitFPArea1Register(unsigned Reg,
-                                          bool SplitFramePushPop) {
-  using namespace ARM;
-
-  switch (Reg) {
-    case R0:  case R1:  case R2:  case R3:
-    case R4:  case R5:  case R6:  case R7:
-    case R8:  case R9:  case R10: case R12:
-    case SP:  case PC:
-      return true;
-    default:
-      return false;
-  }
-}
-
-static inline bool isSplitFPArea2Register(unsigned Reg,
-                                          bool SplitFramePushPop) {
-  using namespace ARM;
-
-  switch (Reg) {
-    case R11: case LR:
-      return true;
-    default:
-      return false;
-  }
-}
-
-static inline bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop) {
-  using namespace ARM;
-
-  switch (Reg) {
-    case D15: case D14: case D13: case D12:
-    case D11: case D10: case D9:  case D8:
-    case D7:  case D6:  case D5:  case D4:
-    case D3:  case D2:  case D1:  case D0:
-    case D31: case D30: case D29: case D28:
-    case D27: case D26: case D25: case D24:
-    case D23: case D22: case D21: case D20:
-    case D19: case D18: case D17: case D16:
-      return true;
-    default:
-      return false;
-  }
-}
 
 static inline bool isCalleeSavedRegister(unsigned Reg,
                                          const MCPhysReg *CSRegs) {
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 6307b4f4b3bb34..d80749601eb231 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1538,14 +1538,11 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI,
                                     ArrayRef<CalleeSavedInfo> CSI,
                                     unsigned StmOpc, unsigned StrOpc,
-                                    bool NoGap, bool (*Func)(unsigned, bool),
-                                    unsigned NumAlignedDPRCS2Regs,
-                                    unsigned MIFlags) const {
+                                    bool NoGap,
+                                    std::function<bool(unsigned)> Func) const {
   MachineFunction &MF = *MBB.getParent();
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
-  ARMSubtarget::PushPopSplitVariation PushPopSplit =
-                                      STI.getPushPopSplitVariation(MF);
 
   DebugLoc DL;
 
@@ -1557,11 +1554,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
     unsigned LastReg = 0;
     for (; i != 0; --i) {
       Register Reg = CSI[i-1].getReg();
-      if (!(Func)(Reg, PushPopSplit == ARMSubtarget::SplitR7))
-        continue;
-
-      // D-registers in the aligned area DPRCS2 are NOT spilled here.
-      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+      if (!Func(Reg))
         continue;
 
       const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -1592,7 +1585,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
     if (Regs.size() > 1 || StrOpc== 0) {
       MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
                                     .addReg(ARM::SP)
-                                    .setMIFlags(MIFlags)
+                                    .setMIFlags(MachineInstr::FrameSetup)
                                     .add(predOps(ARMCC::AL));
       for (unsigned i = 0, e = Regs.size(); i < e; ++i)
         MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
@@ -1600,7 +1593,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
       BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
           .addReg(Regs[0].first, getKillRegState(Regs[0].second))
           .addReg(ARM::SP)
-          .setMIFlags(MIFlags)
+          .setMIFlags(MachineInstr::FrameSetup)
           .addImm(-4)
           .add(predOps(ARMCC::AL));
     }
@@ -1619,8 +1612,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                                    MutableArrayRef<CalleeSavedInfo> CSI,
                                    unsigned LdmOpc, unsigned LdrOpc,
                                    bool isVarArg, bool NoGap,
-                                   bool (*Func)(unsigned, bool),
-                                   unsigned NumAlignedDPRCS2Regs) const {
+                                   std::function<bool(unsigned)> Func) const {
   MachineFunction &MF = *MBB.getParent();
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
@@ -1655,12 +1647,9 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
     for (; i != 0; --i) {
       CalleeSavedInfo &Info = CSI[i-1];
       Register Reg = Info.getReg();
-      if (!(Func)(Reg, PushPopSplit == ARMSubtarget::SplitR7))
+      if (!Func(Reg))
         continue;
 
-      // The aligned reloads from area DPRCS2 are not inserted here.
-      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
-        continue;
       if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
           !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
           STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
@@ -2006,6 +1995,7 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   ARMSubtarget::PushPopSplitVariation PushPopSplit =
                                       STI.getPushPopSplitVariation(MF);
+  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
 
   unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
   unsigned PushOneOpc = AFI->isThumbFunction() ?
@@ -2027,20 +2017,33 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
         .addImm(-4)
         .add(predOps(ARMCC::AL));
   }
+
+  auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
+                       RegInfo](unsigned Reg, SpillArea TestArea) {
+    return getSpillArea(Reg, PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
+           TestArea;
+  };
+  auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
+    return CheckRegArea(Reg, SpillArea::GPRCS1);
+  };
+  auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
+    return CheckRegArea(Reg, SpillArea::GPRCS2);
+  };
+  auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
+    return CheckRegArea(Reg, SpillArea::DPRCS1);
+  };
+
+  // Windows SEH requires the floating-point registers to be pushed between the
+  // two blocks of GPRs in some situations. In all other cases, they are pushed
+  // below the GPRs.
   if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
-    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
-                 &isSplitFPArea1Register, 0, MachineInstr::FrameSetup);
-    emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
-                 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
-    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
-                 &isSplitFPArea2Register, 0, MachineInstr::FrameSetup);
+    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS1);
+    emitPushInst(MBB, MI, CSI, FltOpc, 0, true, IsDPRCS1);
+    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS2);
   } else {
-    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
-                 0, MachineInstr::FrameSetup);
-    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
-                 0, MachineInstr::FrameSetup);
-    emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
-                 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
+    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS1);
+    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS2);
+    emitPushInst(MBB, MI, CSI, FltOpc, 0, true, IsDPRCS1);
   }
 
   // The code above does not insert spill code for the aligned DPRCS2 registers.
@@ -2060,6 +2063,8 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
 
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
+
   bool isVarArg = AFI->getArgRegsSaveSize() > 0;
   unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
   ARMSubtarget::PushPopSplitVariation PushPopSplit =
@@ -2074,20 +2079,30 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
   unsigned LdrOpc =
       AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
   unsigned FltOpc = ARM::VLDMDIA_UPD;
+
+  auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
+                       RegInfo](unsigned Reg, SpillArea TestArea) {
+    return getSpillArea(Reg, PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
+           TestArea;
+  };
+  auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
+    return CheckRegArea(Reg, SpillArea::GPRCS1);
+  };
+  auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
+    return CheckRegArea(Reg, SpillArea::GPRCS2);
+  };
+  auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
+    return CheckRegArea(Reg, SpillArea::DPRCS1);
+  };
+
   if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
-    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
-                &isSplitFPArea2Register, 0);
-    emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
-                NumAlignedDPRCS2Regs);
-    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
-                &isSplitFPArea1Register, 0);
+    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS2);
+    emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, IsDPRCS1);
+    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS1);
   } else {
-    emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
-                NumAlignedDPRCS2Regs);
-    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
-                &isARMArea2Register, 0);
-    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
-                &isARMArea1Register, 0);
+    emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, IsDPRCS1);
+    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS2);
+    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS1);
   }
 
   return true;
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h
index 6a31b73957f134..0866d2a9c68274 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -90,13 +90,12 @@ class ARMFrameLowering : public TargetFrameLowering {
 private:
   void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     ArrayRef<CalleeSavedInfo> CSI, unsigned StmOpc,
-                    unsigned StrOpc, bool NoGap, bool (*Func)(unsigned, bool),
-                    unsigned NumAlignedDPRCS2Regs, unsigned MIFlags = 0) const;
+                    unsigned StrOpc, bool NoGap,
+                    std::function<bool(unsigned)> Func) const;
   void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                    MutableArrayRef<CalleeSavedInfo> CSI, unsigned LdmOpc,
                    unsigned LdrOpc, bool isVarArg, bool NoGap,
-                   bool (*Func)(unsigned, bool),
-                   unsigned NumAlignedDPRCS2Regs) const;
+                   std::function<bool(unsigned)> Func) const;
 
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF,

>From cdfecdb72dfa340eab1463d35115d07b15532675 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Fri, 27 Sep 2024 11:24:30 +0100
Subject: [PATCH 4/9] [ARM] Add debug dump for StackAdjustingInsts (NFC)

---
 llvm/lib/Target/ARM/ARMFrameLowering.cpp | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index d80749601eb231..8334ea94b85d5f 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -689,6 +689,14 @@ struct StackAdjustingInsts {
     MachineBasicBlock::iterator I;
     unsigned SPAdjust;
     bool BeforeFPSet;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+    void dump() {
+      dbgs() << "  " << (BeforeFPSet ? "before-fp " : "          ")
+             << "sp-adjust=" << SPAdjust;
+      I->dump();
+    }
+#endif
   };
 
   SmallVector<InstInfo, 4> Insts;
@@ -723,6 +731,14 @@ struct StackAdjustingInsts {
               .setMIFlags(MachineInstr::FrameSetup);
     }
   }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  void dump() {
+    dbgs() << "StackAdjustingInsts:\n";
+    for (auto &Info : Insts)
+      Info.dump();
+  }
+#endif
 };
 
 } // end anonymous namespace
@@ -842,6 +858,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   ARMSubtarget::PushPopSplitVariation PushPopSplit =
                                       STI.getPushPopSplitVariation(MF);
 
+  LLVM_DEBUG(dbgs() << "Emitting prologue for " << MF.getName() << "\n");
+
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
   DebugLoc dl;
@@ -1213,8 +1231,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   // throughout the process. If we have a frame pointer, it takes over the job
   // half-way through, so only the first few .cfi_def_cfa_offset instructions
   // actually get emitted.
-  if (!NeedsWinCFI)
+  if (!NeedsWinCFI) {
+    LLVM_DEBUG(DefCFAOffsetCandidates.dump());
     DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
+  }
 
   if (STI.isTargetELF() && hasFP(MF))
     MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
@@ -1294,6 +1314,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
   ARMSubtarget::PushPopSplitVariation PushPopSplit =
                                       STI.getPushPopSplitVariation(MF);
 
+  LLVM_DEBUG(dbgs() << "Emitting epilogue for " << MF.getName() << "\n");
+
   // Amount of stack space we reserved next to incoming args for either
   // varargs registers or stack arguments in tail calls made by this function.
   unsigned ReservedArgStack = AFI->getArgRegsSaveSize();

>From 3ed954d9c60209ebf95ca8d7da387398df6220c0 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Fri, 27 Sep 2024 11:34:22 +0100
Subject: [PATCH 5/9] [ARM] Remove always-true checks from Thumb1 frame
 lowering (NFC)

For Thumb1, we always split the callee-saved register pushes at R7, so
we don't need to check for this.
---
 llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index ad39539232a8ca..e7d0a14da6dc9d 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -160,8 +160,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
   assert(NumBytes >= ArgRegsSaveSize &&
          "ArgRegsSaveSize is included in NumBytes");
   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
-  ARMSubtarget::PushPopSplitVariation PushPopSplit =
-                                      STI.getPushPopSplitVariation(MF);
+  assert(STI.getPushPopSplitVariation(MF) == ARMSubtarget::SplitR7 &&
+         "Must use R7 spilt for Thumb1");
 
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
@@ -223,11 +223,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
     case ARM::R8:
     case ARM::R9:
     case ARM::R10:
-      if (PushPopSplit == ARMSubtarget::SplitR7) {
-        GPRCS2Size += 4;
-        break;
-      }
-      [[fallthrough]];
+      GPRCS2Size += 4;
+      break;
     case ARM::LR:
       if (HasFrameRecordArea) {
         FRSize += 4;
@@ -367,9 +364,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
       case ARM::R10:
       case ARM::R11:
       case ARM::R12:
-        if (PushPopSplit == ARMSubtarget::SplitR7)
-          break;
-        [[fallthrough]];
+        break;
       case ARM::R0:
       case ARM::R1:
       case ARM::R2:

>From 0f21d49840502c94ca6ccf13f7eb2f1a96fa95e0 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Mon, 30 Sep 2024 09:16:21 +0100
Subject: [PATCH 6/9] fixup! [ARM] Tidy up stack frame strategy code (NFC)

Fix typo
---
 llvm/lib/Target/ARM/ARMSubtarget.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index bea3e1441eaaed..74ef38e1f01ca6 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -97,7 +97,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
     /// vpush {d8-d15}
     SplitR7,
 
-    /// When the stack frame size if now known (because of variable-sized
+    /// When the stack frame size if not known (because of variable-sized
     /// objects or realignment), Windows SEH requires the callee-saved registers
     /// to be stored in three regions, with R11 and LR below the floating-point
     /// registers.

>From d6d22cf47127d0d36548eb73d0a40d1ed62632bd Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Mon, 30 Sep 2024 09:19:56 +0100
Subject: [PATCH 7/9] fixup! [ARM] Refactor generation of push/pop instructions
 (NFC)

Use llvm::function_ref
---
 llvm/lib/Target/ARM/ARMFrameLowering.cpp | 4 ++--
 llvm/lib/Target/ARM/ARMFrameLowering.h   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 8334ea94b85d5f..9378e17f974810 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1561,7 +1561,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                     ArrayRef<CalleeSavedInfo> CSI,
                                     unsigned StmOpc, unsigned StrOpc,
                                     bool NoGap,
-                                    std::function<bool(unsigned)> Func) const {
+                                    function_ref<bool(unsigned)> Func) const {
   MachineFunction &MF = *MBB.getParent();
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
@@ -1634,7 +1634,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                                    MutableArrayRef<CalleeSavedInfo> CSI,
                                    unsigned LdmOpc, unsigned LdrOpc,
                                    bool isVarArg, bool NoGap,
-                                   std::function<bool(unsigned)> Func) const {
+                                   function_ref<bool(unsigned)> Func) const {
   MachineFunction &MF = *MBB.getParent();
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h
index 0866d2a9c68274..3f55884f80a43d 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -91,11 +91,11 @@ class ARMFrameLowering : public TargetFrameLowering {
   void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     ArrayRef<CalleeSavedInfo> CSI, unsigned StmOpc,
                     unsigned StrOpc, bool NoGap,
-                    std::function<bool(unsigned)> Func) const;
+                    function_ref<bool(unsigned)> Func) const;
   void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                    MutableArrayRef<CalleeSavedInfo> CSI, unsigned LdmOpc,
                    unsigned LdrOpc, bool isVarArg, bool NoGap,
-                   std::function<bool(unsigned)> Func) const;
+                   function_ref<bool(unsigned)> Func) const;
 
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF,

>From d772ce41e70d723a2f161dc16d42f92d549adf10 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Mon, 30 Sep 2024 09:23:52 +0100
Subject: [PATCH 8/9] clang-format

---
 llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp |  2 +-
 llvm/lib/Target/ARM/ARMBaseRegisterInfo.h   |  1 -
 llvm/lib/Target/ARM/ARMFrameLowering.cpp    | 71 +++++++++++++++------
 3 files changed, 53 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 9bb1e9754217a1..3f28ce8ca4b559 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -63,7 +63,7 @@ const MCPhysReg*
 ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
   ARMSubtarget::PushPopSplitVariation PushPopSplit =
-                                      STI.getPushPopSplitVariation(*MF);
+      STI.getPushPopSplitVariation(*MF);
   const Function &F = MF->getFunction();
 
   if (F.getCallingConv() == CallingConv::GHC) {
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 5d465f51ed1d9e..478c32fa724ffc 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -41,7 +41,6 @@ namespace ARMRI {
 
 } // end namespace ARMRI
 
-
 static inline bool isCalleeSavedRegister(unsigned Reg,
                                          const MCPhysReg *CSRegs) {
   for (unsigned i = 0; CSRegs[i]; ++i)
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 9378e17f974810..60062a2422e484 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -215,11 +215,19 @@ SpillArea getSpillArea(Register Reg,
   case ARM::FPCXTNS:
     return SpillArea::FPCXT;
 
-  case ARM::R0: case ARM::R1: case ARM::R2: case ARM::R3:
-  case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7:
+  case ARM::R0:
+  case ARM::R1:
+  case ARM::R2:
+  case ARM::R3:
+  case ARM::R4:
+  case ARM::R5:
+  case ARM::R6:
+  case ARM::R7:
     return SpillArea::GPRCS1;
 
-  case ARM::R8: case ARM::R9: case ARM::R10:
+  case ARM::R8:
+  case ARM::R9:
+  case ARM::R10:
     if (Variation == ARMSubtarget::SplitR7)
       return SpillArea::GPRCS2;
     else
@@ -243,21 +251,45 @@ SpillArea getSpillArea(Register Reg,
     else
       return SpillArea::GPRCS1;
 
-  case ARM::D0: case ARM::D1: case ARM::D2: case ARM::D3:
-  case ARM::D4: case ARM::D5: case ARM::D6: case ARM::D7:
+  case ARM::D0:
+  case ARM::D1:
+  case ARM::D2:
+  case ARM::D3:
+  case ARM::D4:
+  case ARM::D5:
+  case ARM::D6:
+  case ARM::D7:
     return SpillArea::DPRCS1;
 
-  case ARM::D8:  case ARM::D9:  case ARM::D10: case ARM::D11:
-  case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15:
+  case ARM::D8:
+  case ARM::D9:
+  case ARM::D10:
+  case ARM::D11:
+  case ARM::D12:
+  case ARM::D13:
+  case ARM::D14:
+  case ARM::D15:
     if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
       return SpillArea::DPRCS2;
     else
       return SpillArea::DPRCS1;
 
-  case ARM::D16: case ARM::D17: case ARM::D18: case ARM::D19:
-  case ARM::D20: case ARM::D21: case ARM::D22: case ARM::D23:
-  case ARM::D24: case ARM::D25: case ARM::D26: case ARM::D27:
-  case ARM::D28: case ARM::D29: case ARM::D30: case ARM::D31:
+  case ARM::D16:
+  case ARM::D17:
+  case ARM::D18:
+  case ARM::D19:
+  case ARM::D20:
+  case ARM::D21:
+  case ARM::D22:
+  case ARM::D23:
+  case ARM::D24:
+  case ARM::D25:
+  case ARM::D26:
+  case ARM::D27:
+  case ARM::D28:
+  case ARM::D29:
+  case ARM::D30:
+  case ARM::D31:
     return SpillArea::DPRCS1;
   }
 }
@@ -819,7 +851,7 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
 static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
                           const MachineFunction &MF) {
   ARMSubtarget::PushPopSplitVariation PushPopSplit =
-                                      STI.getPushPopSplitVariation(MF);
+      STI.getPushPopSplitVariation(MF);
   // For Thumb1, push.w isn't available, so the first push will always push
   // r7 and lr onto the stack first.
   if (AFI.isThumb1OnlyFunction())
@@ -856,7 +888,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   int FPCXTSaveSize = 0;
   bool NeedsWinCFI = needsWinCFI(MF);
   ARMSubtarget::PushPopSplitVariation PushPopSplit =
-                                      STI.getPushPopSplitVariation(MF);
+      STI.getPushPopSplitVariation(MF);
 
   LLVM_DEBUG(dbgs() << "Emitting prologue for " << MF.getName() << "\n");
 
@@ -1218,7 +1250,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
       if (CFIPos.isValid()) {
         int CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
             nullptr,
-            MRI->getDwarfRegNum(Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true),
+            MRI->getDwarfRegNum(Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg,
+                                true),
             MFI.getObjectOffset(FI)));
         BuildMI(MBB, CFIPos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
             .addCFIIndex(CFIIndex)
@@ -1312,7 +1345,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
          "This emitEpilogue does not support Thumb1!");
   bool isARM = !AFI->isThumbFunction();
   ARMSubtarget::PushPopSplitVariation PushPopSplit =
-                                      STI.getPushPopSplitVariation(MF);
+      STI.getPushPopSplitVariation(MF);
 
   LLVM_DEBUG(dbgs() << "Emitting epilogue for " << MF.getName() << "\n");
 
@@ -1646,7 +1679,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
   bool isTrap = false;
   bool isCmseEntry = false;
   ARMSubtarget::PushPopSplitVariation PushPopSplit =
-                                      STI.getPushPopSplitVariation(MF);
+      STI.getPushPopSplitVariation(MF);
   if (MBB.end() != MI) {
     DL = MI->getDebugLoc();
     unsigned RetOpcode = MI->getOpcode();
@@ -2016,7 +2049,7 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   ARMSubtarget::PushPopSplitVariation PushPopSplit =
-                                      STI.getPushPopSplitVariation(MF);
+      STI.getPushPopSplitVariation(MF);
   const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
 
   unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
@@ -2090,7 +2123,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
   bool isVarArg = AFI->getArgRegsSaveSize() > 0;
   unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
   ARMSubtarget::PushPopSplitVariation PushPopSplit =
-                                      STI.getPushPopSplitVariation(MF);
+      STI.getPushPopSplitVariation(MF);
 
   // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
   // registers. Do that here instead.
@@ -2350,7 +2383,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
   (void)TRI;  // Silence unused warning in non-assert builds.
   Register FramePtr = RegInfo->getFrameRegister(MF);
   ARMSubtarget::PushPopSplitVariation PushPopSplit =
-                                      STI.getPushPopSplitVariation(MF);
+      STI.getPushPopSplitVariation(MF);
 
   // Spill R4 if Thumb2 function requires stack realignment - it will be used as
   // scratch register. Also spill R4 if Thumb2 function has varsized objects,

>From 6632a0b00d8a99a23e0a30cf97d81057f73225b0 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Mon, 7 Oct 2024 11:45:45 +0100
Subject: [PATCH 9/9] Fix typos

---
 llvm/lib/Target/ARM/ARMSubtarget.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 74ef38e1f01ca6..e68e7833327116 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -91,13 +91,13 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
 
     /// R7 and LR must be adjacent, because R7 is the frame pointer, and must
     /// point to a frame record consisting of the previous frame pointer and the
-    /// retun address.
+    /// return address.
     /// push {r0-r7, lr}
     /// push {r8-r12}
     /// vpush {d8-d15}
     SplitR7,
 
-    /// When the stack frame size if not known (because of variable-sized
+    /// When the stack frame size is not known (because of variable-sized
     /// objects or realignment), Windows SEH requires the callee-saved registers
     /// to be stored in three regions, with R11 and LR below the floating-point
     /// registers.



More information about the llvm-commits mailing list