[llvm-branch-commits] [llvm] [AArch64] Prepare for split ZPR and PPR area allocation (NFCI) (PR #142391)
    Benjamin Maxwell via llvm-branch-commits 
    llvm-branch-commits at lists.llvm.org
       
    Tue Jun  3 08:15:31 PDT 2025
    
    
  
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/142391
>From 42af819d001699ae6361d51e34d76a06fe956250 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 8 May 2025 17:38:27 +0000
Subject: [PATCH] [AArch64] Prepare for split ZPR and PPR area allocation
 (NFCI)
This patch attempts to refactor AArch64FrameLowering to allow the size
of the ZPR and PPR areas to be calculated separately. This will be used
by a subsequent patch to support allocating ZPRs and PPRs to separate
areas. This patch should be an NFC and is split out to make later
functional changes easier to spot.
---
 .../Target/AArch64/AArch64FrameLowering.cpp   | 316 ++++++++++++------
 .../lib/Target/AArch64/AArch64FrameLowering.h |  12 +-
 .../AArch64/AArch64MachineFunctionInfo.h      |  49 +--
 .../Target/AArch64/AArch64RegisterInfo.cpp    |   7 +-
 4 files changed, 252 insertions(+), 132 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index b8a7b620740c2..964818d8b48b4 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -330,7 +330,10 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
 
 static bool produceCompactUnwindFrame(MachineFunction &MF);
 static bool needsWinCFI(const MachineFunction &MF);
+static StackOffset getZPRStackSize(const MachineFunction &MF);
+static StackOffset getPPRStackSize(const MachineFunction &MF);
 static StackOffset getSVEStackSize(const MachineFunction &MF);
+static bool hasSVEStackSize(const MachineFunction &MF);
 static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
 
 /// Returns true if a homogeneous prolog or epilog code can be emitted
@@ -349,7 +352,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
   if (needsWinCFI(MF))
     return false;
   // TODO: SVE is not supported yet.
-  if (getSVEStackSize(MF))
+  if (hasSVEStackSize(MF))
     return false;
 
   // Bail on stack adjustment needed on return for simplicity.
@@ -449,10 +452,36 @@ static unsigned getFixedObjectSize(const MachineFunction &MF,
   }
 }
 
-/// Returns the size of the entire SVE stackframe (calleesaves + spills).
+static unsigned getStackHazardSize(const MachineFunction &MF) {
+  return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
+}
+
+/// Returns the size of the entire ZPR stackframe (calleesaves + spills).
+static StackOffset getZPRStackSize(const MachineFunction &MF) {
+  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  return StackOffset::getScalable(AFI->getStackSizeZPR());
+}
+
+/// Returns the size of the entire PPR stackframe (calleesaves + spills).
+static StackOffset getPPRStackSize(const MachineFunction &MF) {
+  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  return StackOffset::getScalable(AFI->getStackSizePPR());
+}
+
+/// Returns the size of the entire SVE stackframe (PPRs + ZPRs).
 static StackOffset getSVEStackSize(const MachineFunction &MF) {
+  return getZPRStackSize(MF) + getPPRStackSize(MF);
+}
+
+static bool hasSVEStackSize(const MachineFunction &MF) {
   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
+  return AFI->getStackSizeZPR() > 0 || AFI->getStackSizePPR() > 0;
+}
+
+/// Returns true if PPRs are spilled as ZPRs.
+static bool arePPRsSpilledAsZPR(const MachineFunction &MF) {
+  return MF.getSubtarget().getRegisterInfo()->getSpillSize(
+             AArch64::PPRRegClass) == 16;
 }
 
 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
@@ -480,7 +509,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
                                  !Subtarget.hasSVE();
 
   return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
-           getSVEStackSize(MF) || LowerQRegCopyThroughMem);
+           hasSVEStackSize(MF) || LowerQRegCopyThroughMem);
 }
 
 /// hasFPImpl - Return true if the specified function should have a dedicated
@@ -1148,7 +1177,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
 
   // When there is an SVE area on the stack, always allocate the
   // callee-saves and spills/locals separately.
-  if (getSVEStackSize(MF))
+  if (hasSVEStackSize(MF))
     return false;
 
   return true;
@@ -1592,25 +1621,19 @@ static bool isTargetWindows(const MachineFunction &MF) {
   return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
 }
 
-static unsigned getStackHazardSize(const MachineFunction &MF) {
-  return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
-}
-
 // Convenience function to determine whether I is an SVE callee save.
-static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
+static bool IsZPRCalleeSave(MachineBasicBlock::iterator I) {
   switch (I->getOpcode()) {
   default:
     return false;
-  case AArch64::PTRUE_C_B:
   case AArch64::LD1B_2Z_IMM:
   case AArch64::ST1B_2Z_IMM:
   case AArch64::STR_ZXI:
-  case AArch64::STR_PXI:
   case AArch64::LDR_ZXI:
-  case AArch64::LDR_PXI:
-  case AArch64::PTRUE_B:
   case AArch64::CPY_ZPzI_B:
   case AArch64::CMPNE_PPzZI_B:
+  case AArch64::PTRUE_C_B:
+  case AArch64::PTRUE_B:
     return I->getFlag(MachineInstr::FrameSetup) ||
            I->getFlag(MachineInstr::FrameDestroy);
   case AArch64::SEH_SavePReg:
@@ -1619,6 +1642,22 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
   }
 }
 
+// Convenience function to determine whether I is an SVE predicate callee save.
+static bool IsPPRCalleeSave(MachineBasicBlock::iterator I) {
+  switch (I->getOpcode()) {
+  default:
+    return false;
+  case AArch64::STR_PXI:
+  case AArch64::LDR_PXI:
+    return I->getFlag(MachineInstr::FrameSetup) ||
+           I->getFlag(MachineInstr::FrameDestroy);
+  }
+}
+
+static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
+  return IsZPRCalleeSave(I) || IsPPRCalleeSave(I);
+}
+
 static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
                                         MachineFunction &MF,
                                         MachineBasicBlock &MBB,
@@ -1850,8 +1889,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   else
     AFI->setTaggedBasePointerOffset(MFI.getStackSize());
 
-  const StackOffset &SVEStackSize = getSVEStackSize(MF);
-
   // getStackSize() includes all the locals in its size calculation. We don't
   // include these locals when computing the stack size of a funclet, as they
   // are allocated in the parent's stack frame and accessed via the frame
@@ -1862,7 +1899,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
       IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
   if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
     assert(!HasFP && "unexpected function without stack frame but with FP");
-    assert(!SVEStackSize &&
+    assert(!hasSVEStackSize(MF) &&
            "unexpected function without stack frame but with SVE objects");
     // All of the stack allocation is for locals.
     AFI->setLocalStackSize(NumBytes);
@@ -1943,7 +1980,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
         &HasWinCFI, EmitAsyncCFI);
     NumBytes -= AFI->getCalleeSavedStackSize();
   } else if (CombineSPBump) {
-    assert(!SVEStackSize && "Cannot combine SP bump with SVE");
+    assert(!hasSVEStackSize(MF) && "Cannot combine SP bump with SVE");
     emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                     StackOffset::getFixed(-NumBytes), TII,
                     MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
@@ -2172,36 +2209,62 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     }
   }
 
-  StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
   MachineBasicBlock::iterator CalleeSavesEnd = MBBI;
 
+  StackOffset PPRCalleeSavesSize =
+      StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
+  StackOffset ZPRCalleeSavesSize =
+      StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
+  StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize;
+  StackOffset PPRLocalsSize = getPPRStackSize(MF) - PPRCalleeSavesSize;
+  StackOffset ZPRLocalsSize = getZPRStackSize(MF) - ZPRCalleeSavesSize;
+
   StackOffset CFAOffset =
       StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
+  if (!FPAfterSVECalleeSaves) {
+    MachineBasicBlock::iterator ZPRCalleeSavesBegin = MBBI,
+                                ZPRCalleeSavesEnd = MBBI;
+    MachineBasicBlock::iterator PPRCalleeSavesBegin = MBBI,
+                                PPRCalleeSavesEnd = MBBI;
 
-  // Process the SVE callee-saves to determine what space needs to be
-  // allocated.
-  if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
-    LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
-                      << "\n");
-    SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
-    SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
-    // Find callee save instructions in frame.
-    // Note: With FPAfterSVECalleeSaves the callee saves have already been
+    // Process the SVE callee-saves to determine what space needs to be
     // allocated.
-    if (!FPAfterSVECalleeSaves) {
-      MachineBasicBlock::iterator CalleeSavesBegin = MBBI;
-      assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
-      while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
+
+    if (PPRCalleeSavesSize) {
+      LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = "
+                        << PPRCalleeSavesSize.getScalable() << "\n");
+
+      PPRCalleeSavesBegin = MBBI;
+      assert(IsPPRCalleeSave(PPRCalleeSavesBegin) && "Unexpected instruction");
+      while (IsPPRCalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
         ++MBBI;
-      CalleeSavesEnd = MBBI;
+      PPRCalleeSavesEnd = MBBI;
+    }
 
-      StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
-      // Allocate space for the callee saves (if any).
-      allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
-                         nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
-                         MFI.hasVarSizedObjects() || LocalsSize);
+    if (ZPRCalleeSavesSize) {
+      LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = "
+                        << ZPRCalleeSavesSize.getScalable() << "\n");
+      ZPRCalleeSavesBegin = MBBI;
+      assert(IsZPRCalleeSave(ZPRCalleeSavesBegin) && "Unexpected instruction");
+      while (IsZPRCalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
+        ++MBBI;
+      ZPRCalleeSavesEnd = MBBI;
     }
+
+    // Allocate space for the callee saves (if any).
+    StackOffset LocalsSize =
+        PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes);
+    MachineBasicBlock::iterator CalleeSavesBegin =
+        AFI->getPPRCalleeSavedStackSize() ? PPRCalleeSavesBegin
+                                          : ZPRCalleeSavesBegin;
+    allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
+                       nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
+                       MFI.hasVarSizedObjects() || LocalsSize);
+
+    CalleeSavesEnd = AFI->getZPRCalleeSavedStackSize() ? ZPRCalleeSavesEnd
+                                                       : PPRCalleeSavesEnd;
   }
+
   CFAOffset += SVECalleeSavesSize;
 
   if (EmitAsyncCFI)
@@ -2215,6 +2278,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
     // the correct value here, as NumBytes also includes padding bytes,
     // which shouldn't be counted here.
+    StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
     allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
                        SVELocalsSize + StackOffset::getFixed(NumBytes),
                        NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
@@ -2264,7 +2328,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
       emitDefineCFAWithFP(MF, MBB, MBBI, FixedObject);
     } else {
       StackOffset TotalSize =
-          SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
+          getSVEStackSize(MF) +
+          StackOffset::getFixed((int64_t)MFI.getStackSize());
       CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
       CFIBuilder.insertCFIInst(
           createDefCFA(*RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
@@ -2465,7 +2530,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
     }
   }
 
-  const StackOffset &SVEStackSize = getSVEStackSize(MF);
+  StackOffset SVEStackSize = getSVEStackSize(MF);
 
   // If there is a single SP update, insert it before the ret and we're done.
   if (CombineSPBump) {
@@ -2490,7 +2555,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   // deallocated.
   StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
   MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
-  if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
+  int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize();
+  int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize();
+  int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
+
+  if (SVECalleeSavedSize) {
     if (FPAfterSVECalleeSaves)
       RestoreEnd = MBB.getFirstTerminator();
 
@@ -2503,7 +2572,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
            IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
 
     StackOffset CalleeSavedSizeAsOffset =
-        StackOffset::getScalable(CalleeSavedSize);
+        StackOffset::getScalable(SVECalleeSavedSize);
     DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
     DeallocateAfter = CalleeSavedSizeAsOffset;
   }
@@ -2538,16 +2607,16 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
     // restore the stack pointer from the frame pointer prior to SVE CSR
     // restoration.
     if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
-      if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
+      if (SVECalleeSavedSize) {
         // Set SP to start of SVE callee-save area from which they can
         // be reloaded. The code below will deallocate the stack space
         // space by moving FP -> SP.
         emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
-                        StackOffset::getScalable(-CalleeSavedSize), TII,
+                        StackOffset::getScalable(-SVECalleeSavedSize), TII,
                         MachineInstr::FrameDestroy);
       }
     } else {
-      if (AFI->getSVECalleeSavedStackSize()) {
+      if (SVECalleeSavedSize) {
         // Deallocate the non-SVE locals first before we can deallocate (and
         // restore callee saves) from the SVE area.
         emitFrameOffset(
@@ -2676,7 +2745,9 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
   const auto &MFI = MF.getFrameInfo();
 
   int64_t ObjectOffset = MFI.getObjectOffset(FI);
-  StackOffset SVEStackSize = getSVEStackSize(MF);
+  StackOffset ZPRStackSize = getZPRStackSize(MF);
+  StackOffset PPRStackSize = getPPRStackSize(MF);
+  StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
 
   // For VLA-area objects, just emit an offset at the end of the stack frame.
   // Whilst not quite correct, these objects do live at the end of the frame and
@@ -2777,7 +2848,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
   bool isCSR =
       !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
 
-  const StackOffset &SVEStackSize = getSVEStackSize(MF);
+  const StackOffset SVEStackSize = getSVEStackSize(MF);
 
   // Use frame pointer to reference fixed objects. Use it for locals if
   // there are VLAs or a dynamically realigned SP (and thus the SP isn't
@@ -2940,7 +3011,7 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
          !(Subtarget.getTargetLowering()->supportSwiftError() &&
            Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
          MF.getFunction().getCallingConv() != CallingConv::SwiftTail &&
-         !requiresSaveVG(MF) && AFI->getSVECalleeSavedStackSize() == 0;
+         !requiresSaveVG(MF) && (AFI->getSVECalleeSavedStackSize()) == 0;
 }
 
 static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
@@ -3073,10 +3144,13 @@ static void computeCalleeSaveRegisterPairs(
     FirstReg = Count - 1;
   }
   bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize();
-  int ScalableByteOffset =
-      FPAfterSVECalleeSaves ? 0 : AFI->getSVECalleeSavedStackSize();
+  int ScalableByteOffset = FPAfterSVECalleeSaves
+                               ? 0
+                               : AFI->getZPRCalleeSavedStackSize() +
+                                     AFI->getPPRCalleeSavedStackSize();
   bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
   Register LastReg = 0;
+  bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex();
 
   // When iterating backwards, the loop condition relies on unsigned wraparound.
   for (unsigned i = FirstReg; i < Count; i += RegInc) {
@@ -3106,7 +3180,7 @@ static void computeCalleeSaveRegisterPairs(
     }
 
     // Add the stack hazard size as we transition from GPR->FPR CSRs.
-    if (AFI->hasStackHazardSlotIndex() &&
+    if (HasCSHazardPadding &&
         (!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
         AArch64InstrInfo::isFpOrNEON(RPI.Reg1))
       ByteOffset += StackFillDir * StackHazardSize;
@@ -3114,7 +3188,7 @@ static void computeCalleeSaveRegisterPairs(
 
     int Scale = TRI->getSpillSize(*RPI.RC);
     // Add the next reg to the pair if it is in the same register class.
-    if (unsigned(i + RegInc) < Count && !AFI->hasStackHazardSlotIndex()) {
+    if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) {
       MCRegister NextReg = CSI[i + RegInc].getReg();
       bool IsFirst = i == FirstReg;
       switch (RPI.Type) {
@@ -3743,10 +3817,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
     return;
 
+  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+
   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
       MF.getSubtarget().getRegisterInfo());
-  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   unsigned UnspilledCSGPR = AArch64::NoRegister;
   unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
@@ -3867,19 +3942,29 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
 
   // Calculates the callee saved stack size.
   unsigned CSStackSize = 0;
-  unsigned SVECSStackSize = 0;
+  unsigned ZPRCSStackSize = 0;
+  unsigned PPRCSStackSize = 0;
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   for (unsigned Reg : SavedRegs.set_bits()) {
     auto *RC = TRI->getMinimalPhysRegClass(Reg);
     assert(RC && "expected register class!");
     auto SpillSize = TRI->getSpillSize(*RC);
-    if (AArch64::PPRRegClass.contains(Reg) ||
-        AArch64::ZPRRegClass.contains(Reg))
-      SVECSStackSize += SpillSize;
+    bool IsZPR = AArch64::ZPRRegClass.contains(Reg);
+    bool IsPPR = !IsZPR && AArch64::PPRRegClass.contains(Reg);
+    if (IsZPR || (IsPPR && arePPRsSpilledAsZPR(MF)))
+      ZPRCSStackSize += SpillSize;
+    else if (IsPPR)
+      PPRCSStackSize += SpillSize;
     else
       CSStackSize += SpillSize;
   }
 
+  // Determine if a Hazard slot should be used, and increase the CSStackSize by
+  // StackHazardSize if so.
+  determineStackHazardSlot(MF, SavedRegs);
+  if (AFI->hasStackHazardSlotIndex())
+    CSStackSize += getStackHazardSize(MF);
+
   // Increase the callee-saved stack size if the function has streaming mode
   // changes, as we will need to spill the value of the VG register.
   // For locally streaming functions, we spill both the streaming and
@@ -3892,12 +3977,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
       CSStackSize += 8;
   }
 
-  // Determine if a Hazard slot should be used, and increase the CSStackSize by
-  // StackHazardSize if so.
-  determineStackHazardSlot(MF, SavedRegs);
-  if (AFI->hasStackHazardSlotIndex())
-    CSStackSize += getStackHazardSize(MF);
-
   // Save number of saved regs, so we can easily update CSStackSize later.
   unsigned NumSavedRegs = SavedRegs.count();
 
@@ -3917,8 +3996,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   });
 
   // If any callee-saved registers are used, the frame cannot be eliminated.
+  auto [ZPRLocalStackSize, PPRLocalStackSize] =
+      estimateSVEStackObjectOffsets(MF);
+  int64_t SVELocals = ZPRLocalStackSize + PPRLocalStackSize;
   int64_t SVEStackSize =
-      alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
+      alignTo(ZPRCSStackSize + PPRCSStackSize + SVELocals, 16);
   bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
 
   // The CSR spill slots have not been allocated yet, so estimateStackSize
@@ -4003,7 +4085,9 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   // instructions.
   AFI->setCalleeSavedStackSize(AlignedCSStackSize);
   AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
-  AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
+
+  AFI->setZPRCalleeSavedStackSize(ZPRCSStackSize);
+  AFI->setPPRCalleeSavedStackSize(alignTo(PPRCSStackSize, 16));
 }
 
 bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
@@ -4158,7 +4242,6 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
       assert((Max == std::numeric_limits<int>::min() ||
               Max + 1 == CS.getFrameIdx()) &&
              "SVE CalleeSaves are not consecutive");
-
       Min = std::min(Min, CS.getFrameIdx());
       Max = std::max(Max, CS.getFrameIdx());
     }
@@ -4171,10 +4254,20 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
 // Fills in the first and last callee-saved frame indices into
 // Min/MaxCSFrameIndex, respectively.
 // Returns the size of the stack.
-static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
-                                              int &MinCSFrameIndex,
-                                              int &MaxCSFrameIndex,
-                                              bool AssignOffsets) {
+static SVEStackSizes
+determineSVEStackObjectOffsets(MachineFunction &MF, bool AssignOffsets,
+                               bool SplitSVEObjects = false) {
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  int64_t ZPRStack = 0;
+  int64_t PPRStack = 0;
+
+  auto [ZPROffset, PPROffset] = [&] {
+    if (SplitSVEObjects)
+      return std::tie(ZPRStack, PPRStack);
+    return std::tie(ZPRStack, ZPRStack);
+  }();
+
 #ifndef NDEBUG
   // First process all fixed stack objects.
   for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
@@ -4183,26 +4276,34 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
            "reference.");
 #endif
 
+  auto OffsetForObject = [&](int FI, int64_t &ZPROffset,
+                             int64_t &PPROffset) -> int64_t & {
+    return MFI.getStackID(FI) == TargetStackID::ScalableVector ? ZPROffset
+                                                               : PPROffset;
+  };
+
   auto Assign = [&MFI](int FI, int64_t Offset) {
     LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
     MFI.setObjectOffset(FI, Offset);
   };
 
-  int64_t Offset = 0;
-
   // Then process all callee saved slots.
+  int MinCSFrameIndex, MaxCSFrameIndex;
   if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
-    // Assign offsets to the callee save slots.
-    for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
-      Offset += MFI.getObjectSize(I);
-      Offset = alignTo(Offset, MFI.getObjectAlign(I));
-      if (AssignOffsets)
-        Assign(I, -Offset);
+    for (int FI = MinCSFrameIndex; FI <= MaxCSFrameIndex; ++FI) {
+      int64_t &Offset = OffsetForObject(FI, ZPROffset, PPROffset);
+      Offset += MFI.getObjectSize(FI);
+      Offset = alignTo(Offset, MFI.getObjectAlign(FI));
+      if (AssignOffsets) {
+        LLVM_DEBUG(dbgs() << "FI: " << FI << ", Offset: " << -Offset << "\n");
+        Assign(FI, -Offset);
+      }
     }
   }
 
-  // Ensure that the Callee-save area is aligned to 16bytes.
-  Offset = alignTo(Offset, Align(16U));
+  // Ensure the CS area is 16-byte aligned.
+  PPROffset = alignTo(PPROffset, Align(16U));
+  ZPROffset = alignTo(ZPROffset, Align(16U));
 
   // Create a buffer of SVE objects to allocate and sort it.
   SmallVector<int, 8> ObjectsToAllocate;
@@ -4212,20 +4313,21 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
   int StackProtectorFI = -1;
   if (MFI.hasStackProtectorIndex()) {
     StackProtectorFI = MFI.getStackProtectorIndex();
-    if (MFI.isScalableStackID(StackProtectorFI))
+    if (MFI.getStackID(StackProtectorFI) == TargetStackID::ScalableVector)
       ObjectsToAllocate.push_back(StackProtectorFI);
   }
-  for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
-    if (!MFI.isScalableStackID(I))
-      continue;
-    if (I == StackProtectorFI)
+
+  for (int FI = 0, E = MFI.getObjectIndexEnd(); FI != E; ++FI) {
+    if (FI == StackProtectorFI || MFI.isDeadObjectIndex(FI))
       continue;
-    if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
+    if (MaxCSFrameIndex >= FI && FI >= MinCSFrameIndex)
       continue;
-    if (MFI.isDeadObjectIndex(I))
+
+    if (MFI.getStackID(FI) != TargetStackID::ScalableVector &&
+        MFI.getStackID(FI) != TargetStackID::ScalablePredVector)
       continue;
 
-    ObjectsToAllocate.push_back(I);
+    ObjectsToAllocate.push_back(FI);
   }
 
   // Allocate all SVE locals and spills
@@ -4238,24 +4340,32 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
       report_fatal_error(
           "Alignment of scalable vectors > 16 bytes is not yet supported");
 
+    int64_t &Offset = OffsetForObject(FI, ZPROffset, PPROffset);
     Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment);
     if (AssignOffsets)
       Assign(FI, -Offset);
   }
 
-  return Offset;
+  PPROffset = alignTo(PPROffset, Align(16U));
+  ZPROffset = alignTo(ZPROffset, Align(16U));
+
+  if (&ZPROffset != &PPROffset) {
+    // SplitSVEObjects (PPRs and ZPRs allocated to separate areas).
+    return SVEStackSizes{ZPROffset, PPROffset};
+  }
+  // When SplitSVEObjects is disabled just attribute all the stack to ZPRs.
+  // Determining the split is not necessary.
+  return SVEStackSizes{ZPROffset, 0};
 }
 
-int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
-    MachineFrameInfo &MFI) const {
-  int MinCSFrameIndex, MaxCSFrameIndex;
-  return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false);
+SVEStackSizes
+AArch64FrameLowering::estimateSVEStackObjectOffsets(MachineFunction &MF) const {
+  return determineSVEStackObjectOffsets(MF, false);
 }
 
-int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
-    MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
-  return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
-                                        true);
+SVEStackSizes
+AArch64FrameLowering::assignSVEStackObjectOffsets(MachineFunction &MF) const {
+  return determineSVEStackObjectOffsets(MF, true);
 }
 
 /// Attempts to scavenge a register from \p ScavengeableRegs given the used
@@ -4569,12 +4679,9 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
   assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
          "Upwards growing stack unsupported");
 
-  int MinCSFrameIndex, MaxCSFrameIndex;
-  int64_t SVEStackSize =
-      assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
-
-  AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
-  AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
+  auto [ZPRStackSize, PPRStackSize] = assignSVEStackObjectOffsets(MF);
+  AFI->setStackSizeZPR(ZPRStackSize);
+  AFI->setStackSizePPR(PPRStackSize);
 
   // If this function isn't doing Win64-style C++ EH, we don't need to do
   // anything.
@@ -5106,8 +5213,7 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
   }
 
   // Go to common code if we cannot provide sp + offset.
-  if (MFI.hasVarSizedObjects() ||
-      MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() ||
+  if (MFI.hasVarSizedObjects() || hasSVEStackSize(MF) ||
       MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF))
     return getFrameIndexReference(MF, FI, FrameReg);
 
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 605d6fd8bdbbb..cd1c7e5ed82e7 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -19,6 +19,11 @@
 
 namespace llvm {
 
+struct SVEStackSizes {
+  int64_t ZPRStackSize{0};
+  int64_t PPRStackSize{0};
+};
+
 class AArch64FrameLowering : public TargetFrameLowering {
 public:
   explicit AArch64FrameLowering()
@@ -145,10 +150,8 @@ class AArch64FrameLowering : public TargetFrameLowering {
   bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
                                       uint64_t StackBumpBytes) const;
 
-  int64_t estimateSVEStackObjectOffsets(MachineFrameInfo &MF) const;
-  int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
-                                      int &MinCSFrameIndex,
-                                      int &MaxCSFrameIndex) const;
+  SVEStackSizes estimateSVEStackObjectOffsets(MachineFunction &MF) const;
+  SVEStackSizes assignSVEStackObjectOffsets(MachineFunction &MF) const;
   bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
                                                 uint64_t StackBumpBytes) const;
   void emitCalleeSavedGPRLocations(MachineBasicBlock &MBB,
@@ -164,6 +167,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
                           int64_t RealignmentPadding, StackOffset AllocSize,
                           bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
                           StackOffset InitialOffset, bool FollowupAllocs) const;
+
   /// Make a determination whether a Hazard slot is used and create it if
   /// needed.
   void determineStackHazardSlot(MachineFunction &MF,
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 361d5ec3f2b22..50f5c05a252ca 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -74,13 +74,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
   /// Amount of stack frame size, not including callee-saved registers.
   uint64_t LocalStackSize = 0;
 
-  /// The start and end frame indices for the SVE callee saves.
-  int MinSVECSFrameIndex = 0;
-  int MaxSVECSFrameIndex = 0;
-
   /// Amount of stack frame size used for saving callee-saved registers.
   unsigned CalleeSavedStackSize = 0;
-  unsigned SVECalleeSavedStackSize = 0;
+  unsigned ZPRCalleeSavedStackSize = 0;
+  unsigned PPRCalleeSavedStackSize = 0;
   bool HasCalleeSavedStackSize = false;
 
   /// Number of TLS accesses using the special (combinable)
@@ -136,9 +133,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
   /// SVE stack size (for predicates and data vectors) are maintained here
   /// rather than in FrameInfo, as the placement and Stack IDs are target
   /// specific.
-  uint64_t StackSizeSVE = 0;
+  uint64_t StackSizeZPR = 0;
+  uint64_t StackSizePPR = 0;
 
-  /// HasCalculatedStackSizeSVE indicates whether StackSizeSVE is valid.
+  /// HasCalculatedStackSizeSVE indicates whether StackSizeZPR/PPR is valid.
   bool HasCalculatedStackSizeSVE = false;
 
   /// Has a value when it is known whether or not the function uses a
@@ -299,14 +297,20 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
     TailCallReservedStack = bytes;
   }
 
-  bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
+  void setStackSizeZPR(uint64_t S) {
+    HasCalculatedStackSizeSVE = true;
+    StackSizeZPR = S;
+  }
 
-  void setStackSizeSVE(uint64_t S) {
+  void setStackSizePPR(uint64_t S) {
     HasCalculatedStackSizeSVE = true;
-    StackSizeSVE = S;
+    StackSizePPR = S;
   }
 
-  uint64_t getStackSizeSVE() const { return StackSizeSVE; }
+  uint64_t getStackSizeZPR() const { return StackSizeZPR; }
+  uint64_t getStackSizePPR() const { return StackSizePPR; }
+
+  bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
 
   bool hasStackFrame() const { return HasStackFrame; }
   void setHasStackFrame(bool s) { HasStackFrame = s; }
@@ -398,20 +402,25 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
   }
 
   // Saves the CalleeSavedStackSize for SVE vectors in 'scalable bytes'
-  void setSVECalleeSavedStackSize(unsigned Size) {
-    SVECalleeSavedStackSize = Size;
+  void setZPRCalleeSavedStackSize(unsigned Size) {
+    ZPRCalleeSavedStackSize = Size;
   }
-  unsigned getSVECalleeSavedStackSize() const {
-    return SVECalleeSavedStackSize;
+  unsigned getZPRCalleeSavedStackSize() const {
+    return ZPRCalleeSavedStackSize;
   }
 
-  void setMinMaxSVECSFrameIndex(int Min, int Max) {
-    MinSVECSFrameIndex = Min;
-    MaxSVECSFrameIndex = Max;
+  // Saves the CalleeSavedStackSize for SVE predicate vectors in 'scalable
+  // bytes'
+  void setPPRCalleeSavedStackSize(unsigned Size) {
+    PPRCalleeSavedStackSize = Size;
+  }
+  unsigned getPPRCalleeSavedStackSize() const {
+    return PPRCalleeSavedStackSize;
   }
 
-  int getMinSVECSFrameIndex() const { return MinSVECSFrameIndex; }
-  int getMaxSVECSFrameIndex() const { return MaxSVECSFrameIndex; }
+  unsigned getSVECalleeSavedStackSize() const {
+    return PPRCalleeSavedStackSize + ZPRCalleeSavedStackSize;
+  }
 
   void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; }
   unsigned getNumLocalDynamicTLSAccesses() const {
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index b0c69b8aca806..afdddb2a7151b 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -644,7 +644,8 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
     if (ST.hasSVE() || ST.isStreaming()) {
       // Frames that have variable sized objects and scalable SVE objects,
       // should always use a basepointer.
-      if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
+      if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeZPR() ||
+          AFI->getStackSizePPR())
         return true;
     }
 
@@ -784,8 +785,8 @@ AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
   assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() ||
           AFI->hasCalculatedStackSizeSVE()) &&
          "Expected SVE area to be calculated by this point");
-  return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE() &&
-         !AFI->hasStackHazardSlotIndex();
+  return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeZPR() &&
+         !AFI->getStackSizePPR() && !AFI->hasStackHazardSlotIndex();
 }
 
 bool AArch64RegisterInfo::requiresFrameIndexScavenging(
    
    
More information about the llvm-branch-commits
mailing list