[llvm] [AArch64] Fix SVE callee-save layout for nounwind functions on Windows (PR #156467)

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 2 07:50:26 PDT 2025


https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/156467

Without this change, functions marked 'nounwind' fail to compile (or compile incorrectly), because the frame-lowering code assumes that Windows CFI is available whenever the function has SVE callee-saves.
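The underlying issue is that the Windows-specific callee-save layout, with the SVE spills placed above the GPR frame record, only makes sense when Windows unwind codes are emitted for the prologue. 'nounwind' functions emit no unwind information, so they should use the standard layout instead. The helper introduced by the patch (reproduced here with explanatory comments added) makes that explicit:

    static bool hasSVECalleeSavesAboveFrameRecord(const MachineFunction &MF) {
      auto *AFI = MF.getInfo<AArch64FunctionInfo>();
      // Use the Windows-specific layout only when all three conditions hold:
      //  * we are targeting Windows,
      //  * the function spills SVE callee-saved registers, and
      //  * Windows unwind codes are emitted for the prologue. This last
      //    check is the new part: it is false for 'nounwind' functions.
      return isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize() &&
             needsWinCFI(MF);
    }

With this in place, a 'nounwind' function with SVE callee-saves targeting Windows gets the usual prologue: the GPR frame record is stored first, followed by an ADDVL stack adjustment and the SVE register spills, which is what the new MIR test below checks.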

From b7e820eaa72a94ec496f17758a72da7bfd489b4c Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Tue, 2 Sep 2025 14:17:52 +0000
Subject: [PATCH] [AArch64] Fix SVE callee-save layout for nounwind functions
 on Windows

Without this change, functions marked 'nounwind' fail to compile (or
compile incorrectly), because the frame-lowering code assumes that
Windows CFI is available whenever the function has SVE callee-saves.
---
 .../Target/AArch64/AArch64FrameLowering.cpp   | 53 ++++++++++---------
 .../CodeGen/AArch64/framelayout-sve-win.mir   | 27 ++++++++++
 2 files changed, 54 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 7725fa4f1ccb1..de090f58d0ae1 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -355,6 +355,28 @@ static bool isLikelyToHaveSVEStack(const MachineFunction &MF) {
   return false;
 }
 
+static bool isTargetWindows(const MachineFunction &MF) {
+  return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
+}
+
+// Windows unwind can't represent the required stack adjustments if we have
+// both SVE callee-saves and dynamic stack allocations, and the frame
+// pointer is before the SVE spills. The frame pointer must be established
+// by the last instruction in the prologue so that the unwinder can restore
+// the stack pointer correctly. (And there is no unwind opcode for
+// `addvl sp, x29, -17`.)
+//
+// Because of this, we do spills in the opposite order on Windows: first SVE,
+// then GPRs. The main side-effect of this is that it makes accessing
+// parameters passed on the stack more expensive.
+//
+// We could consider rearranging the spills for simpler cases.
+static bool hasSVECalleeSavesAboveFrameRecord(const MachineFunction &MF) {
+  auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+  return isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize() &&
+         needsWinCFI(MF);
+}
+
 /// Returns true if a homogeneous prolog or epilog code can be emitted
 /// for the size optimization. If possible, a frame helper call is injected.
 /// When Exit block is given, this check is for epilog.
@@ -1694,10 +1716,6 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
   }
 }
 
-static bool isTargetWindows(const MachineFunction &MF) {
-  return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
-}
-
 static unsigned getStackHazardSize(const MachineFunction &MF) {
   return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
 }
@@ -2052,21 +2070,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
   unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
 
-  // Windows unwind can't represent the required stack adjustments if we have
-  // both SVE callee-saves and dynamic stack allocations, and the frame
-  // pointer is before the SVE spills.  The allocation of the frame pointer
-  // must be the last instruction in the prologue so the unwinder can restore
-  // the stack pointer correctly. (And there isn't any unwind opcode for
-  // `addvl sp, x29, -17`.)
-  //
-  // Because of this, we do spills in the opposite order on Windows: first SVE,
-  // then GPRs. The main side-effect of this is that it makes accessing
-  // parameters passed on the stack more expensive.
-  //
-  // We could consider rearranging the spills for simpler cases.
-  bool FPAfterSVECalleeSaves =
-      Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
-
+  bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
   if (FPAfterSVECalleeSaves && AFI->hasStackHazardSlotIndex())
     reportFatalUsageError("SME hazard padding is not supported on Windows");
 
@@ -2566,8 +2570,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
     return;
   }
 
-  bool FPAfterSVECalleeSaves =
-      Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
+  bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
 
   bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
   // Assume we can't combine the last pop with the sp restore.
@@ -2895,8 +2898,7 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
     return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
 
   const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
-  bool FPAfterSVECalleeSaves =
-      isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
+  bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
   if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
     if (FPAfterSVECalleeSaves &&
         -ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize())
@@ -3053,8 +3055,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
       "In the presence of dynamic stack pointer realignment, "
       "non-argument/CSR objects cannot be accessed through the frame pointer");
 
-  bool FPAfterSVECalleeSaves =
-      isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
+  bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
 
   if (isSVE) {
     StackOffset FPOffset =
@@ -3279,7 +3280,7 @@ static void computeCalleeSaveRegisterPairs(
     RegInc = -1;
     FirstReg = Count - 1;
   }
-  bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize();
+  bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
   int ScalableByteOffset =
       FPAfterSVECalleeSaves ? 0 : AFI->getSVECalleeSavedStackSize();
   bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
index 5933c5daa67ed..81fbf26065f89 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
@@ -17,6 +17,7 @@
   define aarch64_sve_vector_pcs void @save_restore_sve() uwtable { entry: unreachable }
   define aarch64_sve_vector_pcs void @save_restore_sve_realign() uwtable { entry: unreachable }
   define aarch64_sve_vector_pcs void @frame_layout() uwtable { entry: unreachable }
+  define aarch64_sve_vector_pcs void @test_nounwind_layout() nounwind { entry: unreachable }
 ...
 ---
 name:            test_allocate_sve
@@ -892,3 +893,29 @@ body:             |
 
     RET_ReallyLR
 ...
+---
+name:            test_nounwind_layout
+stack:
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: test_nounwind_layout
+    ; CHECK: fixedStack:
+    ; CHECK: liveins: $x20, $lr, $z8, $p8
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $lr, killed $x20, $sp, -2 :: (store (s64) into %stack.3), (store (s64) into %stack.2)
+    ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
+    ; CHECK-NEXT: frame-setup STR_ZXI killed $z8, $sp, 1 :: (store (s128) into %stack.1)
+    ; CHECK-NEXT: frame-setup STR_PXI killed $p8, $sp, 15 :: (store (s16) into %stack.0)
+    ; CHECK-NEXT: $x20 = IMPLICIT_DEF
+    ; CHECK-NEXT: $p8 = IMPLICIT_DEF
+    ; CHECK-NEXT: $z8 = IMPLICIT_DEF
+    ; CHECK-NEXT: $p8 = frame-destroy LDR_PXI $sp, 15 :: (load (s16) from %stack.0)
+    ; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.1)
+    ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
+    ; CHECK-NEXT: early-clobber $sp, $lr, $x20 = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.3), (load (s64) from %stack.2)
+    ; CHECK-NEXT: RET_ReallyLR
+    $x20 = IMPLICIT_DEF
+    $p8 = IMPLICIT_DEF
+    $z8 = IMPLICIT_DEF
+    RET_ReallyLR
+...


