[llvm] [AArch64] Fix SVE callee-save layout for nounwind functions on Windows (PR #156467)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 2 07:50:26 PDT 2025
https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/156467
Without this change, functions with 'nounwind' don't compile (correctly), because the frame-lowering code makes the assumption that CFI is available when the function has SVE callee-saves.
From b7e820eaa72a94ec496f17758a72da7bfd489b4c Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Tue, 2 Sep 2025 14:17:52 +0000
Subject: [PATCH] [AArch64] Fix SVE callee-save layout for nounwind functions
on Windows
Without this change, functions with 'nounwind' don't compile (correctly),
because the frame-lowering code makes the assumption that CFI is available
when the function has SVE callee-saves.
---
.../Target/AArch64/AArch64FrameLowering.cpp | 53 ++++++++++---------
.../CodeGen/AArch64/framelayout-sve-win.mir | 27 ++++++++++
2 files changed, 54 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 7725fa4f1ccb1..de090f58d0ae1 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -355,6 +355,28 @@ static bool isLikelyToHaveSVEStack(const MachineFunction &MF) {
return false;
}
+static bool isTargetWindows(const MachineFunction &MF) {
+ return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
+}
+
+// Windows unwind can't represent the required stack adjustments if we have
+// both SVE callee-saves and dynamic stack allocations, and the frame
+// pointer is before the SVE spills. The allocation of the frame pointer
+// must be the last instruction in the prologue so the unwinder can restore
+// the stack pointer correctly. (And there isn't any unwind opcode for
+// `addvl sp, x29, -17`.)
+//
+// Because of this, we do spills in the opposite order on Windows: first SVE,
+// then GPRs. The main side-effect of this is that it makes accessing
+// parameters passed on the stack more expensive.
+//
+// We could consider rearranging the spills for simpler cases.
+static bool hasSVECalleeSavesAboveFrameRecord(const MachineFunction &MF) {
+ auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize() &&
+ needsWinCFI(MF);
+}
+
/// Returns true if a homogeneous prolog or epilog code can be emitted
/// for the size optimization. If possible, a frame helper call is injected.
/// When Exit block is given, this check is for epilog.
@@ -1694,10 +1716,6 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
}
}
-static bool isTargetWindows(const MachineFunction &MF) {
- return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
-}
-
static unsigned getStackHazardSize(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
}
@@ -2052,21 +2070,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
- // Windows unwind can't represent the required stack adjustments if we have
- // both SVE callee-saves and dynamic stack allocations, and the frame
- // pointer is before the SVE spills. The allocation of the frame pointer
- // must be the last instruction in the prologue so the unwinder can restore
- // the stack pointer correctly. (And there isn't any unwind opcode for
- // `addvl sp, x29, -17`.)
- //
- // Because of this, we do spills in the opposite order on Windows: first SVE,
- // then GPRs. The main side-effect of this is that it makes accessing
- // parameters passed on the stack more expensive.
- //
- // We could consider rearranging the spills for simpler cases.
- bool FPAfterSVECalleeSaves =
- Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
-
+ bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
if (FPAfterSVECalleeSaves && AFI->hasStackHazardSlotIndex())
reportFatalUsageError("SME hazard padding is not supported on Windows");
@@ -2566,8 +2570,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
return;
}
- bool FPAfterSVECalleeSaves =
- Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
+ bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
// Assume we can't combine the last pop with the sp restore.
@@ -2895,8 +2898,7 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
- bool FPAfterSVECalleeSaves =
- isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
+ bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
if (FPAfterSVECalleeSaves &&
-ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize())
@@ -3053,8 +3055,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
"In the presence of dynamic stack pointer realignment, "
"non-argument/CSR objects cannot be accessed through the frame pointer");
- bool FPAfterSVECalleeSaves =
- isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
+ bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
if (isSVE) {
StackOffset FPOffset =
@@ -3279,7 +3280,7 @@ static void computeCalleeSaveRegisterPairs(
RegInc = -1;
FirstReg = Count - 1;
}
- bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize();
+ bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
int ScalableByteOffset =
FPAfterSVECalleeSaves ? 0 : AFI->getSVECalleeSavedStackSize();
bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
index 5933c5daa67ed..81fbf26065f89 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
@@ -17,6 +17,7 @@
define aarch64_sve_vector_pcs void @save_restore_sve() uwtable { entry: unreachable }
define aarch64_sve_vector_pcs void @save_restore_sve_realign() uwtable { entry: unreachable }
define aarch64_sve_vector_pcs void @frame_layout() uwtable { entry: unreachable }
+ define aarch64_sve_vector_pcs void @test_nounwind_layout() nounwind { entry: unreachable }
...
---
name: test_allocate_sve
@@ -892,3 +893,29 @@ body: |
RET_ReallyLR
...
+---
+name: test_nounwind_layout
+stack:
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: test_nounwind_layout
+ ; CHECK: fixedStack:
+ ; CHECK: liveins: $x20, $lr, $z8, $p8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $lr, killed $x20, $sp, -2 :: (store (s64) into %stack.3), (store (s64) into %stack.2)
+ ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
+ ; CHECK-NEXT: frame-setup STR_ZXI killed $z8, $sp, 1 :: (store (s128) into %stack.1)
+ ; CHECK-NEXT: frame-setup STR_PXI killed $p8, $sp, 15 :: (store (s16) into %stack.0)
+ ; CHECK-NEXT: $x20 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p8 = IMPLICIT_DEF
+ ; CHECK-NEXT: $z8 = IMPLICIT_DEF
+ ; CHECK-NEXT: $p8 = frame-destroy LDR_PXI $sp, 15 :: (load (s16) from %stack.0)
+ ; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.1)
+ ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
+ ; CHECK-NEXT: early-clobber $sp, $lr, $x20 = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.3), (load (s64) from %stack.2)
+ ; CHECK-NEXT: RET_ReallyLR
+ $x20 = IMPLICIT_DEF
+ $p8 = IMPLICIT_DEF
+ $z8 = IMPLICIT_DEF
+ RET_ReallyLR
+...
More information about the llvm-commits
mailing list