[llvm] AArch64: align pair-wise spills on WoS to 16-byte (PR #166902)
Saleem Abdulrasool via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 6 23:13:32 PST 2025
https://github.com/compnerd created https://github.com/llvm/llvm-project/pull/166902
Adjust the frame setup code for Windows ARM64 to attempt to align pair-wise spills to 16-byte boundaries. This enables us to properly emit the spills for custom clang calling conventions such as `preserve_most`, which spills x9-x15, registers that are normally nonvolatile. Even when using the ARM64EC opcodes for the unwinding, we cannot represent the spill if it is unaligned.
>From 205e76fdbca769e00c438ee0108c9bf97b234940 Mon Sep 17 00:00:00 2001
From: Saleem Abdulrasool <compnerd at compnerd.org>
Date: Thu, 6 Nov 2025 22:29:44 -0800
Subject: [PATCH] AArch64: align pair-wise spills on WoS to 16-byte
Adjust the frame setup code for Windows ARM64 to attempt to align
pair-wise spills to 16-byte boundaries. This enables us to properly emit
the spills for custom clang calling conventions such as preserve_most,
which spills x9-x15, registers that are normally nonvolatile. Even when
using the ARM64EC opcodes for the unwinding, we cannot represent the
spill if it is unaligned.
---
.../Target/AArch64/AArch64FrameLowering.cpp | 34 +++++----
.../CodeGen/AArch64/preserve_nonecc_call.ll | 72 ++++++++++---------
.../CodeGen/AArch64/seh-extended-spills.ll | 37 +++++-----
.../CodeGen/AArch64/stack-hazard-windows.ll | 12 ++--
llvm/test/CodeGen/AArch64/wineh-frame2.mir | 25 +++----
5 files changed, 103 insertions(+), 77 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 70c5c29149288..6082e20b8327f 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1554,8 +1554,9 @@ static bool produceCompactUnwindFrame(const AArch64FrameLowering &AFL,
!AFL.requiresSaveVG(MF) && !AFI->isSVECC();
}
-static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
- bool NeedsWinCFI, bool IsFirst,
+static bool invalidateWindowsRegisterPairing(unsigned Spilled, unsigned Reg1,
+ unsigned Reg2, bool NeedsWinCFI,
+ bool IsFirst,
const TargetRegisterInfo *TRI) {
// If we are generating register pairs for a Windows function that requires
// EH support, then pair consecutive registers only. There are no unwind
@@ -1568,8 +1569,14 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
return true;
if (!NeedsWinCFI)
return false;
+ // ARM64EC introduced `save_any_regp` which expects 16-byte alignment.
+ // Accommodate that by ensuring that we re-align to 16 bytes when doing paired
+ // spills. Carve out an exception for {FP,LR} pairs which we perform without
+ // 16-byte alignment.
if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1)
- return false;
+ return ((Reg1 == AArch64::FP && Reg2 == AArch64::LR) || (Spilled % 2) == 0)
+ ? false
+ : true;
// If pairing a GPR with LR, the pair can be described by the save_lrpair
// opcode. If this is the first register pair, it would end up with a
// predecrement, but there's no save_lrpair_x opcode, so we can only do this
@@ -1585,13 +1592,14 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
/// WindowsCFI requires that only consecutive registers can be paired.
/// LR and FP need to be allocated together when the frame needs to save
/// the frame-record. This means any other register pairing with LR is invalid.
-static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
- bool UsesWinAAPCS, bool NeedsWinCFI,
- bool NeedsFrameRecord, bool IsFirst,
+static bool invalidateRegisterPairing(unsigned Spilled, unsigned Reg1,
+ unsigned Reg2, bool UsesWinAAPCS,
+ bool NeedsWinCFI, bool NeedsFrameRecord,
+ bool IsFirst,
const TargetRegisterInfo *TRI) {
if (UsesWinAAPCS)
- return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst,
- TRI);
+ return invalidateWindowsRegisterPairing(Spilled, Reg1, Reg2, NeedsWinCFI,
+ IsFirst, TRI);
// If we need to store the frame record, don't pair any register
// with LR other than FP.
@@ -1752,15 +1760,15 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
switch (RPI.Type) {
case RegPairInfo::GPR:
if (AArch64::GPR64RegClass.contains(NextReg) &&
- !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
- NeedsWinCFI, NeedsFrameRecord, IsFirst,
- TRI))
+ !invalidateRegisterPairing(i - FirstReg, RPI.Reg1, NextReg,
+ IsWindows, NeedsWinCFI, NeedsFrameRecord,
+ IsFirst, TRI))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR64:
if (AArch64::FPR64RegClass.contains(NextReg) &&
- !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
- IsFirst, TRI))
+ !invalidateWindowsRegisterPairing(i - FirstReg, RPI.Reg1, NextReg,
+ NeedsWinCFI, IsFirst, TRI))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR128:
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
index 9b9717c19321e..575c93d62bd04 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
@@ -120,26 +120,30 @@ define void @caller1(ptr %a) {
; WIN-NEXT: .seh_save_regp x27, 64
; WIN-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; WIN-NEXT: .seh_save_reg x30, 80
-; WIN-NEXT: stp d8, d9, [sp, #88] // 16-byte Folded Spill
-; WIN-NEXT: .seh_save_fregp d8, 88
-; WIN-NEXT: stp d10, d11, [sp, #104] // 16-byte Folded Spill
-; WIN-NEXT: .seh_save_fregp d10, 104
-; WIN-NEXT: stp d12, d13, [sp, #120] // 16-byte Folded Spill
-; WIN-NEXT: .seh_save_fregp d12, 120
-; WIN-NEXT: stp d14, d15, [sp, #136] // 16-byte Folded Spill
-; WIN-NEXT: .seh_save_fregp d14, 136
+; WIN-NEXT: str d8, [sp, #88] // 8-byte Folded Spill
+; WIN-NEXT: .seh_save_freg d8, 88
+; WIN-NEXT: stp d9, d10, [sp, #96] // 16-byte Folded Spill
+; WIN-NEXT: .seh_save_fregp d9, 96
+; WIN-NEXT: stp d11, d12, [sp, #112] // 16-byte Folded Spill
+; WIN-NEXT: .seh_save_fregp d11, 112
+; WIN-NEXT: stp d13, d14, [sp, #128] // 16-byte Folded Spill
+; WIN-NEXT: .seh_save_fregp d13, 128
+; WIN-NEXT: str d15, [sp, #144] // 8-byte Folded Spill
+; WIN-NEXT: .seh_save_freg d15, 144
; WIN-NEXT: .seh_endprologue
; WIN-NEXT: mov x20, x0
; WIN-NEXT: bl callee
; WIN-NEXT: .seh_startepilogue
-; WIN-NEXT: ldp d14, d15, [sp, #136] // 16-byte Folded Reload
-; WIN-NEXT: .seh_save_fregp d14, 136
-; WIN-NEXT: ldp d12, d13, [sp, #120] // 16-byte Folded Reload
-; WIN-NEXT: .seh_save_fregp d12, 120
-; WIN-NEXT: ldp d10, d11, [sp, #104] // 16-byte Folded Reload
-; WIN-NEXT: .seh_save_fregp d10, 104
-; WIN-NEXT: ldp d8, d9, [sp, #88] // 16-byte Folded Reload
-; WIN-NEXT: .seh_save_fregp d8, 88
+; WIN-NEXT: ldr d15, [sp, #144] // 8-byte Folded Reload
+; WIN-NEXT: .seh_save_freg d15, 144
+; WIN-NEXT: ldp d13, d14, [sp, #128] // 16-byte Folded Reload
+; WIN-NEXT: .seh_save_fregp d13, 128
+; WIN-NEXT: ldp d11, d12, [sp, #112] // 16-byte Folded Reload
+; WIN-NEXT: .seh_save_fregp d11, 112
+; WIN-NEXT: ldp d9, d10, [sp, #96] // 16-byte Folded Reload
+; WIN-NEXT: .seh_save_fregp d9, 96
+; WIN-NEXT: ldr d8, [sp, #88] // 8-byte Folded Reload
+; WIN-NEXT: .seh_save_freg d8, 88
; WIN-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; WIN-NEXT: .seh_save_reg x30, 80
; WIN-NEXT: ldp x27, x28, [sp, #64] // 16-byte Folded Reload
@@ -456,14 +460,16 @@ define i64 @caller3() {
; WIN-NEXT: .seh_save_regp x27, 80
; WIN-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
; WIN-NEXT: .seh_save_reg x30, 96
-; WIN-NEXT: stp d8, d9, [sp, #104] // 16-byte Folded Spill
-; WIN-NEXT: .seh_save_fregp d8, 104
-; WIN-NEXT: stp d10, d11, [sp, #120] // 16-byte Folded Spill
-; WIN-NEXT: .seh_save_fregp d10, 120
-; WIN-NEXT: stp d12, d13, [sp, #136] // 16-byte Folded Spill
-; WIN-NEXT: .seh_save_fregp d12, 136
-; WIN-NEXT: stp d14, d15, [sp, #152] // 16-byte Folded Spill
-; WIN-NEXT: .seh_save_fregp d14, 152
+; WIN-NEXT: str d8, [sp, #104] // 8-byte Folded Spill
+; WIN-NEXT: .seh_save_freg d8, 104
+; WIN-NEXT: stp d9, d10, [sp, #112] // 16-byte Folded Spill
+; WIN-NEXT: .seh_save_fregp d9, 112
+; WIN-NEXT: stp d11, d12, [sp, #128] // 16-byte Folded Spill
+; WIN-NEXT: .seh_save_fregp d11, 128
+; WIN-NEXT: stp d13, d14, [sp, #144] // 16-byte Folded Spill
+; WIN-NEXT: .seh_save_fregp d13, 144
+; WIN-NEXT: str d15, [sp, #160] // 8-byte Folded Spill
+; WIN-NEXT: .seh_save_freg d15, 160
; WIN-NEXT: .seh_endprologue
; WIN-NEXT: mov w8, #24 // =0x18
; WIN-NEXT: mov w20, #1 // =0x1
@@ -492,14 +498,16 @@ define i64 @caller3() {
; WIN-NEXT: str x8, [sp]
; WIN-NEXT: bl callee_with_many_param
; WIN-NEXT: .seh_startepilogue
-; WIN-NEXT: ldp d14, d15, [sp, #152] // 16-byte Folded Reload
-; WIN-NEXT: .seh_save_fregp d14, 152
-; WIN-NEXT: ldp d12, d13, [sp, #136] // 16-byte Folded Reload
-; WIN-NEXT: .seh_save_fregp d12, 136
-; WIN-NEXT: ldp d10, d11, [sp, #120] // 16-byte Folded Reload
-; WIN-NEXT: .seh_save_fregp d10, 120
-; WIN-NEXT: ldp d8, d9, [sp, #104] // 16-byte Folded Reload
-; WIN-NEXT: .seh_save_fregp d8, 104
+; WIN-NEXT: ldr d15, [sp, #160] // 8-byte Folded Reload
+; WIN-NEXT: .seh_save_freg d15, 160
+; WIN-NEXT: ldp d13, d14, [sp, #144] // 16-byte Folded Reload
+; WIN-NEXT: .seh_save_fregp d13, 144
+; WIN-NEXT: ldp d11, d12, [sp, #128] // 16-byte Folded Reload
+; WIN-NEXT: .seh_save_fregp d11, 128
+; WIN-NEXT: ldp d9, d10, [sp, #112] // 16-byte Folded Reload
+; WIN-NEXT: .seh_save_fregp d9, 112
+; WIN-NEXT: ldr d8, [sp, #104] // 8-byte Folded Reload
+; WIN-NEXT: .seh_save_freg d8, 104
; WIN-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
; WIN-NEXT: .seh_save_reg x30, 96
; WIN-NEXT: ldp x27, x28, [sp, #80] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/seh-extended-spills.ll b/llvm/test/CodeGen/AArch64/seh-extended-spills.ll
index ecc22703ef584..e0c2a2e042077 100644
--- a/llvm/test/CodeGen/AArch64/seh-extended-spills.ll
+++ b/llvm/test/CodeGen/AArch64/seh-extended-spills.ll
@@ -12,23 +12,28 @@ entry:
attributes #0 = { nounwind uwtable(sync) }
-; CHECK: stp x9, x10, [sp, #[[OFFSET_0:[0-9]+]]]
-; CHECK-NEXT: .seh_save_any_reg_p x9, [[OFFSET_0]]
-; CHECK: stp x11, x12, [sp, #[[OFFSET_1:[0-9]+]]]
-; CHECK-NEXT: .seh_save_any_reg_p x11, [[OFFSET_1]]
-; CHECK: stp x13, x14, [sp, #[[OFFSET_2:[0-9]+]]]
-; CHECK-NEXT: .seh_save_any_reg_p x13, [[OFFSET_2]]
-; CHECK: str x15, [sp, #[[OFFSET_3:[0-9]+]]]
-; CHECK-NEXT: .seh_save_any_reg x15, [[OFFSET_3]]
+; CHECK: str x30, [sp, #[[OFFSET_0:[0-9]+]]]
+; CHECK-NEXT: .seh_save_reg x30, [[OFFSET_0]]
+; CHECK: str x9, [sp, #[[OFFSET_1:[0-9]+]]]
+; CHECK-NEXT: .seh_save_any_reg x9, [[OFFSET_1]]
+; CHECK: stp x10, x11, [sp, #[[OFFSET_2:[0-9]+]]]
+; CHECK-NEXT: .seh_save_any_reg_p x10, [[OFFSET_2]]
+; CHECK: stp x12, x13, [sp, #[[OFFSET_3:[0-9]+]]]
+; CHECK-NEXT: .seh_save_any_reg_p x12, [[OFFSET_3]]
+; CHECK: stp x14, x15, [sp, #[[OFFSET_4:[0-9]+]]]
+; CHECK-NEXT: .seh_save_any_reg_p x14, [[OFFSET_4]]
; CHECK: .seh_endprologue
; CHECK: .seh_startepilogue
-; CHECK: ldr x15, [sp, #[[OFFSET_3]]]
-; CHECK-NEXT: .seh_save_any_reg x15, [[OFFSET_3]]
-; CHECK: ldp x13, x14, [sp, #[[OFFSET_2]]]
-; CHECK-NEXT: .seh_save_any_reg_p x13, [[OFFSET_2]]
-; CHECK: ldp x11, x12, [sp, #[[OFFSET_1]]]
-; CHECK-NEXT: .seh_save_any_reg_p x11, [[OFFSET_1]]
-; CHECK: ldp x9, x10, [sp, #[[OFFSET_0]]]
-; CHECK-NEXT: .seh_save_any_reg_p x9, [[OFFSET_0]]
+; CHECK: ldp x14, x15, [sp, #[[OFFSET_4]]]
+; CHECK-NEXT: .seh_save_any_reg_p x14, [[OFFSET_4]]
+; CHECK: ldp x12, x13, [sp, #[[OFFSET_3]]]
+; CHECK-NEXT: .seh_save_any_reg_p x12, [[OFFSET_3]]
+; CHECK: ldp x10, x11, [sp, #[[OFFSET_2]]]
+; CHECK-NEXT: .seh_save_any_reg_p x10, [[OFFSET_2]]
+; CHECK: ldr x9, [sp, #[[OFFSET_1]]]
+; CHECK-NEXT: .seh_save_any_reg x9, [[OFFSET_1]]
+; CHECK: ldr x30, [sp, #[[OFFSET_0]]]
+; CHECK-NEXT: .seh_save_reg x30, [[OFFSET_0]]
+
; CHECK: .seh_endepilogue
diff --git a/llvm/test/CodeGen/AArch64/stack-hazard-windows.ll b/llvm/test/CodeGen/AArch64/stack-hazard-windows.ll
index 927d8b68c46be..f519cabc13157 100644
--- a/llvm/test/CodeGen/AArch64/stack-hazard-windows.ll
+++ b/llvm/test/CodeGen/AArch64/stack-hazard-windows.ll
@@ -11,8 +11,10 @@ define i32 @fpr_csr_stackobj(double %x) "aarch64_pstate_sm_compatible" "frame-po
; CHECK0-NEXT: .seh_save_reg_x x23, 48
; CHECK0-NEXT: stp x29, x30, [sp, #8] // 16-byte Folded Spill
; CHECK0-NEXT: .seh_save_fplr 8
-; CHECK0-NEXT: stp d9, d10, [sp, #24] // 16-byte Folded Spill
-; CHECK0-NEXT: .seh_save_fregp d9, 24
+; CHECK0-NEXT: str d9, [sp, #24] // 8-byte Folded Spill
+; CHECK0-NEXT: .seh_save_freg d9, 24
+; CHECK0-NEXT: str d10, [sp, #32] // 8-byte Folded Spill
+; CHECK0-NEXT: .seh_save_freg d10, 32
; CHECK0-NEXT: add x29, sp, #8
; CHECK0-NEXT: .seh_add_fp 8
; CHECK0-NEXT: .seh_endprologue
@@ -21,8 +23,10 @@ define i32 @fpr_csr_stackobj(double %x) "aarch64_pstate_sm_compatible" "frame-po
; CHECK0-NEXT: //NO_APP
; CHECK0-NEXT: str d0, [x29, #32]
; CHECK0-NEXT: .seh_startepilogue
-; CHECK0-NEXT: ldp d9, d10, [sp, #24] // 16-byte Folded Reload
-; CHECK0-NEXT: .seh_save_fregp d9, 24
+; CHECK0-NEXT: ldr d10, [sp, #32] // 8-byte Folded Reload
+; CHECK0-NEXT: .seh_save_freg d10, 32
+; CHECK0-NEXT: ldr d9, [sp, #24] // 8-byte Folded Reload
+; CHECK0-NEXT: .seh_save_freg d9, 24
; CHECK0-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
; CHECK0-NEXT: .seh_save_fplr 8
; CHECK0-NEXT: ldr x23, [sp], #48 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/wineh-frame2.mir b/llvm/test/CodeGen/AArch64/wineh-frame2.mir
index 1c8cb1e79a998..385d846bd2bfa 100644
--- a/llvm/test/CodeGen/AArch64/wineh-frame2.mir
+++ b/llvm/test/CodeGen/AArch64/wineh-frame2.mir
@@ -4,20 +4,21 @@
# CHECK: early-clobber $sp = frame-setup STRXpre killed $x19, $sp, -48
# CHECK-NEXT: frame-setup SEH_SaveReg_X 19, -48
-# CHECK-NEXT: frame-setup STPDi killed $d8, killed $d9, $sp, 1
-# CHECK-NEXT: frame-setup SEH_SaveFRegP 8, 9, 8
-# CHECK-NEXT: frame-setup STPDi killed $d10, killed $d11, $sp, 3
-# CHECK-NEXT: frame-setup SEH_SaveFRegP 10, 11, 24
-# CHECK-NEXT: frame-setup STRDui killed $d12, $sp, 5
-# CHECK-NEXT: frame-setup SEH_SaveFReg 12, 40
+# CHECK-NEXT: frame-setup STRDui killed $d8, $sp, 1
+# CHECK-NEXT: frame-setup SEH_SaveFReg 8, 8
+# CHECK-NEXT: frame-setup STPDi killed $d9, killed $d10, $sp, 2
+# CHECK-NEXT: frame-setup SEH_SaveFRegP 9, 10, 16
+# CHECK-NEXT: frame-setup STPDi killed $d11, killed $d12, $sp, 4
+# CHECK-NEXT: frame-setup SEH_SaveFRegP 11, 12, 32
# CHECK-NEXT: frame-setup SEH_PrologEnd
+
# CHECK: frame-destroy SEH_EpilogStart
-# CHECK-NEXT: $d12 = frame-destroy LDRDui $sp, 5
-# CHECK-NEXT: frame-destroy SEH_SaveFReg 12, 40
-# CHECK-NEXT: $d10, $d11 = frame-destroy LDPDi $sp, 3
-# CHECK-NEXT: frame-destroy SEH_SaveFRegP 10, 11, 24
-# CHECK-NEXT: $d8, $d9 = frame-destroy LDPDi $sp, 1
-# CHECK-NEXT: frame-destroy SEH_SaveFRegP 8, 9, 8
+# CHECK-NEXT: $d11, $d12 = frame-destroy LDPDi $sp, 4
+# CHECK-NEXT: frame-destroy SEH_SaveFRegP 11, 12, 32
+# CHECK-NEXT: $d9, $d10 = frame-destroy LDPDi $sp, 2
+# CHECK-NEXT: frame-destroy SEH_SaveFRegP 9, 10, 16
+# CHECK-NEXT: $d8 = frame-destroy LDRDui $sp, 1
+# CHECK-NEXT: frame-destroy SEH_SaveFReg 8, 8
# CHECK-NEXT: early-clobber $sp, $x19 = frame-destroy LDRXpost $sp, 48
# CHECK-NEXT: frame-destroy SEH_SaveReg_X 19, -48
# CHECK-NEXT: frame-destroy SEH_EpilogEnd
More information about the llvm-commits
mailing list