[llvm] [AArch64] Ensure FPR128 callee-save stack offsets are aligned (PR #184314)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 6 07:36:30 PST 2026
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/184314
>From 516dd79311c7bcabcae9eec1c1ca63615185a204 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 2 Mar 2026 17:26:13 +0000
Subject: [PATCH 1/2] [AArch64] Ensure FPR128 callee-save stack offsets are
aligned
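Previously, an FPR128 (Q register) callee-save could be assigned a stack
offset that was not 16-byte aligned, e.g. after spilling an odd number of
X registers. I believe this was mostly benign, as the offset is truncated
when it is divided by the scale. However, it would result in assertion
failures for builds with `-DLLVM_ENABLE_ASSERTIONS=On`.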
Fixes: #183708
---
.../Target/AArch64/AArch64FrameLowering.cpp | 20 ++++++--
.../CodeGen/AArch64/framelayout-fp-csr.ll | 47 ++++++++++++++-----
.../AArch64/framelayout-fpr128-spill.mir | 26 ++++++++++
3 files changed, 77 insertions(+), 16 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/framelayout-fpr128-spill.mir
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index e524c98984ee7..6790b80086136 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1730,6 +1730,12 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
Register LastReg = 0;
bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex() && !SplitPPRs;
+ auto AlignOffset = [StackFillDir](int Offset, int Align) {
+ if (StackFillDir < 0)
+ return alignDown(Offset, Align);
+ return alignTo(Offset, Align);
+ };
+
// When iterating backwards, the loop condition relies on unsigned wraparound.
for (unsigned i = FirstReg; i < Count; i += RegInc) {
RegPairInfo RPI;
@@ -1844,11 +1850,15 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair
RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
- // Realign the scalable offset if necessary. This is relevant when
- // spilling predicates on Windows.
- if (RPI.isScalable() && ScalableByteOffset % Scale != 0) {
- ScalableByteOffset = alignTo(ScalableByteOffset, Scale);
- }
+ // Realign the scalable offset if necessary. This is relevant when spilling
+ // predicates on Windows.
+ if (RPI.isScalable() && ScalableByteOffset % Scale != 0)
+ ScalableByteOffset = AlignOffset(ScalableByteOffset, Scale);
+
+ // Realign the fixed offset if necessary. This is relevant when spilling Q
+ // registers after spilling an odd number of X registers.
+ if (!RPI.isScalable() && ByteOffset % Scale != 0)
+ ByteOffset = AlignOffset(ByteOffset, Scale);
int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
assert(OffsetPre % Scale == 0);
diff --git a/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll b/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll
index 3b13dee29f069..49127557a1293 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll
+++ b/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra --frame-pointer=all < %s | FileCheck %s
; The purpose of this test is to verify that frame pointer (x29)
@@ -5,18 +6,42 @@
; point registers. The frame pointer should point to the frame
; record, which is located 16 bytes above the end of the CSR
; space when a single FP CSR is in use.
-define void @test1(i32) #26 {
+define void @test1(i32) nounwind {
+; CHECK-LABEL: test1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #16
+; CHECK-NEXT: //APP
+; CHECK-NEXT: nop
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: ret
entry:
- call void asm sideeffect "nop", "~{d8}"() #26
+ call void asm sideeffect "nop", "~{d8}"()
ret void
}
-; CHECK-LABEL: test1:
-; CHECK: str d8, [sp, #-32]!
-; CHECK-NEXT: stp x29, x30, [sp, #16]
-; CHECK-NEXT: add x29, sp, #16
-; CHECK: nop
-; CHECK: ldp x29, x30, [sp, #16]
-; CHECK-NEXT: ldr d8, [sp], #32
-; CHECK-NEXT: ret
-attributes #26 = { nounwind }
+; The purpose of this test is to verify q8 is assigned a 16-byte aligned offset
+; after the x10 is assigned an offset. The CSR (on Linux) are assigned offsets
+; in the order GPRs then FPRs. The stack size of this function is 48
+; (alignTo((16 + 8 * 3), 16)), so after x8 is given the offset 24, q8 originally
+; would be assigned offset 8, which is not 16-byte aligned.
+define preserve_allcc void @d(ptr %ptr) nounwind {
+; CHECK-LABEL: d:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str q8, [sp, #-48]! // 16-byte Folded Spill
+; CHECK-NEXT: str x10, [sp, #24] // 8-byte Spill
+; CHECK-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #32
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x10, [sp, #24] // 8-byte Reload
+; CHECK-NEXT: ldr q8, [sp], #48 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ tail call void asm sideeffect "", "~{x10},~{q8}"()
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/framelayout-fpr128-spill.mir b/llvm/test/CodeGen/AArch64/framelayout-fpr128-spill.mir
new file mode 100644
index 0000000000000..282e621d1bb4c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/framelayout-fpr128-spill.mir
@@ -0,0 +1,26 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s
+
+--- |
+ ; Tests Q8 is assigned a 16-byte aligned offset after X10 is assigned an offset.
+ define preserve_allcc void @test_fpr128_spill_alignment() nounwind { entry: unreachable }
+...
+---
+name: test_fpr128_spill_alignment
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: test_fpr128_spill_alignment
+ ; CHECK: liveins: $q8, $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: early-clobber $sp = frame-setup STRQpre killed $q8, $sp, -32 :: (store (s128) into %stack.1)
+ ; CHECK-NEXT: frame-setup STRXui killed $x10, $sp, 3 :: (store (s64) into %stack.0)
+ ; CHECK-NEXT: $q8 = IMPLICIT_DEF
+ ; CHECK-NEXT: $x10 = IMPLICIT_DEF
+ ; CHECK-NEXT: $x10 = frame-destroy LDRXui $sp, 3 :: (load (s64) from %stack.0)
+ ; CHECK-NEXT: early-clobber $sp, $q8 = frame-destroy LDRQpost $sp, 32 :: (load (s128) from %stack.1)
+ ; CHECK-NEXT: RET_ReallyLR
+ $q8 = IMPLICIT_DEF
+ $x10 = IMPLICIT_DEF
+ RET_ReallyLR
+...
>From a45de7fc4c63cbdb4a27dbbd1e465db926dea3b8 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 6 Mar 2026 15:35:27 +0000
Subject: [PATCH 2/2] Fixups
---
.../CodeGen/AArch64/framelayout-fp-csr.ll | 47 +++++--------------
.../CodeGen/AArch64/framelayout-fpr128-csr.ll | 33 +++++++++++++
.../AArch64/framelayout-fpr128-spill.mir | 12 +++++
3 files changed, 56 insertions(+), 36 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/framelayout-fpr128-csr.ll
diff --git a/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll b/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll
index 49127557a1293..3b13dee29f069 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll
+++ b/llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra --frame-pointer=all < %s | FileCheck %s
; The purpose of this test is to verify that frame pointer (x29)
@@ -6,42 +5,18 @@
; point registers. The frame pointer should point to the frame
; record, which is located 16 bytes above the end of the CSR
; space when a single FP CSR is in use.
-define void @test1(i32) nounwind {
-; CHECK-LABEL: test1:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: add x29, sp, #16
-; CHECK-NEXT: //APP
-; CHECK-NEXT: nop
-; CHECK-NEXT: //NO_APP
-; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+define void @test1(i32) #26 {
entry:
- call void asm sideeffect "nop", "~{d8}"()
+ call void asm sideeffect "nop", "~{d8}"() #26
ret void
}
+; CHECK-LABEL: test1:
+; CHECK: str d8, [sp, #-32]!
+; CHECK-NEXT: stp x29, x30, [sp, #16]
+; CHECK-NEXT: add x29, sp, #16
+; CHECK: nop
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK-NEXT: ldr d8, [sp], #32
+; CHECK-NEXT: ret
-; The purpose of this test is to verify q8 is assigned a 16-byte aligned offset
-; after the x10 is assigned an offset. The CSR (on Linux) are assigned offsets
-; in the order GPRs then FPRs. The stack size of this function is 48
-; (alignTo((16 + 8 * 3), 16)), so after x8 is given the offset 24, q8 originally
-; would be assigned offset 8, which is not 16-byte aligned.
-define preserve_allcc void @d(ptr %ptr) nounwind {
-; CHECK-LABEL: d:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str q8, [sp, #-48]! // 16-byte Folded Spill
-; CHECK-NEXT: str x10, [sp, #24] // 8-byte Spill
-; CHECK-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: add x29, sp, #32
-; CHECK-NEXT: //APP
-; CHECK-NEXT: //NO_APP
-; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x10, [sp, #24] // 8-byte Reload
-; CHECK-NEXT: ldr q8, [sp], #48 // 16-byte Folded Reload
-; CHECK-NEXT: ret
-entry:
- tail call void asm sideeffect "", "~{x10},~{q8}"()
- ret void
-}
+attributes #26 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/framelayout-fpr128-csr.ll b/llvm/test/CodeGen/AArch64/framelayout-fpr128-csr.ll
new file mode 100644
index 0000000000000..09bfbc5d4c82b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/framelayout-fpr128-csr.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-windows-msvc < %s | FileCheck %s --check-prefix=CHECK-WINDOWS
+
+; The purpose of this test is to verify q8 is assigned a 16-byte aligned offset
+; after x10 is assigned an offset. The CSRs (on Linux) are assigned offsets in
+; the order GPRs then FPRs. The stack size of this function is 32
+; (alignTo(8 + 16, 16)), so after x10 is given the offset 24, q8 originally
+; would be assigned offset 8, which is not 16-byte aligned.
+define preserve_allcc void @d(ptr %ptr) nounwind {
+; CHECK-LABEL: d:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str q8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x10, [sp, #24] // 8-byte Spill
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ldr x10, [sp, #24] // 8-byte Reload
+; CHECK-NEXT: ldr q8, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; CHECK-WINDOWS-LABEL: d:
+; CHECK-WINDOWS: // %bb.0: // %entry
+; CHECK-WINDOWS-NEXT: str x10, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-WINDOWS-NEXT: str q8, [sp, #16] // 16-byte Spill
+; CHECK-WINDOWS-NEXT: //APP
+; CHECK-WINDOWS-NEXT: //NO_APP
+; CHECK-WINDOWS-NEXT: ldr q8, [sp, #16] // 16-byte Reload
+; CHECK-WINDOWS-NEXT: ldr x10, [sp], #32 // 8-byte Folded Reload
+; CHECK-WINDOWS-NEXT: ret
+entry:
+ tail call void asm sideeffect "", "~{x10},~{q8}"()
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/framelayout-fpr128-spill.mir b/llvm/test/CodeGen/AArch64/framelayout-fpr128-spill.mir
index 282e621d1bb4c..a6236bd917129 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-fpr128-spill.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-fpr128-spill.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-windows-msvc -run-pass=prologepilog %s -o - | FileCheck %s --check-prefix=CHECK-WINDOWS
--- |
; Tests Q8 is assigned a 16-byte aligned offset after X10 is assigned an offset.
@@ -20,6 +21,17 @@ body: |
; CHECK-NEXT: $x10 = frame-destroy LDRXui $sp, 3 :: (load (s64) from %stack.0)
; CHECK-NEXT: early-clobber $sp, $q8 = frame-destroy LDRQpost $sp, 32 :: (load (s128) from %stack.1)
; CHECK-NEXT: RET_ReallyLR
+ ;
+ ; CHECK-WINDOWS-LABEL: name: test_fpr128_spill_alignment
+ ; CHECK-WINDOWS: liveins: $x10, $q8
+ ; CHECK-WINDOWS-NEXT: {{ $}}
+ ; CHECK-WINDOWS-NEXT: early-clobber $sp = frame-setup STRXpre killed $x10, $sp, -32 :: (store (s64) into %stack.1)
+ ; CHECK-WINDOWS-NEXT: frame-setup STRQui killed $q8, $sp, 1 :: (store (s128) into %stack.0)
+ ; CHECK-WINDOWS-NEXT: $q8 = IMPLICIT_DEF
+ ; CHECK-WINDOWS-NEXT: $x10 = IMPLICIT_DEF
+ ; CHECK-WINDOWS-NEXT: $q8 = frame-destroy LDRQui $sp, 1 :: (load (s128) from %stack.0)
+ ; CHECK-WINDOWS-NEXT: early-clobber $sp, $x10 = frame-destroy LDRXpost $sp, 32 :: (load (s64) from %stack.1)
+ ; CHECK-WINDOWS-NEXT: RET_ReallyLR
$q8 = IMPLICIT_DEF
$x10 = IMPLICIT_DEF
RET_ReallyLR
More information about the llvm-commits
mailing list