[llvm-branch-commits] [llvm] release/22.x: Fix comment in FPR128 test (NFC) (PR #185927)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Mar 11 10:15:23 PDT 2026


https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/185927

Backport 327f1adef8df6afc07f6c88cfa380c97399af3dc 4c31b6f93c7d8499b93cd6d29b8874a62f2cfed0

Requested by: @MacDue

>From 5b0f764d0b52edd921c75d444745976d68eb0f8a Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Sun, 8 Mar 2026 09:38:42 +0000
Subject: [PATCH 1/2] [AArch64] Ensure FPR128 callee-save stack offsets are
 aligned (#184314)

This was benign for Linux targets (as the offset would be correctly
truncated when dividing by the scale), so it only resulted in failures
with `-DLLVM_ENABLE_ASSERTIONS=On`. On Windows, this was a miscompile, as
the lack of alignment would result in the FPR128 callee-save being
assigned to the same offset as the previous GPR.

Fixes: #183708
(cherry picked from commit 327f1adef8df6afc07f6c88cfa380c97399af3dc)
---
 .../Target/AArch64/AArch64FrameLowering.cpp   | 20 +++++++---
 .../CodeGen/AArch64/framelayout-fpr128-csr.ll | 33 ++++++++++++++++
 .../AArch64/framelayout-fpr128-spill.mir      | 38 +++++++++++++++++++
 3 files changed, 86 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/framelayout-fpr128-csr.ll
 create mode 100644 llvm/test/CodeGen/AArch64/framelayout-fpr128-spill.mir

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 4fb7c62156733..d89a972f5de27 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1736,6 +1736,12 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
   Register LastReg = 0;
   bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex() && !SplitPPRs;
 
+  auto AlignOffset = [StackFillDir](int Offset, int Align) {
+    if (StackFillDir < 0)
+      return alignDown(Offset, Align);
+    return alignTo(Offset, Align);
+  };
+
   // When iterating backwards, the loop condition relies on unsigned wraparound.
   for (unsigned i = FirstReg; i < Count; i += RegInc) {
     RegPairInfo RPI;
@@ -1851,11 +1857,15 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
         RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair
       RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
 
-    // Realign the scalable offset if necessary.  This is relevant when
-    // spilling predicates on Windows.
-    if (RPI.isScalable() && ScalableByteOffset % Scale != 0) {
-      ScalableByteOffset = alignTo(ScalableByteOffset, Scale);
-    }
+    // Realign the scalable offset if necessary. This is relevant when spilling
+    // predicates on Windows.
+    if (RPI.isScalable() && ScalableByteOffset % Scale != 0)
+      ScalableByteOffset = AlignOffset(ScalableByteOffset, Scale);
+
+    // Realign the fixed offset if necessary. This is relevant when spilling Q
+    // registers after spilling an odd amount of X registers.
+    if (!RPI.isScalable() && ByteOffset % Scale != 0)
+      ByteOffset = AlignOffset(ByteOffset, Scale);
 
     int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
     assert(OffsetPre % Scale == 0);
diff --git a/llvm/test/CodeGen/AArch64/framelayout-fpr128-csr.ll b/llvm/test/CodeGen/AArch64/framelayout-fpr128-csr.ll
new file mode 100644
index 0000000000000..09bfbc5d4c82b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/framelayout-fpr128-csr.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-windows-msvc < %s | FileCheck %s --check-prefix=CHECK-WINDOWS
+
+; The purpose of this test is to verify q8 is assigned a 16-byte aligned offset
+; after the x10 is assigned an offset. The CSR (on Linux) are assigned offsets
+; in the order GPRs then FPRs. The stack size of this function is 48
+; (alignTo((16 + 8 * 3), 16)), so after x8 is given the offset 24, q8 originally
+; would be assigned offset 8, which is not 16-byte aligned.
+define preserve_allcc void @d(ptr %ptr) nounwind {
+; CHECK-LABEL: d:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str q8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT:    str x10, [sp, #24] // 8-byte Spill
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    ldr x10, [sp, #24] // 8-byte Reload
+; CHECK-NEXT:    ldr q8, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; CHECK-WINDOWS-LABEL: d:
+; CHECK-WINDOWS:       // %bb.0: // %entry
+; CHECK-WINDOWS-NEXT:    str x10, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-WINDOWS-NEXT:    str q8, [sp, #16] // 16-byte Spill
+; CHECK-WINDOWS-NEXT:    //APP
+; CHECK-WINDOWS-NEXT:    //NO_APP
+; CHECK-WINDOWS-NEXT:    ldr q8, [sp, #16] // 16-byte Reload
+; CHECK-WINDOWS-NEXT:    ldr x10, [sp], #32 // 8-byte Folded Reload
+; CHECK-WINDOWS-NEXT:    ret
+entry:
+  tail call void asm sideeffect "", "~{x10},~{q8}"()
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/framelayout-fpr128-spill.mir b/llvm/test/CodeGen/AArch64/framelayout-fpr128-spill.mir
new file mode 100644
index 0000000000000..a6236bd917129
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/framelayout-fpr128-spill.mir
@@ -0,0 +1,38 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-windows-msvc -run-pass=prologepilog %s -o - | FileCheck %s --check-prefix=CHECK-WINDOWS
+
+--- |
+  ; Tests Q8 is assigned a 16-byte aligned offset after X10 is assigned an offset.
+  define preserve_allcc void @test_fpr128_spill_alignment() nounwind { entry: unreachable }
+...
+---
+name: test_fpr128_spill_alignment
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: test_fpr128_spill_alignment
+    ; CHECK: liveins: $q8, $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: early-clobber $sp = frame-setup STRQpre killed $q8, $sp, -32 :: (store (s128) into %stack.1)
+    ; CHECK-NEXT: frame-setup STRXui killed $x10, $sp, 3 :: (store (s64) into %stack.0)
+    ; CHECK-NEXT: $q8 = IMPLICIT_DEF
+    ; CHECK-NEXT: $x10 = IMPLICIT_DEF
+    ; CHECK-NEXT: $x10 = frame-destroy LDRXui $sp, 3 :: (load (s64) from %stack.0)
+    ; CHECK-NEXT: early-clobber $sp, $q8 = frame-destroy LDRQpost $sp, 32 :: (load (s128) from %stack.1)
+    ; CHECK-NEXT: RET_ReallyLR
+    ;
+    ; CHECK-WINDOWS-LABEL: name: test_fpr128_spill_alignment
+    ; CHECK-WINDOWS: liveins: $x10, $q8
+    ; CHECK-WINDOWS-NEXT: {{  $}}
+    ; CHECK-WINDOWS-NEXT: early-clobber $sp = frame-setup STRXpre killed $x10, $sp, -32 :: (store (s64) into %stack.1)
+    ; CHECK-WINDOWS-NEXT: frame-setup STRQui killed $q8, $sp, 1 :: (store (s128) into %stack.0)
+    ; CHECK-WINDOWS-NEXT: $q8 = IMPLICIT_DEF
+    ; CHECK-WINDOWS-NEXT: $x10 = IMPLICIT_DEF
+    ; CHECK-WINDOWS-NEXT: $q8 = frame-destroy LDRQui $sp, 1 :: (load (s128) from %stack.0)
+    ; CHECK-WINDOWS-NEXT: early-clobber $sp, $x10 = frame-destroy LDRXpost $sp, 32 :: (load (s64) from %stack.1)
+    ; CHECK-WINDOWS-NEXT: RET_ReallyLR
+    $q8 = IMPLICIT_DEF
+    $x10 = IMPLICIT_DEF
+    RET_ReallyLR
+...

>From 6652e3af5fea9732b02e6b2911a8ad9ab5b6f669 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <macdue at dueutil.tech>
Date: Sun, 8 Mar 2026 10:31:57 +0000
Subject: [PATCH 2/2] Fix comment in FPR128 test (NFC)

(cherry picked from commit 4c31b6f93c7d8499b93cd6d29b8874a62f2cfed0)
---
 llvm/test/CodeGen/AArch64/framelayout-fpr128-csr.ll | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/framelayout-fpr128-csr.ll b/llvm/test/CodeGen/AArch64/framelayout-fpr128-csr.ll
index 09bfbc5d4c82b..4cce7ec8a47cd 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-fpr128-csr.ll
+++ b/llvm/test/CodeGen/AArch64/framelayout-fpr128-csr.ll
@@ -3,9 +3,9 @@
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-windows-msvc < %s | FileCheck %s --check-prefix=CHECK-WINDOWS
 
 ; The purpose of this test is to verify q8 is assigned a 16-byte aligned offset
-; after the x10 is assigned an offset. The CSR (on Linux) are assigned offsets
-; in the order GPRs then FPRs. The stack size of this function is 48
-; (alignTo((16 + 8 * 3), 16)), so after x8 is given the offset 24, q8 originally
+; after the x10 is assigned an offset. The CSRs (on Linux) are assigned offsets
+; in the order GPRs then FPRs. The stack size of this function is 32
+; (alignTo((16 + 8), 16)), so after x10 is given the offset 24, q8 originally
 ; would be assigned offset 8, which is not 16-byte aligned.
 define preserve_allcc void @d(ptr %ptr) nounwind {
 ; CHECK-LABEL: d:



More information about the llvm-branch-commits mailing list