[llvm] [AArch64][SME] Fix accessing the emergency spill slot with hazard padding (PR #142190)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Fri May 30 11:41:40 PDT 2025
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/142190
>From d86c0f120728b4602b1ea7d00723d9484265b38f Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 30 May 2025 17:28:46 +0000
Subject: [PATCH 1/2] [AArch64][SME] Fix accessing the emergency spill slot
with hazard padding
This patch fixes an issue where, when hazard padding was enabled, locals —
including the emergency spill slot — could not be directly addressed.
Generally, this is fine: we can materialize the constant offset in a
scratch register. But if there's no register free we need to spill, and
if we can't even reach the emergency spill slot then we fail to compile.
This patch fixes this by ensuring that if a function has variable-sized
objects and is likely to have hazard padding we enable the base pointer.
Then if we know a function has hazard padding, place the emergency spill
slot next to the BP/SP, to ensure it can be directly accessed without
stepping over any hazard padding.
---
.../Target/AArch64/AArch64RegisterInfo.cpp | 18 ++-
...ramelayout-scavengingslot-stack-hazard.mir | 99 ++++++++++++++
llvm/test/CodeGen/AArch64/sme-agnostic-za.ll | 27 ++--
.../CodeGen/AArch64/sme-framelower-use-bp.ll | 121 +++++++-----------
.../CodeGen/AArch64/sme-lazy-save-call.ll | 13 +-
llvm/test/CodeGen/AArch64/stack-hazard.ll | 43 +++----
.../CodeGen/AArch64/sve-stack-frame-layout.ll | 13 +-
7 files changed, 212 insertions(+), 122 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 8caa49de0af43..da3e33429a87a 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -18,6 +18,7 @@
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64InstPrinter.h"
+#include "Utils/AArch64SMEAttributes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
@@ -632,14 +633,26 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
return true;
auto &ST = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
if (ST.hasSVE() || ST.isStreaming()) {
- const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// Frames that have variable sized objects and scalable SVE objects,
// should always use a basepointer.
if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
return true;
}
+ // Frames with hazard padding can have a large offset between the frame
+ // pointer and GPR locals, which includes the emergency spill slot. If the
+ // emergency spill slot is not within range of the load/store instructions
+ // (which have a signed 9-bit range), we will fail to compile if it is used.
+ // Since hasBasePointer() is called before we know if we have hazard padding
+ // or an emergency spill slot we need to enable the basepointer
+ // conservatively.
+ if (AFI->hasStackHazardSlotIndex() ||
+ !SMEAttrs(MF.getFunction()).hasNonStreamingInterfaceAndBody()) {
+ return true;
+ }
+
// Conservatively estimate whether the negative offset from the frame
// pointer will be sufficient to reach. If a function has a smallish
// frame, it's less likely to have lots of spills and callee saved
@@ -764,7 +777,8 @@ AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() ||
AFI->hasCalculatedStackSizeSVE()) &&
"Expected SVE area to be calculated by this point");
- return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE();
+ return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE() &&
+ !AFI->hasStackHazardSlotIndex();
}
bool AArch64RegisterInfo::requiresFrameIndexScavenging(
diff --git a/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir
new file mode 100644
index 0000000000000..52ac36f801854
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir
@@ -0,0 +1,99 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-stack-hazard-size=1024 -run-pass=prologepilog %s -o - | FileCheck %s
+--- |
+
+ define void @stack_hazard_streaming_compat() "aarch64_pstate_sm_compatible" { entry: unreachable }
+ define void @stack_hazard_streaming_compat_emergency_spill_slot() "aarch64_pstate_sm_compatible" { entry: unreachable }
+
+...
+
+# +------------------+
+# | GPR callee-saves |
+# +------------------+ <- FP
+# | <hazard padding> |
+# +------------------+
+# | FPR locals |
+# | %stack.1 |
+# +------------------+
+# | <hazard padding> |
+# +------------------+
+# | GPR locals |
+# | %stack.2 |
+# | <emergency spill>|
+# +------------------+ <- BP
+# | <VLA> |
+# +------------------+ <- SP (can't be used due to VLA)
+
+# In this case without the base pointer we'd need the emergency spill slot to
+# access both %stack.1 and %stack.2. With the base pointer we can reach both
+# without spilling.
+
+name: stack_hazard_streaming_compat
+# CHECK-LABEL: name: stack_hazard_streaming_compat
+# CHECK: bb.0:
+# CHECK: STRDui $d0, $x19, 131
+# CHECK-NEXT: STRXui $x0, $x19, 1
+# CHECK: bb.1:
+tracksRegLiveness: true
+frameInfo:
+ isFrameAddressTaken: true
+stack:
+ - { id: 0, type: variable-sized, alignment: 1 }
+ - { id: 1, size: 8, alignment: 8 }
+ - { id: 2, size: 8, alignment: 8 }
+body: |
+ bb.0:
+ liveins: $x0, $x8, $d0
+ $x9 = LDRXui $x0, 0 :: (load (s64))
+ STRDui $d0, %stack.1, 0 :: (store (s64) into %stack.1)
+ STRXui $x0, %stack.2, 0 :: (store (s64) into %stack.2)
+ B %bb.1
+ bb.1:
+ liveins: $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr
+ RET_ReallyLR implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27, implicit $x28, implicit $lr
+...
+---
+# +------------------+
+# | GPR callee-saves |
+# +------------------+ <- FP
+# | <hazard padding> |
+# +------------------+
+# | FPR locals |
+# | %stack.1 |
+# +------------------+
+# | <hazard padding> |
+# +------------------+
+# | GPR locals |
+# | %stack.2 | (very large)
+# | <emergency spill>|
+# +------------------+ <- BP
+# | <VLA> |
+# +------------------+ <- SP (can't be used due to VLA)
+
+# In this case we need to use the emergency spill slot to access %stack.1 as it
+# is too far from the frame pointer and the base pointer to directly address.
+# Note: This also tests that the <emergency spill> is located near the SP/BP.
+
+name: stack_hazard_streaming_compat_emergency_spill_slot
+# CHECK-LABEL: name: stack_hazard_streaming_compat_emergency_spill_slot
+# CHECK: bb.0:
+# CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $x19, 0
+# CHECK-NEXT: $[[SCRATCH]] = ADDXri $x19, 1056, 0
+# CHECK-NEXT: STRDui $d0, killed $[[SCRATCH]], 4095
+# CHECK-NEXT: $[[SCRATCH]] = LDRXui $x19, 0
+# CHECK: bb.1:
+tracksRegLiveness: true
+frameInfo:
+ isFrameAddressTaken: true
+stack:
+ - { id: 0, type: variable-sized, alignment: 1 }
+ - { id: 1, size: 8, alignment: 8 }
+ - { id: 2, size: 32761, alignment: 8 }
+body: |
+ bb.0:
+ liveins: $x0, $x8, $d0
+ $x9 = LDRXui $x0, 0 :: (load (s64))
+ STRDui $d0, %stack.1, 0 :: (store (s64) into %stack.1)
+ B %bb.1
+ bb.1:
+ liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr
+ RET_ReallyLR implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27, implicit $x28, implicit $lr
diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
index 1f68815411097..a38bbe00a40f4 100644
--- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
+++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
@@ -98,6 +98,7 @@ define i64 @streaming_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nou
; CHECK-NEXT: mov x0, x9
; CHECK-NEXT: add x29, sp, #64
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: bl __arm_sme_state_size
; CHECK-NEXT: sub sp, sp, x0
@@ -145,51 +146,53 @@ define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee(
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_get_current_vg
-; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT: stp x0, x21, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: mov x0, x9
; CHECK-NEXT: add x29, sp, #64
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: bl __arm_sme_state_size
; CHECK-NEXT: sub sp, sp, x0
-; CHECK-NEXT: mov x19, sp
-; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x20, sp
+; CHECK-NEXT: mov x0, x20
; CHECK-NEXT: bl __arm_sme_save
; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: and x20, x0, #0x1
-; CHECK-NEXT: tbz w20, #0, .LBB5_2
+; CHECK-NEXT: and x21, x0, #0x1
+; CHECK-NEXT: tbz w21, #0, .LBB5_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB5_2:
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: bl private_za_decl
; CHECK-NEXT: mov x2, x0
-; CHECK-NEXT: tbz w20, #0, .LBB5_4
+; CHECK-NEXT: tbz w21, #0, .LBB5_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB5_4:
-; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x0, x20
; CHECK-NEXT: bl __arm_sme_restore
-; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x0, x20
; CHECK-NEXT: bl __arm_sme_save
; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: and x20, x0, #0x1
-; CHECK-NEXT: tbz w20, #0, .LBB5_6
+; CHECK-NEXT: and x21, x0, #0x1
+; CHECK-NEXT: tbz w21, #0, .LBB5_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB5_6:
; CHECK-NEXT: mov x0, x2
; CHECK-NEXT: bl private_za_decl
; CHECK-NEXT: mov x1, x0
-; CHECK-NEXT: tbz w20, #0, .LBB5_8
+; CHECK-NEXT: tbz w21, #0, .LBB5_8
; CHECK-NEXT: // %bb.7:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB5_8:
-; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x0, x20
; CHECK-NEXT: bl __arm_sme_restore
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: sub sp, x29, #64
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x21, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
index 99c65b090adb0..026852867fdd6 100644
--- a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
+++ b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
@@ -19,6 +19,7 @@ define void @quux() #1 {
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #384
+; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: .cfi_def_cfa w29, 96
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
@@ -53,93 +54,78 @@ define void @quux() #1 {
; CHECK-NEXT: // implicit-def: $x9
; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: and x14, x9, #0x70
-; CHECK-NEXT: sub x9, x29, #120
-; CHECK-NEXT: stur x14, [x9, #-256] // 8-byte Folded Spill
+; CHECK-NEXT: str x14, [x19, #8] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: sub x10, x29, #112
-; CHECK-NEXT: stur x9, [x10, #-256] // 8-byte Folded Spill
+; CHECK-NEXT: str x9, [x19, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: sub x10, x29, #104
-; CHECK-NEXT: stur x9, [x10, #-256] // 8-byte Folded Spill
+; CHECK-NEXT: str x9, [x19, #24] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, x8
; CHECK-NEXT: incb x9
; CHECK-NEXT: mov w0, w9
; CHECK-NEXT: // implicit-def: $x9
; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: and x10, x9, #0x3f0
-; CHECK-NEXT: sub x9, x29, #96
-; CHECK-NEXT: stur x10, [x9, #-256] // 8-byte Folded Spill
+; CHECK-NEXT: str x10, [x19, #32] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: sub x11, x29, #88
-; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
+; CHECK-NEXT: str x9, [x19, #40] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: sub x11, x29, #80
-; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
+; CHECK-NEXT: str x9, [x19, #48] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: sub x11, x29, #72
-; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
+; CHECK-NEXT: str x9, [x19, #56] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: sub x11, x29, #64
-; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
+; CHECK-NEXT: str x9, [x19, #64] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: sub x11, x29, #56
-; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
+; CHECK-NEXT: str x9, [x19, #72] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: sub x11, x29, #48
-; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
+; CHECK-NEXT: str x9, [x19, #80] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: sub x11, x29, #40
-; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
+; CHECK-NEXT: str x9, [x19, #88] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: sub x11, x29, #32
-; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
+; CHECK-NEXT: str x9, [x19, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: sub x11, x29, #24
-; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
+; CHECK-NEXT: str x9, [x19, #104] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: sub x11, x29, #16
-; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
+; CHECK-NEXT: str x9, [x19, #112] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: sub x11, x29, #8
-; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
+; CHECK-NEXT: str x9, [x19, #120] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: stur x9, [x29, #-256] // 8-byte Folded Spill
+; CHECK-NEXT: str x9, [x19, #128] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: stur x9, [x29, #-248] // 8-byte Folded Spill
+; CHECK-NEXT: str x9, [x19, #136] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: stur x9, [x29, #-240] // 8-byte Folded Spill
+; CHECK-NEXT: str x9, [x19, #144] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, #16
; CHECK-NEXT: mov sp, x9
@@ -174,29 +160,29 @@ define void @quux() #1 {
; CHECK-NEXT: mov x2, sp
; CHECK-NEXT: subs x10, x2, #16
; CHECK-NEXT: mov sp, x10
-; CHECK-NEXT: stur x10, [x29, #-232] // 8-byte Folded Spill
+; CHECK-NEXT: str x10, [x19, #152] // 8-byte Folded Spill
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: subs x11, x10, x14
; CHECK-NEXT: mov sp, x11
; CHECK-NEXT: mov x10, x11
-; CHECK-NEXT: stur x10, [x29, #-224] // 8-byte Folded Spill
+; CHECK-NEXT: str x10, [x19, #160] // 8-byte Folded Spill
; CHECK-NEXT: mov x0, sp
; CHECK-NEXT: subs x10, x0, #16
; CHECK-NEXT: mov sp, x10
-; CHECK-NEXT: stur x10, [x29, #-216] // 8-byte Folded Spill
+; CHECK-NEXT: str x10, [x19, #168] // 8-byte Folded Spill
; CHECK-NEXT: mov x17, sp
; CHECK-NEXT: subs x10, x17, #16
; CHECK-NEXT: mov sp, x10
-; CHECK-NEXT: stur x10, [x29, #-208] // 8-byte Folded Spill
+; CHECK-NEXT: str x10, [x19, #176] // 8-byte Folded Spill
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: subs x10, x10, x14
; CHECK-NEXT: stur x10, [x29, #-32] // 8-byte Folded Spill
; CHECK-NEXT: mov sp, x10
-; CHECK-NEXT: stur x10, [x29, #-200] // 8-byte Folded Spill
+; CHECK-NEXT: str x10, [x19, #184] // 8-byte Folded Spill
; CHECK-NEXT: mov x15, sp
; CHECK-NEXT: subs x10, x15, #16
; CHECK-NEXT: mov sp, x10
-; CHECK-NEXT: stur x10, [x29, #-192] // 8-byte Folded Spill
+; CHECK-NEXT: str x10, [x19, #192] // 8-byte Folded Spill
; CHECK-NEXT: mov x13, sp
; CHECK-NEXT: subs x10, x13, #16
; CHECK-NEXT: mov sp, x10
@@ -535,46 +521,33 @@ define void @quux() #1 {
; CHECK-NEXT: b .LBB0_3
; CHECK-NEXT: .LBB0_3: // %bb178
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldur x9, [x29, #-232] // 8-byte Folded Reload
-; CHECK-NEXT: sub x8, x29, #80
-; CHECK-NEXT: ldur x8, [x8, #-256] // 8-byte Folded Reload
-; CHECK-NEXT: sub x10, x29, #88
-; CHECK-NEXT: ldur x10, [x10, #-256] // 8-byte Folded Reload
-; CHECK-NEXT: sub x11, x29, #104
-; CHECK-NEXT: ldur x11, [x11, #-256] // 8-byte Folded Reload
-; CHECK-NEXT: sub x12, x29, #112
-; CHECK-NEXT: ldur x12, [x12, #-256] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x9, [x19, #152] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x8, [x19, #48] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x10, [x19, #40] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x11, [x19, #24] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x12, [x19, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldur x13, [x29, #-152] // 8-byte Folded Reload
; CHECK-NEXT: ldur x14, [x29, #-160] // 8-byte Folded Reload
-; CHECK-NEXT: sub x15, x29, #48
-; CHECK-NEXT: ldur x17, [x15, #-256] // 8-byte Folded Reload
-; CHECK-NEXT: sub x15, x29, #56
-; CHECK-NEXT: ldur x18, [x15, #-256] // 8-byte Folded Reload
-; CHECK-NEXT: sub x15, x29, #64
-; CHECK-NEXT: ldur x0, [x15, #-256] // 8-byte Folded Reload
-; CHECK-NEXT: sub x15, x29, #72
-; CHECK-NEXT: ldur x1, [x15, #-256] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x17, [x19, #80] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x18, [x19, #72] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x0, [x19, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x1, [x19, #56] // 8-byte Folded Reload
; CHECK-NEXT: ldur x15, [x29, #-168] // 8-byte Folded Reload
; CHECK-NEXT: ldur x2, [x29, #-176] // 8-byte Folded Reload
-; CHECK-NEXT: sub x16, x29, #16
-; CHECK-NEXT: ldur x3, [x16, #-256] // 8-byte Folded Reload
-; CHECK-NEXT: sub x16, x29, #24
-; CHECK-NEXT: ldur x4, [x16, #-256] // 8-byte Folded Reload
-; CHECK-NEXT: sub x16, x29, #32
-; CHECK-NEXT: ldur x5, [x16, #-256] // 8-byte Folded Reload
-; CHECK-NEXT: sub x16, x29, #40
-; CHECK-NEXT: ldur x6, [x16, #-256] // 8-byte Folded Reload
-; CHECK-NEXT: ldur x16, [x29, #-240] // 8-byte Folded Reload
-; CHECK-NEXT: ldur x7, [x29, #-248] // 8-byte Folded Reload
-; CHECK-NEXT: ldur x20, [x29, #-256] // 8-byte Folded Reload
-; CHECK-NEXT: sub x21, x29, #8
-; CHECK-NEXT: ldur x21, [x21, #-256] // 8-byte Folded Reload
-; CHECK-NEXT: ldur x23, [x29, #-192] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x3, [x19, #112] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x4, [x19, #104] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x5, [x19, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x6, [x19, #88] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x16, [x19, #144] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x7, [x19, #136] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x20, [x19, #128] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x21, [x19, #120] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x23, [x19, #192] // 8-byte Folded Reload
; CHECK-NEXT: ldur x22, [x29, #-184] // 8-byte Folded Reload
-; CHECK-NEXT: ldur x24, [x29, #-200] // 8-byte Folded Reload
-; CHECK-NEXT: ldur x26, [x29, #-216] // 8-byte Folded Reload
-; CHECK-NEXT: ldur x25, [x29, #-208] // 8-byte Folded Reload
-; CHECK-NEXT: ldur x27, [x29, #-224] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x24, [x19, #184] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x26, [x19, #168] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x25, [x19, #176] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x27, [x19, #160] // 8-byte Folded Reload
; CHECK-NEXT: ldr p0, [x27]
; CHECK-NEXT: ldr x27, [x26]
; CHECK-NEXT: mov p8.b, p0.b
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
index e463e833bdbde..6038806dd2651 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
@@ -139,13 +139,14 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: stur x9, [x29, #-80]
-; CHECK-NEXT: sub x9, x29, #80
-; CHECK-NEXT: sturh wzr, [x29, #-70]
-; CHECK-NEXT: stur wzr, [x29, #-68]
-; CHECK-NEXT: sturh w8, [x29, #-72]
+; CHECK-NEXT: str x9, [x19]
+; CHECK-NEXT: add x9, x19, #0
+; CHECK-NEXT: strh wzr, [x19, #10]
+; CHECK-NEXT: str wzr, [x19, #12]
+; CHECK-NEXT: strh w8, [x19, #8]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x20, x0, #0x1
@@ -160,7 +161,7 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za
; CHECK-NEXT: .LBB3_4:
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #80
+; CHECK-NEXT: add x0, x19, #0
; CHECK-NEXT: cbnz x8, .LBB3_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: bl __arm_tpidr2_restore
diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll
index 791d7580c327d..f57b446f39e44 100644
--- a/llvm/test/CodeGen/AArch64/stack-hazard.ll
+++ b/llvm/test/CodeGen/AArch64/stack-hazard.ll
@@ -2831,12 +2831,13 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK0-NEXT: mov x9, sp
; CHECK0-NEXT: mov w20, w0
; CHECK0-NEXT: msub x9, x8, x8, x9
+; CHECK0-NEXT: mov x19, sp
; CHECK0-NEXT: mov sp, x9
-; CHECK0-NEXT: stur x9, [x29, #-80]
-; CHECK0-NEXT: sub x9, x29, #80
-; CHECK0-NEXT: sturh wzr, [x29, #-70]
-; CHECK0-NEXT: stur wzr, [x29, #-68]
-; CHECK0-NEXT: sturh w8, [x29, #-72]
+; CHECK0-NEXT: str x9, [x19]
+; CHECK0-NEXT: add x9, x19, #0
+; CHECK0-NEXT: strh wzr, [x19, #10]
+; CHECK0-NEXT: str wzr, [x19, #12]
+; CHECK0-NEXT: strh w8, [x19, #8]
; CHECK0-NEXT: msr TPIDR2_EL0, x9
; CHECK0-NEXT: .cfi_offset vg, -32
; CHECK0-NEXT: smstop sm
@@ -2845,7 +2846,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK0-NEXT: .cfi_restore vg
; CHECK0-NEXT: smstart za
; CHECK0-NEXT: mrs x8, TPIDR2_EL0
-; CHECK0-NEXT: sub x0, x29, #80
+; CHECK0-NEXT: add x0, x19, #0
; CHECK0-NEXT: cbnz x8, .LBB33_2
; CHECK0-NEXT: // %bb.1: // %entry
; CHECK0-NEXT: bl __arm_tpidr2_restore
@@ -2905,12 +2906,13 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK64-NEXT: mov x9, sp
; CHECK64-NEXT: mov w20, w0
; CHECK64-NEXT: msub x9, x8, x8, x9
+; CHECK64-NEXT: mov x19, sp
; CHECK64-NEXT: mov sp, x9
-; CHECK64-NEXT: stur x9, [x29, #-208]
-; CHECK64-NEXT: sub x9, x29, #208
-; CHECK64-NEXT: sturh wzr, [x29, #-198]
-; CHECK64-NEXT: stur wzr, [x29, #-196]
-; CHECK64-NEXT: sturh w8, [x29, #-200]
+; CHECK64-NEXT: str x9, [x19]
+; CHECK64-NEXT: add x9, x19, #0
+; CHECK64-NEXT: strh wzr, [x19, #10]
+; CHECK64-NEXT: str wzr, [x19, #12]
+; CHECK64-NEXT: strh w8, [x19, #8]
; CHECK64-NEXT: msr TPIDR2_EL0, x9
; CHECK64-NEXT: .cfi_offset vg, -32
; CHECK64-NEXT: smstop sm
@@ -2919,7 +2921,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK64-NEXT: .cfi_restore vg
; CHECK64-NEXT: smstart za
; CHECK64-NEXT: mrs x8, TPIDR2_EL0
-; CHECK64-NEXT: sub x0, x29, #208
+; CHECK64-NEXT: add x0, x19, #0
; CHECK64-NEXT: cbnz x8, .LBB33_2
; CHECK64-NEXT: // %bb.1: // %entry
; CHECK64-NEXT: bl __arm_tpidr2_restore
@@ -2985,16 +2987,13 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK1024-NEXT: mov x9, sp
; CHECK1024-NEXT: mov w20, w0
; CHECK1024-NEXT: msub x9, x8, x8, x9
+; CHECK1024-NEXT: mov x19, sp
; CHECK1024-NEXT: mov sp, x9
-; CHECK1024-NEXT: sub x10, x29, #1872
-; CHECK1024-NEXT: stur x9, [x10, #-256]
-; CHECK1024-NEXT: sub x9, x29, #1862
-; CHECK1024-NEXT: sub x10, x29, #1860
-; CHECK1024-NEXT: sturh wzr, [x9, #-256]
-; CHECK1024-NEXT: sub x9, x29, #2128
-; CHECK1024-NEXT: stur wzr, [x10, #-256]
-; CHECK1024-NEXT: sub x10, x29, #1864
-; CHECK1024-NEXT: sturh w8, [x10, #-256]
+; CHECK1024-NEXT: str x9, [x19]
+; CHECK1024-NEXT: add x9, x19, #0
+; CHECK1024-NEXT: strh wzr, [x19, #10]
+; CHECK1024-NEXT: str wzr, [x19, #12]
+; CHECK1024-NEXT: strh w8, [x19, #8]
; CHECK1024-NEXT: msr TPIDR2_EL0, x9
; CHECK1024-NEXT: .cfi_offset vg, -32
; CHECK1024-NEXT: smstop sm
@@ -3003,7 +3002,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK1024-NEXT: .cfi_restore vg
; CHECK1024-NEXT: smstart za
; CHECK1024-NEXT: mrs x8, TPIDR2_EL0
-; CHECK1024-NEXT: sub x0, x29, #2128
+; CHECK1024-NEXT: add x0, x19, #0
; CHECK1024-NEXT: cbnz x8, .LBB33_2
; CHECK1024-NEXT: // %bb.1: // %entry
; CHECK1024-NEXT: bl __arm_tpidr2_restore
diff --git a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
index c5cf4593cc86d..a52daa5a7741e 100644
--- a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
@@ -547,12 +547,13 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mov w20, w0
; CHECK-NEXT: msub x9, x8, x8, x9
+; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: stur x9, [x29, #-80]
-; CHECK-NEXT: sub x9, x29, #80
-; CHECK-NEXT: sturh wzr, [x29, #-70]
-; CHECK-NEXT: stur wzr, [x29, #-68]
-; CHECK-NEXT: sturh w8, [x29, #-72]
+; CHECK-NEXT: str x9, [x19]
+; CHECK-NEXT: add x9, x19, #0
+; CHECK-NEXT: strh wzr, [x19, #10]
+; CHECK-NEXT: str wzr, [x19, #12]
+; CHECK-NEXT: strh w8, [x19, #8]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: .cfi_offset vg, -32
; CHECK-NEXT: smstop sm
@@ -561,7 +562,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK-NEXT: .cfi_restore vg
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #80
+; CHECK-NEXT: add x0, x19, #0
; CHECK-NEXT: cbnz x8, .LBB8_2
; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: bl __arm_tpidr2_restore
>From a46b26a11fb990fa53d7f107e41760f9d014d3d6 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 30 May 2025 18:39:20 +0000
Subject: [PATCH 2/2] Don't enable BP when there's definitely no hazard padding
Maybe we should (as some of these cases look better), but this was
not intentional — I missed a check.
---
.../Target/AArch64/AArch64RegisterInfo.cpp | 3 +-
llvm/test/CodeGen/AArch64/sme-agnostic-za.ll | 27 ++--
.../CodeGen/AArch64/sme-framelower-use-bp.ll | 121 +++++++++++-------
.../CodeGen/AArch64/sme-lazy-save-call.ll | 13 +-
llvm/test/CodeGen/AArch64/stack-hazard.ll | 13 +-
.../CodeGen/AArch64/sve-stack-frame-layout.ll | 13 +-
6 files changed, 106 insertions(+), 84 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index da3e33429a87a..1afe23e637e8d 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -649,7 +649,8 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// or an emergency spill slot we need to enable the basepointer
// conservatively.
if (AFI->hasStackHazardSlotIndex() ||
- !SMEAttrs(MF.getFunction()).hasNonStreamingInterfaceAndBody()) {
+ (ST.getStreamingHazardSize() &&
+ !SMEAttrs(MF.getFunction()).hasNonStreamingInterfaceAndBody())) {
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
index a38bbe00a40f4..1f68815411097 100644
--- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
+++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
@@ -98,7 +98,6 @@ define i64 @streaming_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nou
; CHECK-NEXT: mov x0, x9
; CHECK-NEXT: add x29, sp, #64
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: bl __arm_sme_state_size
; CHECK-NEXT: sub sp, sp, x0
@@ -146,53 +145,51 @@ define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee(
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_get_current_vg
-; CHECK-NEXT: stp x0, x21, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: mov x0, x9
; CHECK-NEXT: add x29, sp, #64
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: bl __arm_sme_state_size
; CHECK-NEXT: sub sp, sp, x0
-; CHECK-NEXT: mov x20, sp
-; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov x19, sp
+; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_save
; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: and x21, x0, #0x1
-; CHECK-NEXT: tbz w21, #0, .LBB5_2
+; CHECK-NEXT: and x20, x0, #0x1
+; CHECK-NEXT: tbz w20, #0, .LBB5_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB5_2:
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: bl private_za_decl
; CHECK-NEXT: mov x2, x0
-; CHECK-NEXT: tbz w21, #0, .LBB5_4
+; CHECK-NEXT: tbz w20, #0, .LBB5_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB5_4:
-; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_restore
-; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_save
; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: and x21, x0, #0x1
-; CHECK-NEXT: tbz w21, #0, .LBB5_6
+; CHECK-NEXT: and x20, x0, #0x1
+; CHECK-NEXT: tbz w20, #0, .LBB5_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB5_6:
; CHECK-NEXT: mov x0, x2
; CHECK-NEXT: bl private_za_decl
; CHECK-NEXT: mov x1, x0
-; CHECK-NEXT: tbz w21, #0, .LBB5_8
+; CHECK-NEXT: tbz w20, #0, .LBB5_8
; CHECK-NEXT: // %bb.7:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB5_8:
-; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_restore
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: sub sp, x29, #64
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x21, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
index 026852867fdd6..99c65b090adb0 100644
--- a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
+++ b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
@@ -19,7 +19,6 @@ define void @quux() #1 {
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #384
-; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: .cfi_def_cfa w29, 96
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
@@ -54,78 +53,93 @@ define void @quux() #1 {
; CHECK-NEXT: // implicit-def: $x9
; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: and x14, x9, #0x70
-; CHECK-NEXT: str x14, [x19, #8] // 8-byte Folded Spill
+; CHECK-NEXT: sub x9, x29, #120
+; CHECK-NEXT: stur x14, [x9, #-256] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19, #16] // 8-byte Folded Spill
+; CHECK-NEXT: sub x10, x29, #112
+; CHECK-NEXT: stur x9, [x10, #-256] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19, #24] // 8-byte Folded Spill
+; CHECK-NEXT: sub x10, x29, #104
+; CHECK-NEXT: stur x9, [x10, #-256] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, x8
; CHECK-NEXT: incb x9
; CHECK-NEXT: mov w0, w9
; CHECK-NEXT: // implicit-def: $x9
; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: and x10, x9, #0x3f0
-; CHECK-NEXT: str x10, [x19, #32] // 8-byte Folded Spill
+; CHECK-NEXT: sub x9, x29, #96
+; CHECK-NEXT: stur x10, [x9, #-256] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19, #40] // 8-byte Folded Spill
+; CHECK-NEXT: sub x11, x29, #88
+; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19, #48] // 8-byte Folded Spill
+; CHECK-NEXT: sub x11, x29, #80
+; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19, #56] // 8-byte Folded Spill
+; CHECK-NEXT: sub x11, x29, #72
+; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19, #64] // 8-byte Folded Spill
+; CHECK-NEXT: sub x11, x29, #64
+; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19, #72] // 8-byte Folded Spill
+; CHECK-NEXT: sub x11, x29, #56
+; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19, #80] // 8-byte Folded Spill
+; CHECK-NEXT: sub x11, x29, #48
+; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19, #88] // 8-byte Folded Spill
+; CHECK-NEXT: sub x11, x29, #40
+; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19, #96] // 8-byte Folded Spill
+; CHECK-NEXT: sub x11, x29, #32
+; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19, #104] // 8-byte Folded Spill
+; CHECK-NEXT: sub x11, x29, #24
+; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19, #112] // 8-byte Folded Spill
+; CHECK-NEXT: sub x11, x29, #16
+; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19, #120] // 8-byte Folded Spill
+; CHECK-NEXT: sub x11, x29, #8
+; CHECK-NEXT: stur x9, [x11, #-256] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x14
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19, #128] // 8-byte Folded Spill
+; CHECK-NEXT: stur x9, [x29, #-256] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19, #136] // 8-byte Folded Spill
+; CHECK-NEXT: stur x9, [x29, #-248] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, x10
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19, #144] // 8-byte Folded Spill
+; CHECK-NEXT: stur x9, [x29, #-240] // 8-byte Folded Spill
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: subs x9, x9, #16
; CHECK-NEXT: mov sp, x9
@@ -160,29 +174,29 @@ define void @quux() #1 {
; CHECK-NEXT: mov x2, sp
; CHECK-NEXT: subs x10, x2, #16
; CHECK-NEXT: mov sp, x10
-; CHECK-NEXT: str x10, [x19, #152] // 8-byte Folded Spill
+; CHECK-NEXT: stur x10, [x29, #-232] // 8-byte Folded Spill
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: subs x11, x10, x14
; CHECK-NEXT: mov sp, x11
; CHECK-NEXT: mov x10, x11
-; CHECK-NEXT: str x10, [x19, #160] // 8-byte Folded Spill
+; CHECK-NEXT: stur x10, [x29, #-224] // 8-byte Folded Spill
; CHECK-NEXT: mov x0, sp
; CHECK-NEXT: subs x10, x0, #16
; CHECK-NEXT: mov sp, x10
-; CHECK-NEXT: str x10, [x19, #168] // 8-byte Folded Spill
+; CHECK-NEXT: stur x10, [x29, #-216] // 8-byte Folded Spill
; CHECK-NEXT: mov x17, sp
; CHECK-NEXT: subs x10, x17, #16
; CHECK-NEXT: mov sp, x10
-; CHECK-NEXT: str x10, [x19, #176] // 8-byte Folded Spill
+; CHECK-NEXT: stur x10, [x29, #-208] // 8-byte Folded Spill
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: subs x10, x10, x14
; CHECK-NEXT: stur x10, [x29, #-32] // 8-byte Folded Spill
; CHECK-NEXT: mov sp, x10
-; CHECK-NEXT: str x10, [x19, #184] // 8-byte Folded Spill
+; CHECK-NEXT: stur x10, [x29, #-200] // 8-byte Folded Spill
; CHECK-NEXT: mov x15, sp
; CHECK-NEXT: subs x10, x15, #16
; CHECK-NEXT: mov sp, x10
-; CHECK-NEXT: str x10, [x19, #192] // 8-byte Folded Spill
+; CHECK-NEXT: stur x10, [x29, #-192] // 8-byte Folded Spill
; CHECK-NEXT: mov x13, sp
; CHECK-NEXT: subs x10, x13, #16
; CHECK-NEXT: mov sp, x10
@@ -521,33 +535,46 @@ define void @quux() #1 {
; CHECK-NEXT: b .LBB0_3
; CHECK-NEXT: .LBB0_3: // %bb178
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr x9, [x19, #152] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x8, [x19, #48] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x10, [x19, #40] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x11, [x19, #24] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x12, [x19, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldur x9, [x29, #-232] // 8-byte Folded Reload
+; CHECK-NEXT: sub x8, x29, #80
+; CHECK-NEXT: ldur x8, [x8, #-256] // 8-byte Folded Reload
+; CHECK-NEXT: sub x10, x29, #88
+; CHECK-NEXT: ldur x10, [x10, #-256] // 8-byte Folded Reload
+; CHECK-NEXT: sub x11, x29, #104
+; CHECK-NEXT: ldur x11, [x11, #-256] // 8-byte Folded Reload
+; CHECK-NEXT: sub x12, x29, #112
+; CHECK-NEXT: ldur x12, [x12, #-256] // 8-byte Folded Reload
; CHECK-NEXT: ldur x13, [x29, #-152] // 8-byte Folded Reload
; CHECK-NEXT: ldur x14, [x29, #-160] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x17, [x19, #80] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x18, [x19, #72] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x0, [x19, #64] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x1, [x19, #56] // 8-byte Folded Reload
+; CHECK-NEXT: sub x15, x29, #48
+; CHECK-NEXT: ldur x17, [x15, #-256] // 8-byte Folded Reload
+; CHECK-NEXT: sub x15, x29, #56
+; CHECK-NEXT: ldur x18, [x15, #-256] // 8-byte Folded Reload
+; CHECK-NEXT: sub x15, x29, #64
+; CHECK-NEXT: ldur x0, [x15, #-256] // 8-byte Folded Reload
+; CHECK-NEXT: sub x15, x29, #72
+; CHECK-NEXT: ldur x1, [x15, #-256] // 8-byte Folded Reload
; CHECK-NEXT: ldur x15, [x29, #-168] // 8-byte Folded Reload
; CHECK-NEXT: ldur x2, [x29, #-176] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x3, [x19, #112] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x4, [x19, #104] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x5, [x19, #96] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x6, [x19, #88] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x16, [x19, #144] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x7, [x19, #136] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x20, [x19, #128] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x21, [x19, #120] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x23, [x19, #192] // 8-byte Folded Reload
+; CHECK-NEXT: sub x16, x29, #16
+; CHECK-NEXT: ldur x3, [x16, #-256] // 8-byte Folded Reload
+; CHECK-NEXT: sub x16, x29, #24
+; CHECK-NEXT: ldur x4, [x16, #-256] // 8-byte Folded Reload
+; CHECK-NEXT: sub x16, x29, #32
+; CHECK-NEXT: ldur x5, [x16, #-256] // 8-byte Folded Reload
+; CHECK-NEXT: sub x16, x29, #40
+; CHECK-NEXT: ldur x6, [x16, #-256] // 8-byte Folded Reload
+; CHECK-NEXT: ldur x16, [x29, #-240] // 8-byte Folded Reload
+; CHECK-NEXT: ldur x7, [x29, #-248] // 8-byte Folded Reload
+; CHECK-NEXT: ldur x20, [x29, #-256] // 8-byte Folded Reload
+; CHECK-NEXT: sub x21, x29, #8
+; CHECK-NEXT: ldur x21, [x21, #-256] // 8-byte Folded Reload
+; CHECK-NEXT: ldur x23, [x29, #-192] // 8-byte Folded Reload
; CHECK-NEXT: ldur x22, [x29, #-184] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x24, [x19, #184] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x26, [x19, #168] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x25, [x19, #176] // 8-byte Folded Reload
-; CHECK-NEXT: ldr x27, [x19, #160] // 8-byte Folded Reload
+; CHECK-NEXT: ldur x24, [x29, #-200] // 8-byte Folded Reload
+; CHECK-NEXT: ldur x26, [x29, #-216] // 8-byte Folded Reload
+; CHECK-NEXT: ldur x25, [x29, #-208] // 8-byte Folded Reload
+; CHECK-NEXT: ldur x27, [x29, #-224] // 8-byte Folded Reload
; CHECK-NEXT: ldr p0, [x27]
; CHECK-NEXT: ldr x27, [x26]
; CHECK-NEXT: mov p8.b, p0.b
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
index 6038806dd2651..e463e833bdbde 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
@@ -139,14 +139,13 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19]
-; CHECK-NEXT: add x9, x19, #0
-; CHECK-NEXT: strh wzr, [x19, #10]
-; CHECK-NEXT: str wzr, [x19, #12]
-; CHECK-NEXT: strh w8, [x19, #8]
+; CHECK-NEXT: stur x9, [x29, #-80]
+; CHECK-NEXT: sub x9, x29, #80
+; CHECK-NEXT: sturh wzr, [x29, #-70]
+; CHECK-NEXT: stur wzr, [x29, #-68]
+; CHECK-NEXT: sturh w8, [x29, #-72]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x20, x0, #0x1
@@ -161,7 +160,7 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za
; CHECK-NEXT: .LBB3_4:
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: add x0, x19, #0
+; CHECK-NEXT: sub x0, x29, #80
; CHECK-NEXT: cbnz x8, .LBB3_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: bl __arm_tpidr2_restore
diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll
index f57b446f39e44..e169b199733bd 100644
--- a/llvm/test/CodeGen/AArch64/stack-hazard.ll
+++ b/llvm/test/CodeGen/AArch64/stack-hazard.ll
@@ -2831,13 +2831,12 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK0-NEXT: mov x9, sp
; CHECK0-NEXT: mov w20, w0
; CHECK0-NEXT: msub x9, x8, x8, x9
-; CHECK0-NEXT: mov x19, sp
; CHECK0-NEXT: mov sp, x9
-; CHECK0-NEXT: str x9, [x19]
-; CHECK0-NEXT: add x9, x19, #0
-; CHECK0-NEXT: strh wzr, [x19, #10]
-; CHECK0-NEXT: str wzr, [x19, #12]
-; CHECK0-NEXT: strh w8, [x19, #8]
+; CHECK0-NEXT: stur x9, [x29, #-80]
+; CHECK0-NEXT: sub x9, x29, #80
+; CHECK0-NEXT: sturh wzr, [x29, #-70]
+; CHECK0-NEXT: stur wzr, [x29, #-68]
+; CHECK0-NEXT: sturh w8, [x29, #-72]
; CHECK0-NEXT: msr TPIDR2_EL0, x9
; CHECK0-NEXT: .cfi_offset vg, -32
; CHECK0-NEXT: smstop sm
@@ -2846,7 +2845,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK0-NEXT: .cfi_restore vg
; CHECK0-NEXT: smstart za
; CHECK0-NEXT: mrs x8, TPIDR2_EL0
-; CHECK0-NEXT: add x0, x19, #0
+; CHECK0-NEXT: sub x0, x29, #80
; CHECK0-NEXT: cbnz x8, .LBB33_2
; CHECK0-NEXT: // %bb.1: // %entry
; CHECK0-NEXT: bl __arm_tpidr2_restore
diff --git a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
index a52daa5a7741e..c5cf4593cc86d 100644
--- a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
@@ -547,13 +547,12 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mov w20, w0
; CHECK-NEXT: msub x9, x8, x8, x9
-; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: str x9, [x19]
-; CHECK-NEXT: add x9, x19, #0
-; CHECK-NEXT: strh wzr, [x19, #10]
-; CHECK-NEXT: str wzr, [x19, #12]
-; CHECK-NEXT: strh w8, [x19, #8]
+; CHECK-NEXT: stur x9, [x29, #-80]
+; CHECK-NEXT: sub x9, x29, #80
+; CHECK-NEXT: sturh wzr, [x29, #-70]
+; CHECK-NEXT: stur wzr, [x29, #-68]
+; CHECK-NEXT: sturh w8, [x29, #-72]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: .cfi_offset vg, -32
; CHECK-NEXT: smstop sm
@@ -562,7 +561,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK-NEXT: .cfi_restore vg
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: add x0, x19, #0
+; CHECK-NEXT: sub x0, x29, #80
; CHECK-NEXT: cbnz x8, .LBB8_2
; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: bl __arm_tpidr2_restore
More information about the llvm-commits
mailing list