[llvm] 702c3f5 - [SME] Don't scavenge a spillslot in callee-save area in presence of streaming-mode changes.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 4 03:15:13 PDT 2023
Author: Sander de Smalen
Date: 2023-09-04T10:14:44Z
New Revision: 702c3f56d3868831f9aff104b53d6bde0ed2e5f1
URL: https://github.com/llvm/llvm-project/commit/702c3f56d3868831f9aff104b53d6bde0ed2e5f1
DIFF: https://github.com/llvm/llvm-project/commit/702c3f56d3868831f9aff104b53d6bde0ed2e5f1.diff
LOG: [SME] Don't scavenge a spillslot in callee-save area in presence of streaming-mode changes.
If no frame-pointer is available and the compiler has scavenged a
spill-slot in the callee-save area, the compiler may be forced to emit an
'addvl' inside the streaming-mode-changing call sequence when it needs to
fill (reload) an FP register being passed to the call.
We can avoid this entirely by disabling stack-slot scavenging when the
function contains streaming-mode-changing call-sequences and does not use
a frame-pointer.
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D159196
Added:
llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
Modified:
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
llvm/test/CodeGen/AArch64/sme-streaming-body.ll
llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index a45b509fcb4a3b..68e68449d4073b 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -427,6 +427,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+
// Win64 EH requires a frame pointer if funclets are present, as the locals
// are accessed off the frame pointer in both the parent function and the
// funclets.
@@ -3278,6 +3279,12 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
bool AArch64FrameLowering::enableStackSlotScavenging(
const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ // If the function has streaming-mode changes, don't scavenge a
+ // spillslot in the callee-save area, as that might require an
+ // 'addvl' in the streaming-mode-changing call-sequence when the
+ // function doesn't use a FP.
+ if (AFI->hasStreamingModeChanges() && !hasFP(MF))
+ return false;
return AFI->hasCalleeSaveStackFreeSpace();
}
diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
index b3d97d25ff6a40..848cc9903b3441 100644
--- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
+++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
@@ -17,15 +17,15 @@ define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline
; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
-; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-FISEL-NEXT: str d0, [sp] // 8-byte Folded Spill
; CHECK-FISEL-NEXT: smstart sm
-; CHECK-FISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-FISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload
; CHECK-FISEL-NEXT: bl streaming_callee
-; CHECK-FISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
+; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-FISEL-NEXT: smstop sm
; CHECK-FISEL-NEXT: adrp x8, .LCPI0_0
; CHECK-FISEL-NEXT: ldr d0, [x8, :lo12:.LCPI0_0]
-; CHECK-FISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload
+; CHECK-FISEL-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
; CHECK-FISEL-NEXT: fadd d0, d1, d0
; CHECK-FISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-FISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
@@ -43,15 +43,15 @@ define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline
; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
-; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-GISEL-NEXT: str d0, [sp] // 8-byte Folded Spill
; CHECK-GISEL-NEXT: smstart sm
-; CHECK-GISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-GISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload
; CHECK-GISEL-NEXT: bl streaming_callee
-; CHECK-GISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
+; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-GISEL-NEXT: smstop sm
; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
; CHECK-GISEL-NEXT: fmov d0, x8
-; CHECK-GISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload
+; CHECK-GISEL-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
; CHECK-GISEL-NEXT: fadd d0, d1, d0
; CHECK-GISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-GISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
@@ -76,15 +76,15 @@ define double @streaming_caller_nonstreaming_callee(double %x) nounwind noinline
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
-; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT: str d0, [sp] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
-; CHECK-COMMON-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: ldr d0, [sp] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl normal_callee
-; CHECK-COMMON-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
; CHECK-COMMON-NEXT: fmov d0, x8
-; CHECK-COMMON-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: fadd d0, d1, d0
; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
@@ -102,32 +102,32 @@ entry:
define double @locally_streaming_caller_normal_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_body" {
; CHECK-COMMON-LABEL: locally_streaming_caller_normal_callee:
; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: sub sp, sp, #96
-; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
-; CHECK-COMMON-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT: sub sp, sp, #112
+; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT: str d0, [sp, #24] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: smstop sm
-; CHECK-COMMON-NEXT: ldr d0, [sp, #88] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl normal_callee
-; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT: str d0, [sp, #16] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
; CHECK-COMMON-NEXT: fmov d0, x8
-; CHECK-COMMON-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: ldr d1, [sp, #16] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: fadd d0, d1, d0
-; CHECK-COMMON-NEXT: str d0, [sp] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
-; CHECK-COMMON-NEXT: ldr d0, [sp] // 8-byte Folded Reload
-; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
-; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT: add sp, sp, #96
+; CHECK-COMMON-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: add sp, sp, #112
; CHECK-COMMON-NEXT: ret
%call = call double @normal_callee(double %x);
%add = fadd double %call, 4.200000e+01
@@ -378,23 +378,25 @@ define double @frem_call_za(double %a, double %b) "aarch64_pstate_za_shared" nou
define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounwind {
; CHECK-COMMON-LABEL: frem_call_sm:
; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; CHECK-COMMON-NEXT: stp s0, s1, [sp, #72] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT: sub sp, sp, #96
+; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
-; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #72] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl fmodf
-; CHECK-COMMON-NEXT: str s0, [sp, #76] // 4-byte Folded Spill
+; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
-; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT: ldr s0, [sp, #76] // 4-byte Folded Reload
-; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: add sp, sp, #96
; CHECK-COMMON-NEXT: ret
%res = frem float %a, %b
ret float %res
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
index ff23ed9dfe5a2d..74dd66fedceb77 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
declare void @normal_callee();
declare void @streaming_callee() "aarch64_pstate_sm_enabled";
@@ -237,25 +237,27 @@ declare void @use_ptr(ptr) "aarch64_pstate_sm_compatible"
define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_pstate_sm_body" {
; CHECK-LABEL: call_to_intrinsic_without_chain:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: str d0, [sp, #72] // 8-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstart sm
; CHECK-NEXT: smstop sm
-; CHECK-NEXT: ldr d0, [sp, #72] // 8-byte Folded Reload
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: bl cos
-; CHECK-NEXT: str d0, [sp, #72] // 8-byte Folded Spill
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstart sm
; CHECK-NEXT: smstop sm
-; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldr d0, [sp, #72] // 8-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ret
entry:
%0 = call fast double @llvm.cos.f64(double %x)
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
index 65521a0edc6d26..97e9cd6f97098d 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
; This file tests the following combinations related to streaming-enabled functions:
; [ ] N -> S (Normal -> Streaming)
@@ -313,19 +313,18 @@ define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_psta
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
-; CHECK-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstop sm
-; CHECK-NEXT: ldr d0, [sp, #88] // 8-byte Folded Reload
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: bl cos
-; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: str d0, [sp] // 8-byte Folded Spill
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: ldr d0, [sp, #88] // 8-byte Folded Reload
-; CHECK-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d1, d0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: fadd d0, d1, d0
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fadd d0, d1, d0
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
new file mode 100644
index 00000000000000..e4cd4d6c05c5ee
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64"
+
+; This function would normally scavenge a stackslot from the callee-save
+; area, which would lead to spilling 's0' to that stackslot before the
+; smstop and filling it with 'addvl + <offset>' after the smstop because
+; the frame-pointer is not available.
+; This would not be valid, since the vector-length has changed so 'addvl'
+; cannot be used. This is testing that the stackslot-scavenging is disabled
+; when there are any streaming-mode-changing call-sequences in the
+; function.
+define void @test_no_stackslot_scavenging(float %f) #0 {
+; CHECK-LABEL: test_no_stackslot_scavenging:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x24, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
+; CHECK-NEXT: bl use_f
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x24, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ %ptr = alloca <vscale x 16 x i8>
+ call void asm sideeffect "", "~{x24}"() nounwind
+ call void @use_f(float %f)
+ ret void
+}
+
+declare void @use_f(float)
+
+attributes #0 = { nounwind "target-features"="+sme" "aarch64_pstate_sm_enabled" }
More information about the llvm-commits mailing list