[llvm] [AArch64][SME] Fix frame lowering not using a base pointer for SME functions. (PR #91643)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 9 11:48:32 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Amara Emerson (aemerson)
<details>
<summary>Changes</summary>
The existing code checks for the +sve subtarget feature when deciding whether
to use a base pointer for a function, but that check does not work when only
+sme is enabled. Check for either SVE or SME instead.
rdar://126878490
---
Patch is 20.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91643.diff
5 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp (+2-1)
- (modified) llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll (+16-8)
- (modified) llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll (+26-20)
- (modified) llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll (+8-4)
- (modified) llvm/test/CodeGen/AArch64/sme-zt0-state.ll (+26-18)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index ad29003f1e817..a192e01f69b20 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -552,7 +552,8 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
if (hasStackRealignment(MF))
return true;
- if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) {
+ auto &ST = MF.getSubtarget<AArch64Subtarget>();
+ if (ST.hasSVEorSME()) {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// Frames that have variable sized objects and scalable SVE objects,
// should always use a basepointer.
diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
index 254e37e836cbb..50d04e39f3527 100644
--- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
+++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
@@ -214,7 +214,8 @@ declare double @za_shared_callee(double) "aarch64_inout_za"
define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline optnone "aarch64_new_za"{
; CHECK-COMMON-LABEL: za_new_caller_to_za_shared_callee:
; CHECK-COMMON: // %bb.0: // %prelude
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
@@ -240,7 +241,8 @@ define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline o
; CHECK-COMMON-NEXT: fadd d0, d0, d1
; CHECK-COMMON-NEXT: smstop za
; CHECK-COMMON-NEXT: mov sp, x29
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
entry:
%call = call double @za_shared_callee(double %x)
@@ -251,7 +253,8 @@ entry:
define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline optnone "aarch64_inout_za"{
; CHECK-COMMON-LABEL: za_shared_caller_to_za_none_callee:
; CHECK-COMMON: // %bb.0: // %entry
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
@@ -279,7 +282,8 @@ define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline
; CHECK-COMMON-NEXT: fmov d1, x8
; CHECK-COMMON-NEXT: fadd d0, d0, d1
; CHECK-COMMON-NEXT: mov sp, x29
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
entry:
%call = call double @normal_callee(double %x)
@@ -291,7 +295,8 @@ entry:
define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind {
; CHECK-COMMON-LABEL: f128_call_za:
; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
@@ -314,7 +319,8 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind {
; CHECK-COMMON-NEXT: .LBB8_2:
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: mov sp, x29
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
%res = fadd fp128 %a, %b
ret fp128 %res
@@ -353,7 +359,8 @@ define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounw
define double @frem_call_za(double %a, double %b) "aarch64_inout_za" nounwind {
; CHECK-COMMON-LABEL: frem_call_za:
; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
@@ -376,7 +383,8 @@ define double @frem_call_za(double %a, double %b) "aarch64_inout_za" nounwind {
; CHECK-COMMON-NEXT: .LBB10_2:
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: mov sp, x29
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
%res = frem double %a, %b
ret double %res
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
index 9d635f0b88f19..92baf0d223a4e 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
@@ -8,7 +8,8 @@ declare float @llvm.cos.f32(float)
define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" {
; CHECK-LABEL: test_lazy_save_1_callee:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
@@ -31,7 +32,8 @@ define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" {
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
call void @private_za_callee()
ret void
@@ -41,20 +43,21 @@ define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" {
define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" {
; CHECK-LABEL: test_lazy_save_2_callees:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
+; CHECK-NEXT: str x21, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: rdsvl x19, #1
+; CHECK-NEXT: rdsvl x20, #1
; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: msub x8, x19, x19, x8
+; CHECK-NEXT: msub x8, x20, x20, x8
; CHECK-NEXT: mov sp, x8
-; CHECK-NEXT: sub x20, x29, #16
+; CHECK-NEXT: sub x21, x29, #16
; CHECK-NEXT: stur wzr, [x29, #-4]
; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur x8, [x29, #-16]
-; CHECK-NEXT: sturh w19, [x29, #-8]
-; CHECK-NEXT: msr TPIDR2_EL0, x20
+; CHECK-NEXT: sturh w20, [x29, #-8]
+; CHECK-NEXT: msr TPIDR2_EL0, x21
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
@@ -64,8 +67,8 @@ define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" {
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: sturh w19, [x29, #-8]
-; CHECK-NEXT: msr TPIDR2_EL0, x20
+; CHECK-NEXT: sturh w20, [x29, #-8]
+; CHECK-NEXT: msr TPIDR2_EL0, x21
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
@@ -76,8 +79,9 @@ define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" {
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x21, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp], #48 // 16-byte Folded Reload
; CHECK-NEXT: ret
call void @private_za_callee()
call void @private_za_callee()
@@ -88,7 +92,8 @@ define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" {
define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inout_za" {
; CHECK-LABEL: test_lazy_save_expanded_intrinsic:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
@@ -111,7 +116,8 @@ define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inou
; CHECK-NEXT: .LBB2_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
%res = call float @llvm.cos.f32(float %a)
ret float %res
@@ -127,7 +133,7 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: add x29, sp, #64
-; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
@@ -140,13 +146,13 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za
; CHECK-NEXT: sturh w8, [x29, #-72]
; CHECK-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: and x19, x0, #0x1
-; CHECK-NEXT: tbz w19, #0, .LBB3_2
+; CHECK-NEXT: and x20, x0, #0x1
+; CHECK-NEXT: tbz w20, #0, .LBB3_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB3_2:
; CHECK-NEXT: bl private_za_callee
-; CHECK-NEXT: tbz w19, #0, .LBB3_4
+; CHECK-NEXT: tbz w20, #0, .LBB3_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB3_4:
@@ -159,8 +165,8 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za
; CHECK-NEXT: .LBB3_6:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: sub sp, x29, #64
+; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
index cd7460b177c4b..095e84cda1085 100644
--- a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
@@ -7,7 +7,8 @@ declare void @private_za_callee()
define void @disable_tailcallopt() "aarch64_inout_za" nounwind {
; CHECK-LABEL: disable_tailcallopt:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
@@ -30,7 +31,8 @@ define void @disable_tailcallopt() "aarch64_inout_za" nounwind {
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
tail call void @private_za_callee()
ret void
@@ -40,7 +42,8 @@ define void @disable_tailcallopt() "aarch64_inout_za" nounwind {
define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind {
; CHECK-LABEL: f128_call_za:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
@@ -63,7 +66,8 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind {
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
%res = fadd fp128 %a, %b
ret fp128 %res
diff --git a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
index 7f40b5e7e1344..884096743e034 100644
--- a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
+++ b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
@@ -34,7 +34,7 @@ define void @za_zt0_shared_caller_no_state_callee() "aarch64_inout_za" "aarch64_
; CHECK-LABEL: za_zt0_shared_caller_no_state_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: rdsvl x8, #1
@@ -42,16 +42,16 @@ define void @za_zt0_shared_caller_no_state_callee() "aarch64_inout_za" "aarch64_
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: sub x10, x29, #16
-; CHECK-NEXT: sub x19, x29, #80
+; CHECK-NEXT: sub x20, x29, #80
; CHECK-NEXT: stur wzr, [x29, #-4]
; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur x9, [x29, #-16]
; CHECK-NEXT: sturh w8, [x29, #-8]
; CHECK-NEXT: msr TPIDR2_EL0, x10
-; CHECK-NEXT: str zt0, [x19]
+; CHECK-NEXT: str zt0, [x20]
; CHECK-NEXT: bl callee
; CHECK-NEXT: smstart za
-; CHECK-NEXT: ldr zt0, [x19]
+; CHECK-NEXT: ldr zt0, [x20]
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB1_2
@@ -60,7 +60,7 @@ define void @za_zt0_shared_caller_no_state_callee() "aarch64_inout_za" "aarch64_
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
call void @callee();
@@ -88,22 +88,22 @@ define void @za_zt0_shared_caller_za_shared_callee() "aarch64_inout_za" "aarch64
; CHECK-LABEL: za_zt0_shared_caller_za_shared_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x8, x8, x8, x9
; CHECK-NEXT: mov sp, x8
-; CHECK-NEXT: sub x19, x29, #80
+; CHECK-NEXT: sub x20, x29, #80
; CHECK-NEXT: stur wzr, [x29, #-4]
; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur x8, [x29, #-16]
-; CHECK-NEXT: str zt0, [x19]
+; CHECK-NEXT: str zt0, [x20]
; CHECK-NEXT: bl callee
-; CHECK-NEXT: ldr zt0, [x19]
+; CHECK-NEXT: ldr zt0, [x20]
; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
call void @callee() "aarch64_inout_za";
@@ -114,7 +114,8 @@ define void @za_zt0_shared_caller_za_shared_callee() "aarch64_inout_za" "aarch64
define void @za_zt0_shared_caller_za_zt0_shared_callee() "aarch64_inout_za" "aarch64_in_zt0" nounwind {
; CHECK-LABEL: za_zt0_shared_caller_za_zt0_shared_callee:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
@@ -126,7 +127,8 @@ define void @za_zt0_shared_caller_za_zt0_shared_callee() "aarch64_inout_za" "aar
; CHECK-NEXT: stur x8, [x29, #-16]
; CHECK-NEXT: bl callee
; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
call void @callee() "aarch64_inout_za" "aarch64_in_zt0";
ret void;
@@ -192,7 +194,8 @@ define void @zt0_new_caller() "aarch64_new_zt0" nounwind {
define void @new_za_zt0_caller() "aarch64_new_za" "aarch64_new_zt0" nounwind {
; CHECK-LABEL: new_za_zt0_caller:
; CHECK: // %bb.0: // %prelude
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: rdsvl x8, #1
@@ -217,7 +220,8 @@ define void @new_za_zt0_caller() "aarch64_new_za" "aarch64_new_zt0" nounwind {
; CHECK-NEXT: bl callee
; CHECK-NEXT: smstop za
; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
call void @callee() "aarch64_inout_za" "aarch64_in_zt0";
ret void;
@@ -227,7 +231,8 @@ define void @new_za_zt0_caller() "aarch64_new_za" "aarch64_new_zt0" nounwind {
define void @new_za_shared_zt0_caller() "aarch64_new_za" "aarch64_in_zt0" nounwind {
; CHECK-LABEL: new_za_shared_zt0_caller:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
@@ -240,7 +245,8 @@ define void @new_za_shared_zt0_caller() "aarch64_new_za" "aarch64_in_zt0" nounwi
; CHECK-NEXT: zero {za}
; CHECK-NEXT: bl callee
; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
call void @callee() "aarch64_inout_za" "aarch64_in_zt0";
ret void;
@@ -250,7 +256,8 @@ define void @new_za_shared_zt0_caller() "aarch64_new_za" "aarch64_in_zt0" nounwi
define void @shared_za_new_zt0() "aarch64_inout_za" "aarch64_new_zt0" nounwind {
; CHECK-LABEL: shared_za_new_zt0:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHE...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/91643
More information about the llvm-commits mailing list