[llvm] [AArch64][SME] Add missing SMStartStop regmasks (PR #68458)
Jon Roelofs via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 6 16:27:13 PDT 2023
https://github.com/jroelofs created https://github.com/llvm/llvm-project/pull/68458
Without these regmasks, the register allocator doesn't know that the SMSTART/SMSTOP nodes clobber the callee-saved NEON registers (among other things).
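For context, the mechanism is the one the diff below applies: a register-mask operand on an SDNode tells the register allocator which registers are preserved across the node, and anything absent from the mask is treated as clobbered. A minimal sketch of the pattern, condensed from the lowering change in this patch (the TableGen table name in the comment is an assumption, not taken from the diff):

// The mask returned by getSMStartStopCallPreservedMask() lists the
// registers preserved across SMSTART/SMSTOP; the allocator treats every
// register *not* in the mask as clobbered. (Presumably backed by a
// TableGen-generated table such as CSR_AArch64_SMStartStop_RegMask.)
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
SDValue Mask = DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask());

// Appending the mask as a trailing operand is what surfaces the clobber;
// without it, values in d8-d15 were assumed to survive the node.
SDValue Smstart = DAG.getNode(
    AArch64ISD::SMSTART, DL, MVT::Other, Chain,
    DAG.getTargetConstant((int32_t)AArch64SVCR::SVCRZA, DL, MVT::i32),
    DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64),
    Mask);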
From 5042481ea786929841096e9a2c48cfe4b789a032 Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Fri, 6 Oct 2023 16:15:09 -0700
Subject: [PATCH] [AArch64][SME] Add missing SMStartStop regmasks
Without these, the register allocator doesn't know that the SMSTART/SMSTOP
nodes clobber the callee-saved NEON registers (among other things).
---
.../Target/AArch64/AArch64ISelLowering.cpp | 10 +-
.../AArch64/sme-disable-gisel-fisel.ll | 112 ++++++++++++------
.../CodeGen/AArch64/sme-lazy-save-call.ll | 84 ++++++++-----
.../AArch64/sme-shared-za-interface.ll | 52 +++++---
.../CodeGen/AArch64/sme-toggle-pstateza.ll | 17 +++
5 files changed, 191 insertions(+), 84 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9cda43e58d27a43..b073d0347dbcaf9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4819,6 +4819,7 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain,
SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
unsigned IntNo = Op.getConstantOperandVal(1);
SDLoc DL(Op);
switch (IntNo) {
@@ -4845,13 +4846,15 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
AArch64ISD::SMSTART, DL, MVT::Other,
Op->getOperand(0), // Chain
DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64),
+ DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask()));
case Intrinsic::aarch64_sme_za_disable:
return DAG.getNode(
AArch64ISD::SMSTOP, DL, MVT::Other,
Op->getOperand(0), // Chain
DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64),
+ DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask()));
}
}
@@ -7850,7 +7853,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Result = DAG.getNode(
AArch64ISD::SMSTART, DL, MVT::Other, Result,
DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64),
+ DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask()));
// Conditionally restore the lazy save using a pseudo node.
unsigned FI = FuncInfo->getLazySaveTPIDR2Obj();
diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
index 885cd7b0b0947da..fc1104412519583 100644
--- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
+++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
@@ -212,14 +212,19 @@ declare double @za_shared_callee(double) "aarch64_pstate_za_shared"
define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline optnone "aarch64_pstate_za_new"{
; CHECK-COMMON-LABEL: za_new_caller_to_za_shared_callee:
; CHECK-COMMON: // %bb.0: // %prelude
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: sub sp, sp, #16
+; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: add x29, sp, #64
+; CHECK-COMMON-NEXT: sub sp, sp, #32
+; CHECK-COMMON-NEXT: stur d0, [x29, #-88] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x8, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x8
-; CHECK-COMMON-NEXT: stur x8, [x29, #-16]
+; CHECK-COMMON-NEXT: stur x8, [x29, #-80]
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: cbz x8, .LBB6_2
; CHECK-COMMON-NEXT: b .LBB6_1
@@ -230,13 +235,20 @@ define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline o
; CHECK-COMMON-NEXT: .LBB6_2: // %entry
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: zero {za}
+; CHECK-COMMON-NEXT: ldur d0, [x29, #-88] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl za_shared_callee
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
; CHECK-COMMON-NEXT: fmov d1, x8
; CHECK-COMMON-NEXT: fadd d0, d0, d1
+; CHECK-COMMON-NEXT: stur d0, [x29, #-96] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstop za
-; CHECK-COMMON-NEXT: mov sp, x29
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldur d0, [x29, #-96] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: sub sp, x29, #64
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
entry:
%call = call double @za_shared_callee(double %x)
@@ -247,21 +259,26 @@ entry:
define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline optnone "aarch64_pstate_za_shared"{
; CHECK-COMMON-LABEL: za_shared_caller_to_za_none_callee:
; CHECK-COMMON: // %bb.0: // %entry
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: sub sp, sp, #16
+; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: add x29, sp, #64
+; CHECK-COMMON-NEXT: sub sp, sp, #32
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
-; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
-; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
-; CHECK-COMMON-NEXT: sub x8, x29, #16
+; CHECK-COMMON-NEXT: stur x9, [x29, #-80]
+; CHECK-COMMON-NEXT: sturh w8, [x29, #-72]
+; CHECK-COMMON-NEXT: sub x8, x29, #80
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
; CHECK-COMMON-NEXT: bl normal_callee
+; CHECK-COMMON-NEXT: stur d0, [x29, #-88] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-COMMON-NEXT: sub x0, x29, #16
+; CHECK-COMMON-NEXT: sub x0, x29, #80
; CHECK-COMMON-NEXT: cbz x8, .LBB7_1
; CHECK-COMMON-NEXT: b .LBB7_2
; CHECK-COMMON-NEXT: .LBB7_1: // %entry
@@ -270,10 +287,15 @@ define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline
; CHECK-COMMON-NEXT: .LBB7_2: // %entry
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
-; CHECK-COMMON-NEXT: fmov d1, x8
-; CHECK-COMMON-NEXT: fadd d0, d0, d1
-; CHECK-COMMON-NEXT: mov sp, x29
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: fmov d0, x8
+; CHECK-COMMON-NEXT: ldur d1, [x29, #-88] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: fadd d0, d1, d0
+; CHECK-COMMON-NEXT: sub sp, x29, #64
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
entry:
%call = call double @normal_callee(double %x)
@@ -285,28 +307,38 @@ entry:
define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwind {
; CHECK-COMMON-LABEL: f128_call_za:
; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: sub sp, sp, #16
+; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: add x29, sp, #64
+; CHECK-COMMON-NEXT: sub sp, sp, #32
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
-; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
-; CHECK-COMMON-NEXT: sub x9, x29, #16
-; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
+; CHECK-COMMON-NEXT: stur x9, [x29, #-80]
+; CHECK-COMMON-NEXT: sub x9, x29, #80
+; CHECK-COMMON-NEXT: sturh w8, [x29, #-72]
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
; CHECK-COMMON-NEXT: bl __addtf3
+; CHECK-COMMON-NEXT: stur q0, [x29, #-96] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-COMMON-NEXT: sub x0, x29, #16
+; CHECK-COMMON-NEXT: sub x0, x29, #80
; CHECK-COMMON-NEXT: cbnz x8, .LBB8_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT: .LBB8_2:
+; CHECK-COMMON-NEXT: ldur q0, [x29, #-96] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-COMMON-NEXT: mov sp, x29
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: sub sp, x29, #64
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
%res = fadd fp128 %a, %b
ret fp128 %res
@@ -345,28 +377,38 @@ define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounw
define double @frem_call_za(double %a, double %b) "aarch64_pstate_za_shared" nounwind {
; CHECK-COMMON-LABEL: frem_call_za:
; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: sub sp, sp, #16
+; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: add x29, sp, #64
+; CHECK-COMMON-NEXT: sub sp, sp, #32
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
-; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
-; CHECK-COMMON-NEXT: sub x9, x29, #16
-; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
+; CHECK-COMMON-NEXT: stur x9, [x29, #-80]
+; CHECK-COMMON-NEXT: sub x9, x29, #80
+; CHECK-COMMON-NEXT: sturh w8, [x29, #-72]
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
; CHECK-COMMON-NEXT: bl fmod
+; CHECK-COMMON-NEXT: stur d0, [x29, #-88] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-COMMON-NEXT: sub x0, x29, #16
+; CHECK-COMMON-NEXT: sub x0, x29, #80
; CHECK-COMMON-NEXT: cbnz x8, .LBB10_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT: .LBB10_2:
+; CHECK-COMMON-NEXT: ldur d0, [x29, #-88] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-COMMON-NEXT: mov sp, x29
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: sub sp, x29, #64
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
%res = frem double %a, %b
ret double %res
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
index 6757af01278bd9b..a46820b3a08ba05 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
@@ -9,28 +9,36 @@ declare float @llvm.cos.f32(float)
define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" {
; CHECK-LABEL: test_lazy_save_1_callee:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #64
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: stur x9, [x29, #-16]
-; CHECK-NEXT: sub x9, x29, #16
-; CHECK-NEXT: sturh w8, [x29, #-8]
+; CHECK-NEXT: stur x9, [x29, #-80]
+; CHECK-NEXT: sub x9, x29, #80
+; CHECK-NEXT: sturh w8, [x29, #-72]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
+; CHECK-NEXT: sub x0, x29, #80
; CHECK-NEXT: cbnz x8, .LBB0_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: sub sp, x29, #64
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ret
call void @private_za_callee()
ret void
@@ -40,41 +48,49 @@ define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" {
define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" {
; CHECK-LABEL: test_lazy_save_2_callees:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #64
+; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x19, #1
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: msub x8, x19, x19, x8
; CHECK-NEXT: mov sp, x8
-; CHECK-NEXT: sub x20, x29, #16
-; CHECK-NEXT: stur x8, [x29, #-16]
-; CHECK-NEXT: sturh w19, [x29, #-8]
+; CHECK-NEXT: sub x20, x29, #80
+; CHECK-NEXT: stur x8, [x29, #-80]
+; CHECK-NEXT: sturh w19, [x29, #-72]
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
+; CHECK-NEXT: sub x0, x29, #80
; CHECK-NEXT: cbnz x8, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: sturh w19, [x29, #-8]
+; CHECK-NEXT: sturh w19, [x29, #-72]
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
+; CHECK-NEXT: sub x0, x29, #80
; CHECK-NEXT: cbnz x8, .LBB1_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: sub sp, x29, #64
+; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret
call void @private_za_callee()
call void @private_za_callee()
@@ -85,28 +101,38 @@ define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" {
define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_pstate_za_shared" {
; CHECK-LABEL: test_lazy_save_expanded_intrinsic:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #64
+; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: stur x9, [x29, #-16]
-; CHECK-NEXT: sub x9, x29, #16
-; CHECK-NEXT: sturh w8, [x29, #-8]
+; CHECK-NEXT: stur x9, [x29, #-80]
+; CHECK-NEXT: sub x9, x29, #80
+; CHECK-NEXT: sturh w8, [x29, #-72]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl cosf
+; CHECK-NEXT: stur s0, [x29, #-84] // 4-byte Folded Spill
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
+; CHECK-NEXT: sub x0, x29, #80
; CHECK-NEXT: cbnz x8, .LBB2_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB2_2:
+; CHECK-NEXT: ldur s0, [x29, #-84] // 4-byte Folded Reload
; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: sub sp, x29, #64
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ret
%res = call float @llvm.cos.f32(float %a)
ret float %res
diff --git a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
index 0ac2b21c6aba360..9dccec5a99da148 100644
--- a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
@@ -7,28 +7,36 @@ declare void @private_za_callee()
define void @disable_tailcallopt() "aarch64_pstate_za_shared" nounwind {
; CHECK-LABEL: disable_tailcallopt:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #64
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: stur x9, [x29, #-16]
-; CHECK-NEXT: sub x9, x29, #16
-; CHECK-NEXT: sturh w8, [x29, #-8]
+; CHECK-NEXT: stur x9, [x29, #-80]
+; CHECK-NEXT: sub x9, x29, #80
+; CHECK-NEXT: sturh w8, [x29, #-72]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
+; CHECK-NEXT: sub x0, x29, #80
; CHECK-NEXT: cbnz x8, .LBB0_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: sub sp, x29, #64
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ret
tail call void @private_za_callee()
ret void
@@ -38,28 +46,38 @@ define void @disable_tailcallopt() "aarch64_pstate_za_shared" nounwind {
define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwind {
; CHECK-LABEL: f128_call_za:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #64
+; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: stur x9, [x29, #-16]
-; CHECK-NEXT: sub x9, x29, #16
-; CHECK-NEXT: sturh w8, [x29, #-8]
+; CHECK-NEXT: stur x9, [x29, #-80]
+; CHECK-NEXT: sub x9, x29, #80
+; CHECK-NEXT: sturh w8, [x29, #-72]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl __addtf3
+; CHECK-NEXT: stur q0, [x29, #-96] // 16-byte Folded Spill
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
+; CHECK-NEXT: sub x0, x29, #80
; CHECK-NEXT: cbnz x8, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT: ldur q0, [x29, #-96] // 16-byte Folded Reload
; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: sub sp, x29, #64
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ret
%res = fadd fp128 %a, %b
ret fp128 %res
diff --git a/llvm/test/CodeGen/AArch64/sme-toggle-pstateza.ll b/llvm/test/CodeGen/AArch64/sme-toggle-pstateza.ll
index 3c50ab54e561e69..6dfcf5989237f2b 100644
--- a/llvm/test/CodeGen/AArch64/sme-toggle-pstateza.ll
+++ b/llvm/test/CodeGen/AArch64/sme-toggle-pstateza.ll
@@ -4,8 +4,25 @@
define void @toggle_pstate_za() {
; CHECK-LABEL: toggle_pstate_za:
; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset b8, -8
+; CHECK-NEXT: .cfi_offset b9, -16
+; CHECK-NEXT: .cfi_offset b10, -24
+; CHECK-NEXT: .cfi_offset b11, -32
+; CHECK-NEXT: .cfi_offset b12, -40
+; CHECK-NEXT: .cfi_offset b13, -48
+; CHECK-NEXT: .cfi_offset b14, -56
+; CHECK-NEXT: .cfi_offset b15, -64
; CHECK-NEXT: smstart za
; CHECK-NEXT: smstop za
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.za.enable()
call void @llvm.aarch64.sme.za.disable()
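The new d8-d15 spill/fill sequences in the CHECK lines above are the visible effect of the added masks. To reproduce them locally, piping llc output into FileCheck against the test file is the usual workflow; the flags below are the typical form for these SME tests (the authoritative RUN line sits at the top of each test file, so treat this invocation as illustrative):

llc -mtriple=aarch64 -mattr=+sme -verify-machineinstrs \
    < llvm/test/CodeGen/AArch64/sme-toggle-pstateza.ll | \
  FileCheck llvm/test/CodeGen/AArch64/sme-toggle-pstateza.ll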