[llvm] [AArch64][SME] Add missing SMStartStop regmasks (PR #68458)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 6 16:28:16 PDT 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Changes
Without these register masks, the register allocator does not know that these SMSTART/SMSTOP nodes clobber the callee-saved NEON registers (among other things).
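For context, a register-mask operand is how MachineIR communicates clobber information across an instruction to the register allocator: any physical register not set in the mask is treated as clobbered. Below is a minimal sketch (not part of this patch; the helper name is made up) of how downstream code can query a regmask operand using the existing `MachineOperand` API:

```cpp
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"

// Hypothetical helper: returns true if any regmask operand on MI reports
// PhysReg as clobbered. Once the mask from getSMStartStopCallPreservedMask()
// is attached to the SMSTART/SMSTOP nodes, queries like this see the
// callee-saved NEON registers as clobbered.
static bool regMaskClobbers(const llvm::MachineInstr &MI,
                            llvm::MCRegister PhysReg) {
  for (const llvm::MachineOperand &MO : MI.operands())
    if (MO.isRegMask() &&
        llvm::MachineOperand::clobbersPhysReg(MO.getRegMask(), PhysReg))
      return true;
  return false;
}
```

This is consistent with the test updates below, where the affected functions now spill and reload d8-d15 around the SMSTART/SMSTOP sequences.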
---
Patch is 25.16 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/68458.diff
5 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+7-3)
- (modified) llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll (+77-35)
- (modified) llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll (+55-29)
- (modified) llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll (+35-17)
- (modified) llvm/test/CodeGen/AArch64/sme-toggle-pstateza.ll (+17)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9cda43e58d27a43..b073d0347dbcaf9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4819,6 +4819,7 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain,
SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
unsigned IntNo = Op.getConstantOperandVal(1);
SDLoc DL(Op);
switch (IntNo) {
@@ -4845,13 +4846,15 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
AArch64ISD::SMSTART, DL, MVT::Other,
Op->getOperand(0), // Chain
DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64),
+ DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask()));
case Intrinsic::aarch64_sme_za_disable:
return DAG.getNode(
AArch64ISD::SMSTOP, DL, MVT::Other,
Op->getOperand(0), // Chain
DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64),
+ DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask()));
}
}
@@ -7850,7 +7853,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Result = DAG.getNode(
AArch64ISD::SMSTART, DL, MVT::Other, Result,
DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64),
+ DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask()));
// Conditionally restore the lazy save using a pseudo node.
unsigned FI = FuncInfo->getLazySaveTPIDR2Obj();
diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
index 885cd7b0b0947da..fc1104412519583 100644
--- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
+++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
@@ -212,14 +212,19 @@ declare double @za_shared_callee(double) "aarch64_pstate_za_shared"
define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline optnone "aarch64_pstate_za_new"{
; CHECK-COMMON-LABEL: za_new_caller_to_za_shared_callee:
; CHECK-COMMON: // %bb.0: // %prelude
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: sub sp, sp, #16
+; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: add x29, sp, #64
+; CHECK-COMMON-NEXT: sub sp, sp, #32
+; CHECK-COMMON-NEXT: stur d0, [x29, #-88] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x8, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x8
-; CHECK-COMMON-NEXT: stur x8, [x29, #-16]
+; CHECK-COMMON-NEXT: stur x8, [x29, #-80]
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: cbz x8, .LBB6_2
; CHECK-COMMON-NEXT: b .LBB6_1
@@ -230,13 +235,20 @@ define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline o
; CHECK-COMMON-NEXT: .LBB6_2: // %entry
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: zero {za}
+; CHECK-COMMON-NEXT: ldur d0, [x29, #-88] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl za_shared_callee
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
; CHECK-COMMON-NEXT: fmov d1, x8
; CHECK-COMMON-NEXT: fadd d0, d0, d1
+; CHECK-COMMON-NEXT: stur d0, [x29, #-96] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstop za
-; CHECK-COMMON-NEXT: mov sp, x29
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldur d0, [x29, #-96] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: sub sp, x29, #64
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
entry:
%call = call double @za_shared_callee(double %x)
@@ -247,21 +259,26 @@ entry:
define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline optnone "aarch64_pstate_za_shared"{
; CHECK-COMMON-LABEL: za_shared_caller_to_za_none_callee:
; CHECK-COMMON: // %bb.0: // %entry
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: sub sp, sp, #16
+; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: add x29, sp, #64
+; CHECK-COMMON-NEXT: sub sp, sp, #32
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
-; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
-; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
-; CHECK-COMMON-NEXT: sub x8, x29, #16
+; CHECK-COMMON-NEXT: stur x9, [x29, #-80]
+; CHECK-COMMON-NEXT: sturh w8, [x29, #-72]
+; CHECK-COMMON-NEXT: sub x8, x29, #80
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
; CHECK-COMMON-NEXT: bl normal_callee
+; CHECK-COMMON-NEXT: stur d0, [x29, #-88] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-COMMON-NEXT: sub x0, x29, #16
+; CHECK-COMMON-NEXT: sub x0, x29, #80
; CHECK-COMMON-NEXT: cbz x8, .LBB7_1
; CHECK-COMMON-NEXT: b .LBB7_2
; CHECK-COMMON-NEXT: .LBB7_1: // %entry
@@ -270,10 +287,15 @@ define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline
; CHECK-COMMON-NEXT: .LBB7_2: // %entry
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
-; CHECK-COMMON-NEXT: fmov d1, x8
-; CHECK-COMMON-NEXT: fadd d0, d0, d1
-; CHECK-COMMON-NEXT: mov sp, x29
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: fmov d0, x8
+; CHECK-COMMON-NEXT: ldur d1, [x29, #-88] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: fadd d0, d1, d0
+; CHECK-COMMON-NEXT: sub sp, x29, #64
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
entry:
%call = call double @normal_callee(double %x)
@@ -285,28 +307,38 @@ entry:
define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwind {
; CHECK-COMMON-LABEL: f128_call_za:
; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: sub sp, sp, #16
+; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: add x29, sp, #64
+; CHECK-COMMON-NEXT: sub sp, sp, #32
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
-; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
-; CHECK-COMMON-NEXT: sub x9, x29, #16
-; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
+; CHECK-COMMON-NEXT: stur x9, [x29, #-80]
+; CHECK-COMMON-NEXT: sub x9, x29, #80
+; CHECK-COMMON-NEXT: sturh w8, [x29, #-72]
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
; CHECK-COMMON-NEXT: bl __addtf3
+; CHECK-COMMON-NEXT: stur q0, [x29, #-96] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-COMMON-NEXT: sub x0, x29, #16
+; CHECK-COMMON-NEXT: sub x0, x29, #80
; CHECK-COMMON-NEXT: cbnz x8, .LBB8_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT: .LBB8_2:
+; CHECK-COMMON-NEXT: ldur q0, [x29, #-96] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-COMMON-NEXT: mov sp, x29
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: sub sp, x29, #64
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
%res = fadd fp128 %a, %b
ret fp128 %res
@@ -345,28 +377,38 @@ define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounw
define double @frem_call_za(double %a, double %b) "aarch64_pstate_za_shared" nounwind {
; CHECK-COMMON-LABEL: frem_call_za:
; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: sub sp, sp, #16
+; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: add x29, sp, #64
+; CHECK-COMMON-NEXT: sub sp, sp, #32
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
-; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
-; CHECK-COMMON-NEXT: sub x9, x29, #16
-; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
+; CHECK-COMMON-NEXT: stur x9, [x29, #-80]
+; CHECK-COMMON-NEXT: sub x9, x29, #80
+; CHECK-COMMON-NEXT: sturh w8, [x29, #-72]
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
; CHECK-COMMON-NEXT: bl fmod
+; CHECK-COMMON-NEXT: stur d0, [x29, #-88] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-COMMON-NEXT: sub x0, x29, #16
+; CHECK-COMMON-NEXT: sub x0, x29, #80
; CHECK-COMMON-NEXT: cbnz x8, .LBB10_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT: .LBB10_2:
+; CHECK-COMMON-NEXT: ldur d0, [x29, #-88] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-COMMON-NEXT: mov sp, x29
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: sub sp, x29, #64
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
%res = frem double %a, %b
ret double %res
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
index 6757af01278bd9b..a46820b3a08ba05 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
@@ -9,28 +9,36 @@ declare float @llvm.cos.f32(float)
define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" {
; CHECK-LABEL: test_lazy_save_1_callee:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #64
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: stur x9, [x29, #-16]
-; CHECK-NEXT: sub x9, x29, #16
-; CHECK-NEXT: sturh w8, [x29, #-8]
+; CHECK-NEXT: stur x9, [x29, #-80]
+; CHECK-NEXT: sub x9, x29, #80
+; CHECK-NEXT: sturh w8, [x29, #-72]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
+; CHECK-NEXT: sub x0, x29, #80
; CHECK-NEXT: cbnz x8, .LBB0_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: sub sp, x29, #64
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ret
call void @private_za_callee()
ret void
@@ -40,41 +48,49 @@ define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" {
define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" {
; CHECK-LABEL: test_lazy_save_2_callees:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #64
+; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x19, #1
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: msub x8, x19, x19, x8
; CHECK-NEXT: mov sp, x8
-; CHECK-NEXT: sub x20, x29, #16
-; CHECK-NEXT: stur x8, [x29, #-16]
-; CHECK-NEXT: sturh w19, [x29, #-8]
+; CHECK-NEXT: sub x20, x29, #80
+; CHECK-NEXT: stur x8, [x29, #-80]
+; CHECK-NEXT: sturh w19, [x29, #-72]
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
+; CHECK-NEXT: sub x0, x29, #80
; CHECK-NEXT: cbnz x8, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: sturh w19, [x29, #-8]
+; CHECK-NEXT: sturh w19, [x29, #-72]
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
+; CHECK-NEXT: sub x0, x29, #80
; CHECK-NEXT: cbnz x8, .LBB1_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: sub sp, x29, #64
+; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret
call void @private_za_callee()
call void @private_za_callee()
@@ -85,28 +101,38 @@ define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" {
define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_pstate_za_shared" {
; CHECK-LABEL: test_lazy_save_expanded_intrinsic:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #64
+; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: stur x9, [x29, #-16]
-; CHECK-NEXT: sub x9, x29, #16
-; CHECK-NEXT: sturh w8, [x29, #-8]
+; CHECK-NEXT: stur x9, [x29, #-80]
+; CHECK-NEXT: sub x9, x29, #80
+; CHECK-NEXT: sturh w8, [x29, #-72]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl cosf
+; CHECK-NEXT: stur s0, [x29, #-84] // 4-byte Folded Spill
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
+; CHECK-NEXT: sub x0, x29, #80
; CHECK-NEXT: cbnz x8, .LBB2_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB2_2:
+; CHECK-NEXT: ldur s0, [x29, #-84] // 4-byte Folded Reload
; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: sub sp, x29, #64
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ret
%res = call float @llvm.cos.f32(float %a)
ret float %res
diff --git a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
index 0ac2b21c6aba360..9dccec5a99da148 100644
--- a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
@@ -7,28 +7,36 @@ declare void @private_za_callee()
define void @disable_tailcallopt() "aarch64_pstate_za_shared" nounwind {
; CHECK-LABEL: disable_tailcallopt:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/68458
More information about the llvm-commits mailing list