[llvm] 340054e - [AArch64][SME] Remove combination of private-ZA and preserves_za. (#78563)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 19 05:48:49 PST 2024


Author: Sander de Smalen
Date: 2024-01-19T13:48:44Z
New Revision: 340054e561bc7fa407de5c591d503ad52fbffaff

URL: https://github.com/llvm/llvm-project/commit/340054e561bc7fa407de5c591d503ad52fbffaff
DIFF: https://github.com/llvm/llvm-project/commit/340054e561bc7fa407de5c591d503ad52fbffaff.diff

LOG: [AArch64][SME] Remove combination of private-ZA and preserves_za. (#78563)

The new Clang attributes no longer support the combination of having a
private-ZA function that preserves ZA. The use of __arm_preserves("za")
means that ZA is shared and preserved.

There wasn't that much benefit to the special handling of this, because
in practice it only meant that we'd avoid restoring the lazy-save
afterwards, but it still needed setting up a lazy-save (with the
possibility of using a 0-sized buffer).

Perhaps a new attribute will be added in the future to support this
case, at which point we can revert back some of the changes removed in
this patch. But for now removing this code simplifies things.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
    llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8a6f1dc7487bae..7b9b6a7a428125 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7609,16 +7609,6 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs);
   if (RequiresLazySave) {
-    SDValue NumZaSaveSlices;
-    if (!CalleeAttrs.preservesZA()) {
-      // Set up a lazy save mechanism by storing the runtime live slices
-      // (worst-case SVL) to the TPIDR2 stack object.
-      NumZaSaveSlices = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
-                                    DAG.getConstant(1, DL, MVT::i32));
-    } else if (CalleeAttrs.preservesZA()) {
-      NumZaSaveSlices = DAG.getConstant(0, DL, MVT::i64);
-    }
-
     unsigned TPIDR2Obj = FuncInfo->getLazySaveTPIDR2Obj();
     MachinePointerInfo MPI = MachinePointerInfo::getStack(MF, TPIDR2Obj);
     SDValue TPIDR2ObjAddr = DAG.getFrameIndex(TPIDR2Obj,
@@ -7626,6 +7616,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     SDValue NumZaSaveSlicesAddr =
         DAG.getNode(ISD::ADD, DL, TPIDR2ObjAddr.getValueType(), TPIDR2ObjAddr,
                     DAG.getConstant(8, DL, TPIDR2ObjAddr.getValueType()));
+    SDValue NumZaSaveSlices = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
+                                          DAG.getConstant(1, DL, MVT::i32));
     Chain = DAG.getTruncStore(Chain, DL, NumZaSaveSlices, NumZaSaveSlicesAddr,
                               MPI, MVT::i16);
     Chain = DAG.getNode(
@@ -7638,14 +7630,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
                                                    CLI.CB)
                       : OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
                                                    &MF.getFunction());
-      DescribeCallsite(R) << " sets up a lazy save for ZA";
-      if (CalleeAttrs.preservesZA())
-        R << ", but callee preserves ZA, so we request 0 slices to be saved";
-      else
-        R << ", and we request that all slices be saved";
-      R << ore::setExtraArgs()
-        << ore::NV("CalleePreservesZA", CalleeAttrs.preservesZA());
-      return R;
+      return DescribeCallsite(R) << " sets up a lazy save for ZA";
     });
   }
 
@@ -8081,34 +8066,33 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   }
 
   if (RequiresLazySave) {
-    if (!CalleeAttrs.preservesZA()) {
-      // Unconditionally resume ZA.
-      Result = DAG.getNode(
-          AArch64ISD::SMSTART, DL, MVT::Other, Result,
-          DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
-          DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
-
-      // Conditionally restore the lazy save using a pseudo node.
-      unsigned FI = FuncInfo->getLazySaveTPIDR2Obj();
-      SDValue RegMask = DAG.getRegisterMask(
-          TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
-      SDValue RestoreRoutine = DAG.getTargetExternalSymbol(
-          "__arm_tpidr2_restore", getPointerTy(DAG.getDataLayout()));
-      SDValue TPIDR2_EL0 = DAG.getNode(
-          ISD::INTRINSIC_W_CHAIN, DL, MVT::i64, Result,
-          DAG.getConstant(Intrinsic::aarch64_sme_get_tpidr2, DL, MVT::i32));
-
-      // Copy the address of the TPIDR2 block into X0 before 'calling' the
-      // RESTORE_ZA pseudo.
-      SDValue Glue;
-      SDValue TPIDR2Block = DAG.getFrameIndex(
-          FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
-      Result = DAG.getCopyToReg(Result, DL, AArch64::X0, TPIDR2Block, Glue);
-      Result = DAG.getNode(AArch64ISD::RESTORE_ZA, DL, MVT::Other,
-                           {Result, TPIDR2_EL0,
-                            DAG.getRegister(AArch64::X0, MVT::i64),
-                            RestoreRoutine, RegMask, Result.getValue(1)});
-    }
+    // Unconditionally resume ZA.
+    Result = DAG.getNode(
+        AArch64ISD::SMSTART, DL, MVT::Other, Result,
+        DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
+        DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
+
+    // Conditionally restore the lazy save using a pseudo node.
+    unsigned FI = FuncInfo->getLazySaveTPIDR2Obj();
+    SDValue RegMask = DAG.getRegisterMask(
+        TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
+    SDValue RestoreRoutine = DAG.getTargetExternalSymbol(
+        "__arm_tpidr2_restore", getPointerTy(DAG.getDataLayout()));
+    SDValue TPIDR2_EL0 = DAG.getNode(
+        ISD::INTRINSIC_W_CHAIN, DL, MVT::i64, Result,
+        DAG.getConstant(Intrinsic::aarch64_sme_get_tpidr2, DL, MVT::i32));
+
+    // Copy the address of the TPIDR2 block into X0 before 'calling' the
+    // RESTORE_ZA pseudo.
+    SDValue Glue;
+    SDValue TPIDR2Block = DAG.getFrameIndex(
+        FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
+    Result = DAG.getCopyToReg(Result, DL, AArch64::X0, TPIDR2Block, Glue);
+    Result =
+        DAG.getNode(AArch64ISD::RESTORE_ZA, DL, MVT::Other,
+                    {Result, TPIDR2_EL0, DAG.getRegister(AArch64::X0, MVT::i64),
+                     RestoreRoutine, RegMask, Result.getValue(1)});
+
     // Finally reset the TPIDR2_EL0 register to 0.
     Result = DAG.getNode(
         ISD::INTRINSIC_VOID, DL, MVT::Other, Result,

diff  --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
index 6762a768fd5bd4..d999311301f943 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
@@ -2,31 +2,24 @@
 ; RUN: llc -mtriple=aarch64 -mattr=+sme --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s
 
 declare void @private_za_callee()
-declare void @private_za_preserved_callee() "aarch64_pstate_za_preserved"
 declare float @llvm.cos.f32(float)
 
 define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" {
-; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_1_callee' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_1_callee' to 'private_za_callee' sets up a lazy save for ZA
   call void @private_za_callee()
   ret void
 }
 
 define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" {
-; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA
   call void @private_za_callee()
-; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA
   call void @private_za_callee()
   ret void
 }
 
-define void @test_lazy_save_preserved_callee() nounwind "aarch64_pstate_za_shared" {
-; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_preserved_callee' to 'private_za_preserved_callee' sets up a lazy save for ZA, but callee preserves ZA, so we request 0 slices to be saved
-  call void @private_za_preserved_callee()
-  ret void
-}
-
 define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_pstate_za_shared" {
-; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_expanded_intrinsic' to 'cosf' sets up a lazy save for ZA, and we request that all slices be saved
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_expanded_intrinsic' to 'cosf' sets up a lazy save for ZA
   %res = call float @llvm.cos.f32(float %a)
   ret float %res
 }

diff  --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
index a173fd40feb280..9625e139bd0bc5 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
@@ -2,7 +2,6 @@
 ; RUN: llc -mtriple=aarch64 -mattr=+sme < %s | FileCheck %s
 
 declare void @private_za_callee()
-declare void @private_za_preserved_callee() "aarch64_pstate_za_preserved"
 declare float @llvm.cos.f32(float)
 
 ; Test lazy-save mechanism for a single callee.
@@ -170,48 +169,3 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_pstate_z
   call void @private_za_callee()
   ret void
 }
-
-
-; Test lazy-save mechanism for an aarch64_pstate_za_shared caller
-; calling a callee with aarch64_pstate_za_preserved.
-define void @za_shared_caller_za_preserved_callee() nounwind "aarch64_pstate_za_shared" "aarch64_pstate_sm_compatible" {
-; CHECK-LABEL: za_shared_caller_za_preserved_callee:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
-; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    add x29, sp, #64
-; CHECK-NEXT:    str x19, [sp, #80] // 8-byte Folded Spill
-; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    rdsvl x8, #1
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    msub x8, x8, x8, x9
-; CHECK-NEXT:    mov sp, x8
-; CHECK-NEXT:    sub x9, x29, #80
-; CHECK-NEXT:    stp x8, xzr, [x29, #-80]
-; CHECK-NEXT:    msr TPIDR2_EL0, x9
-; CHECK-NEXT:    bl __arm_sme_state
-; CHECK-NEXT:    and x19, x0, #0x1
-; CHECK-NEXT:    tbz w19, #0, .LBB4_2
-; CHECK-NEXT:  // %bb.1:
-; CHECK-NEXT:    smstop sm
-; CHECK-NEXT:  .LBB4_2:
-; CHECK-NEXT:    bl private_za_preserved_callee
-; CHECK-NEXT:    tbz w19, #0, .LBB4_4
-; CHECK-NEXT:  // %bb.3:
-; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:  .LBB4_4:
-; CHECK-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-NEXT:    sub sp, x29, #64
-; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr x19, [sp, #80] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
-; CHECK-NEXT:    ret
-  call void @private_za_preserved_callee()
-  ret void
-}


        


More information about the llvm-commits mailing list