[llvm] [AArch64][SME] Add remarks to flag lazy ZA saves, and SMSTART/SMSTOP transitions (PR #68255)

Jon Roelofs via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 4 13:33:51 PDT 2023


https://github.com/jroelofs updated https://github.com/llvm/llvm-project/pull/68255

>From d543260ce0b74e8fa686f2363875dbb96a8e20b6 Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Tue, 3 Oct 2023 14:01:59 -0700
Subject: [PATCH] [AArch64][SME] Add remarks to flag lazy ZA saves, and
 SMSTART/SMSTOP transitions

---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 34 ++++++-
 .../AArch64/sme-lazy-save-call-remarks.ll     | 32 +++++++
 .../sme-streaming-interface-remarks.ll        | 90 +++++++++++++++++++
 3 files changed, 155 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
 create mode 100644 llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3ae7a893ca4e9e3..726e6bcef9a5a2a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ObjCARCUtil.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
@@ -7362,6 +7363,18 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   else if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
     CalleeAttrs = SMEAttrs(ES->getSymbol());
 
+  auto DescribeCallsite =
+      [&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & {
+    R << "call from " << ore::NV("Caller", MF.getName()) << " to ";
+    if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
+      R << ore::NV("Callee", ES->getSymbol());
+    else if (CLI.CB && CLI.CB->getCalledFunction())
+      R << ore::NV("Callee", CLI.CB->getCalledFunction()->getName());
+    else
+      R << "unknown callee";
+    return R;
+  };
+
   bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs);
   if (RequiresLazySave) {
     SDValue NumZaSaveSlices;
@@ -7388,13 +7401,32 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
         ISD::INTRINSIC_VOID, DL, MVT::Other, Chain,
         DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32),
         TPIDR2ObjAddr);
+    OptimizationRemarkEmitter ORE(&MF.getFunction());
+    ORE.emit([&]() {
+      auto R = CLI.CB ? OptimizationRemarkAnalysis(DEBUG_TYPE, "SMELazySaveZA",
+                                                   CLI.CB)
+                      : OptimizationRemarkAnalysis(DEBUG_TYPE, "SMELazySaveZA",
+                                                   &MF.getFunction());
+      DescribeCallsite(R) << " creates a lazy save ZA area";
+      return R;
+    });
   }
 
   SDValue PStateSM;
   std::optional<bool> RequiresSMChange =
       CallerAttrs.requiresSMChange(CalleeAttrs);
-  if (RequiresSMChange)
+  if (RequiresSMChange) {
     PStateSM = getPStateSM(DAG, Chain, CallerAttrs, DL, MVT::i64);
+    OptimizationRemarkEmitter ORE(&MF.getFunction());
+    ORE.emit([&]() {
+      auto R = CLI.CB ? OptimizationRemarkAnalysis(DEBUG_TYPE, "SMETransition",
+                                                   CLI.CB)
+                      : OptimizationRemarkAnalysis(DEBUG_TYPE, "SMETransition",
+                                                   &MF.getFunction());
+      DescribeCallsite(R) << " requires a streaming mode transition";
+      return R;
+    });
+  }
 
   // Adjust the stack pointer for the new arguments...
   // These operations are automatically eliminated by the prolog/epilog pass
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
new file mode 100644
index 000000000000000..82ba81e89ff43cd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
@@ -0,0 +1,32 @@
+; Checks the -pass-remarks-analysis output for calls that set up a lazy ZA save (CHECK lines are hand-written).
+; RUN: llc -mtriple=aarch64 -mattr=+sme --pass-remarks-analysis=aarch64-lower -o /dev/null < %s 2>&1 | FileCheck %s
+
+declare void @private_za_callee()
+declare void @private_za_preserved_callee() "aarch64_pstate_za_preserved"
+declare float @llvm.cos.f32(float)
+
+define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" {
+; CHECK: remark: <unknown>:0:0: call from test_lazy_save_1_callee to private_za_callee creates a lazy save ZA area
+  call void @private_za_callee()
+  ret void
+}
+
+define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" {
+; CHECK: remark: <unknown>:0:0: call from test_lazy_save_2_callees to private_za_callee creates a lazy save ZA area
+  call void @private_za_callee()
+; CHECK: remark: <unknown>:0:0: call from test_lazy_save_2_callees to private_za_callee creates a lazy save ZA area
+  call void @private_za_callee()
+  ret void
+}
+
+define void @test_lazy_save_preserved_callee() nounwind "aarch64_pstate_za_shared" {
+; CHECK: remark: <unknown>:0:0: call from test_lazy_save_preserved_callee to private_za_preserved_callee creates a lazy save ZA area
+  call void @private_za_preserved_callee()
+  ret void
+}
+
+define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_pstate_za_shared" {
+; CHECK: remark: <unknown>:0:0: call from test_lazy_save_expanded_intrinsic to cosf creates a lazy save ZA area
+  %res = call float @llvm.cos.f32(float %a)
+  ret float %res
+}
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll
new file mode 100644
index 000000000000000..71c9186a797b2f9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll
@@ -0,0 +1,90 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve -verify-machineinstrs --pass-remarks-analysis=aarch64-lower -o /dev/null < %s 2>&1 | FileCheck %s
+
+declare void @normal_callee()
+declare void @streaming_callee() "aarch64_pstate_sm_enabled"
+declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"
+
+; CHECK: remark: <unknown>:0:0: call from normal_caller_streaming_callee to streaming_callee requires a streaming mode transition
+define void @normal_caller_streaming_callee() nounwind {
+  call void @streaming_callee()
+  ret void;
+}
+
+; CHECK: remark: <unknown>:0:0: call from streaming_caller_normal_callee to normal_callee requires a streaming mode transition
+define void @streaming_caller_normal_callee() nounwind "aarch64_pstate_sm_enabled" {
+  call void @normal_callee()
+  ret void;
+}
+
+; CHECK-NOT: streaming_caller_streaming_callee
+define void @streaming_caller_streaming_callee() nounwind "aarch64_pstate_sm_enabled" {
+  call void @streaming_callee()
+  ret void;
+}
+
+; CHECK-NOT: streaming_caller_streaming_compatible_callee
+define void @streaming_caller_streaming_compatible_callee() nounwind "aarch64_pstate_sm_enabled" {
+  call void @streaming_compatible_callee()
+  ret void;
+}
+
+; CHECK: remark: <unknown>:0:0: call from call_to_function_pointer_streaming_enabled to unknown callee requires a streaming mode transition
+define void @call_to_function_pointer_streaming_enabled(ptr %p) nounwind {
+  call void %p() "aarch64_pstate_sm_enabled"
+  ret void
+}
+
+; CHECK: remark: <unknown>:0:0: call from smstart_clobber_simdfp to streaming_callee requires a streaming mode transition
+define <4 x i32> @smstart_clobber_simdfp(<4 x i32> %x) nounwind {
+  call void @streaming_callee()
+  ret <4 x i32> %x;
+}
+
+; CHECK: remark: <unknown>:0:0: call from smstart_clobber_sve to streaming_callee requires a streaming mode transition
+define <vscale x 4 x i32> @smstart_clobber_sve(<vscale x 4 x i32> %x) nounwind {
+  call void @streaming_callee()
+  ret <vscale x 4 x i32> %x;
+}
+
+; CHECK: remark: <unknown>:0:0: call from smstart_clobber_sve_duplicate to streaming_callee requires a streaming mode transition
+; CHECK: remark: <unknown>:0:0: call from smstart_clobber_sve_duplicate to streaming_callee requires a streaming mode transition
+define <vscale x 4 x i32> @smstart_clobber_sve_duplicate(<vscale x 4 x i32> %x) nounwind {
+  call void @streaming_callee()
+  call void @streaming_callee()
+  ret <vscale x 4 x i32> %x;
+}
+
+; CHECK: remark: <unknown>:0:0: call from call_to_intrinsic_without_chain to cos requires a streaming mode transition
+define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_pstate_sm_enabled" {
+entry:
+  %res = call fast double @llvm.cos.f64(double %x)
+  %res.fadd = fadd fast double %res, %x
+  ret double %res.fadd
+}
+
+declare double @llvm.cos.f64(double)
+
+; CHECK: remark: <unknown>:0:0: call from disable_tailcallopt to streaming_callee requires a streaming mode transition
+define void @disable_tailcallopt() nounwind {
+  tail call void @streaming_callee()
+  ret void;
+}
+
+; CHECK: remark: <unknown>:0:0: call from call_to_non_streaming_pass_sve_objects to foo requires a streaming mode transition
+define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone %ptr) #0 {
+entry:
+  %Data1 = alloca <vscale x 16 x i8>, align 16
+  %Data2 = alloca <vscale x 16 x i8>, align 16
+  %Data3 = alloca <vscale x 16 x i8>, align 16
+  %0 = tail call i64 @llvm.aarch64.sme.cntsb()
+  call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0)
+  %1 = load <vscale x 16 x i8>, ptr %Data1, align 16
+  %vecext = extractelement <vscale x 16 x i8> %1, i64 0
+  ret i8 %vecext
+}
+
+declare i64 @llvm.aarch64.sme.cntsb()
+
+declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef)
+
+attributes #0 = { nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" }



More information about the llvm-commits mailing list