[llvm] [AArch64][SME] Add remarks to flag lazy ZA saves, and SMSTART/SMSTOP transitions (PR #68255)
Jon Roelofs via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 5 10:14:05 PDT 2023
https://github.com/jroelofs updated https://github.com/llvm/llvm-project/pull/68255
From 2ed87db9fcfdb2c60bb1bf5ff95db3bf3f4943ad Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Tue, 3 Oct 2023 14:01:59 -0700
Subject: [PATCH] [AArch64][SME] Add remarks to flag lazy ZA saves, and SMSTART/SMSTOP transitions
---
.../Target/AArch64/AArch64ISelLowering.cpp | 41 ++++++++-
.../AArch64/sme-lazy-save-call-remarks.ll | 32 +++++++
.../sme-streaming-interface-remarks.ll | 90 +++++++++++++++++++
3 files changed, 162 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
create mode 100644 llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3ae7a893ca4e9e3..6c5a121d5c0f112 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ObjCARCUtil.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
@@ -7362,6 +7363,19 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
else if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
CalleeAttrs = SMEAttrs(ES->getSymbol());
+ auto DescribeCallsite =
+ [&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & {
+ R << "call from '" << ore::NV("Caller", MF.getName()) << "' to '";
+ if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
+ R << ore::NV("Callee", ES->getSymbol());
+ else if (CLI.CB && CLI.CB->getCalledFunction())
+ R << ore::NV("Callee", CLI.CB->getCalledFunction()->getName());
+ else
+ R << "unknown callee";
+ R << "'";
+ return R;
+ };
+
bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs);
if (RequiresLazySave) {
SDValue NumZaSaveSlices;
@@ -7388,13 +7402,38 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
ISD::INTRINSIC_VOID, DL, MVT::Other, Chain,
DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32),
TPIDR2ObjAddr);
+ OptimizationRemarkEmitter ORE(&MF.getFunction());
+ ORE.emit([&]() {
+ auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
+ CLI.CB)
+ : OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
+ &MF.getFunction());
+ DescribeCallsite(R) << " sets up a lazy save for ZA";
+ if (CalleeAttrs.preservesZA())
+ R << ", but callee preserves ZA, so we request 0 slices to be saved";
+ else
+ R << ", and we request that all slices be saved";
+ R << ore::setExtraArgs()
+ << ore::NV("CalleePreservesZA", CalleeAttrs.preservesZA());
+ return R;
+ });
}
SDValue PStateSM;
std::optional<bool> RequiresSMChange =
CallerAttrs.requiresSMChange(CalleeAttrs);
- if (RequiresSMChange)
+ if (RequiresSMChange) {
PStateSM = getPStateSM(DAG, Chain, CallerAttrs, DL, MVT::i64);
+ OptimizationRemarkEmitter ORE(&MF.getFunction());
+ ORE.emit([&]() {
+ auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMETransition",
+ CLI.CB)
+ : OptimizationRemarkAnalysis("sme", "SMETransition",
+ &MF.getFunction());
+ DescribeCallsite(R) << " requires a streaming mode transition";
+ return R;
+ });
+ }
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
new file mode 100644
index 000000000000000..6762a768fd5bd41
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
@@ -0,0 +1,32 @@
+; Verify that calls requiring a lazy ZA save are reported via --pass-remarks-analysis=sme.
+; RUN: llc -mtriple=aarch64 -mattr=+sme --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s
+
+declare void @private_za_callee()
+declare void @private_za_preserved_callee() "aarch64_pstate_za_preserved"
+declare float @llvm.cos.f32(float)
+
+define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" {
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_1_callee' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved
+ call void @private_za_callee()
+ ret void
+}
+
+define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" {
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved
+ call void @private_za_callee()
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved
+ call void @private_za_callee()
+ ret void
+}
+
+define void @test_lazy_save_preserved_callee() nounwind "aarch64_pstate_za_shared" {
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_preserved_callee' to 'private_za_preserved_callee' sets up a lazy save for ZA, but callee preserves ZA, so we request 0 slices to be saved
+ call void @private_za_preserved_callee()
+ ret void
+}
+
+define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_pstate_za_shared" {
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_expanded_intrinsic' to 'cosf' sets up a lazy save for ZA, and we request that all slices be saved
+ %res = call float @llvm.cos.f32(float %a)
+ ret float %res
+}
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll
new file mode 100644
index 000000000000000..e1a474d89823313
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll
@@ -0,0 +1,90 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve -verify-machineinstrs --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s
+
+declare void @normal_callee()
+declare void @streaming_callee() "aarch64_pstate_sm_enabled"
+declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"
+
+; CHECK: remark: <unknown>:0:0: call from 'normal_caller_streaming_callee' to 'streaming_callee' requires a streaming mode transition
+define void @normal_caller_streaming_callee() nounwind {
+ call void @streaming_callee()
+ ret void;
+}
+
+; CHECK: remark: <unknown>:0:0: call from 'streaming_caller_normal_callee' to 'normal_callee' requires a streaming mode transition
+define void @streaming_caller_normal_callee() nounwind "aarch64_pstate_sm_enabled" {
+ call void @normal_callee()
+ ret void;
+}
+
+; CHECK-NOT: streaming_caller_streaming_callee
+define void @streaming_caller_streaming_callee() nounwind "aarch64_pstate_sm_enabled" {
+ call void @streaming_callee()
+ ret void;
+}
+
+; CHECK-NOT: streaming_caller_streaming_compatible_callee
+define void @streaming_caller_streaming_compatible_callee() nounwind "aarch64_pstate_sm_enabled" {
+ call void @streaming_compatible_callee()
+ ret void;
+}
+
+; CHECK: remark: <unknown>:0:0: call from 'call_to_function_pointer_streaming_enabled' to 'unknown callee' requires a streaming mode transition
+define void @call_to_function_pointer_streaming_enabled(ptr %p) nounwind {
+ call void %p() "aarch64_pstate_sm_enabled"
+ ret void
+}
+
+; CHECK: remark: <unknown>:0:0: call from 'smstart_clobber_simdfp' to 'streaming_callee' requires a streaming mode transition
+define <4 x i32> @smstart_clobber_simdfp(<4 x i32> %x) nounwind {
+ call void @streaming_callee()
+ ret <4 x i32> %x;
+}
+
+; CHECK: remark: <unknown>:0:0: call from 'smstart_clobber_sve' to 'streaming_callee' requires a streaming mode transition
+define <vscale x 4 x i32> @smstart_clobber_sve(<vscale x 4 x i32> %x) nounwind {
+ call void @streaming_callee()
+ ret <vscale x 4 x i32> %x;
+}
+
+; CHECK: remark: <unknown>:0:0: call from 'smstart_clobber_sve_duplicate' to 'streaming_callee' requires a streaming mode transition
+; CHECK: remark: <unknown>:0:0: call from 'smstart_clobber_sve_duplicate' to 'streaming_callee' requires a streaming mode transition
+define <vscale x 4 x i32> @smstart_clobber_sve_duplicate(<vscale x 4 x i32> %x) nounwind {
+ call void @streaming_callee()
+ call void @streaming_callee()
+ ret <vscale x 4 x i32> %x;
+}
+
+; CHECK: remark: <unknown>:0:0: call from 'call_to_intrinsic_without_chain' to 'cos' requires a streaming mode transition
+define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_pstate_sm_enabled" {
+entry:
+ %res = call fast double @llvm.cos.f64(double %x)
+ %res.fadd = fadd fast double %res, %x
+ ret double %res.fadd
+}
+
+declare double @llvm.cos.f64(double)
+
+; CHECK: remark: <unknown>:0:0: call from 'disable_tailcallopt' to 'streaming_callee' requires a streaming mode transition
+define void @disable_tailcallopt() nounwind {
+ tail call void @streaming_callee()
+ ret void;
+}
+
+; CHECK: remark: <unknown>:0:0: call from 'call_to_non_streaming_pass_sve_objects' to 'foo' requires a streaming mode transition
+define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone %ptr) #0 {
+entry:
+ %Data1 = alloca <vscale x 16 x i8>, align 16
+ %Data2 = alloca <vscale x 16 x i8>, align 16
+ %Data3 = alloca <vscale x 16 x i8>, align 16
+ %0 = tail call i64 @llvm.aarch64.sme.cntsb()
+ call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0)
+ %1 = load <vscale x 16 x i8>, ptr %Data1, align 16
+ %vecext = extractelement <vscale x 16 x i8> %1, i64 0
+ ret i8 %vecext
+}
+
+declare i64 @llvm.aarch64.sme.cntsb()
+
+declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef)
+
+attributes #0 = { nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" }
More information about the llvm-commits mailing list