[llvm] [AArch64][SME] Add remarks to flag lazy ZA saves, and SMSTART/SMSTOP transitions (PR #68255)
Jon Roelofs via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 4 13:20:20 PDT 2023
https://github.com/jroelofs created https://github.com/llvm/llvm-project/pull/68255
None
>From 10471605becbff1ad98664648b3f5d6fd211a05b Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Tue, 3 Oct 2023 14:01:59 -0700
Subject: [PATCH] [AArch64][SME] Add remarks to flag lazy ZA saves, and
SMSTART/SMSTOP transitions
---
.../Target/AArch64/AArch64ISelLowering.cpp | 36 +++++++-
.../AArch64/sme-lazy-save-call-remarks.ll | 32 +++++++
.../sme-streaming-interface-remarks.ll | 90 +++++++++++++++++++
3 files changed, 157 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
create mode 100644 llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3ae7a893ca4e9e3..7c474e3c610cf55 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ObjCARCUtil.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
@@ -7362,6 +7363,20 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
else if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
CalleeAttrs = SMEAttrs(ES->getSymbol());
+ auto DescribeCallsite = [&](OptimizationRemarkAnalysis &R)
+ -> OptimizationRemarkAnalysis & {
+ R << "call from "
+ << ore::NV("Caller", MF.getName())
+ << " to ";
+ if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
+ R << ore::NV("Callee", ES->getSymbol());
+ else if (CLI.CB && CLI.CB->getCalledFunction())
+ R << ore::NV("Callee", CLI.CB->getCalledFunction()->getName());
+ else
+ R << "unknown callee";
+ return R;
+ };
+
bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs);
if (RequiresLazySave) {
SDValue NumZaSaveSlices;
@@ -7388,13 +7403,32 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
ISD::INTRINSIC_VOID, DL, MVT::Other, Chain,
DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32),
TPIDR2ObjAddr);
+ OptimizationRemarkEmitter ORE(&MF.getFunction());
+ ORE.emit([&](){
+ auto R = CLI.CB
+ ? OptimizationRemarkAnalysis(DEBUG_TYPE, "SMELazySaveZA", CLI.CB)
+ : OptimizationRemarkAnalysis(DEBUG_TYPE, "SMELazySaveZA", &MF.getFunction());
+ DescribeCallsite(R)
+ << " creates a lazy save ZA area";
+ return R;
+ });
}
SDValue PStateSM;
std::optional<bool> RequiresSMChange =
CallerAttrs.requiresSMChange(CalleeAttrs);
- if (RequiresSMChange)
+ if (RequiresSMChange) {
PStateSM = getPStateSM(DAG, Chain, CallerAttrs, DL, MVT::i64);
+ OptimizationRemarkEmitter ORE(&MF.getFunction());
+ ORE.emit([&](){
+ auto R = CLI.CB
+ ? OptimizationRemarkAnalysis(DEBUG_TYPE, "SMETransition", CLI.CB)
+ : OptimizationRemarkAnalysis(DEBUG_TYPE, "SMETransition", &MF.getFunction());
+ DescribeCallsite(R)
+ << " requires a streaming mode transition";
+ return R;
+ });
+ }
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
new file mode 100644
index 000000000000000..82ba81e89ff43cd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
@@ -0,0 +1,32 @@
+; Check that an analysis remark is emitted for every call that sets up a lazy ZA save.
+; RUN: llc -mtriple=aarch64 -mattr=+sme --pass-remarks-analysis=aarch64-lower -o /dev/null < %s 2>&1 | FileCheck %s
+
+declare void @private_za_callee()
+declare void @private_za_preserved_callee() "aarch64_pstate_za_preserved"
+declare float @llvm.cos.f32(float)
+
+define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" {
+; CHECK: remark: <unknown>:0:0: call from test_lazy_save_1_callee to private_za_callee creates a lazy save ZA area
+ call void @private_za_callee()
+ ret void
+}
+
+define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" {
+; CHECK: remark: <unknown>:0:0: call from test_lazy_save_2_callees to private_za_callee creates a lazy save ZA area
+ call void @private_za_callee()
+; CHECK: remark: <unknown>:0:0: call from test_lazy_save_2_callees to private_za_callee creates a lazy save ZA area
+ call void @private_za_callee()
+ ret void
+}
+
+define void @test_lazy_save_preserved_callee() nounwind "aarch64_pstate_za_shared" {
+; CHECK: remark: <unknown>:0:0: call from test_lazy_save_preserved_callee to private_za_preserved_callee creates a lazy save ZA area
+ call void @private_za_preserved_callee()
+ ret void
+}
+
+define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_pstate_za_shared" {
+; CHECK: remark: <unknown>:0:0: call from test_lazy_save_expanded_intrinsic to cosf creates a lazy save ZA area
+ %res = call float @llvm.cos.f32(float %a)
+ ret float %res
+}
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll
new file mode 100644
index 000000000000000..71c9186a797b2f9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll
@@ -0,0 +1,90 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve -verify-machineinstrs --pass-remarks-analysis=aarch64-lower -o /dev/null < %s 2>&1 | FileCheck %s
+
+declare void @normal_callee()
+declare void @streaming_callee() "aarch64_pstate_sm_enabled"
+declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"
+
+; CHECK: remark: <unknown>:0:0: call from normal_caller_streaming_callee to streaming_callee requires a streaming mode transition
+define void @normal_caller_streaming_callee() nounwind {
+ call void @streaming_callee()
+ ret void;
+}
+
+; CHECK: remark: <unknown>:0:0: call from streaming_caller_normal_callee to normal_callee requires a streaming mode transition
+define void @streaming_caller_normal_callee() nounwind "aarch64_pstate_sm_enabled" {
+ call void @normal_callee()
+ ret void;
+}
+
+; CHECK-NOT: streaming_caller_streaming_callee
+define void @streaming_caller_streaming_callee() nounwind "aarch64_pstate_sm_enabled" {
+ call void @streaming_callee()
+ ret void;
+}
+
+; CHECK-NOT: streaming_caller_streaming_compatible_callee
+define void @streaming_caller_streaming_compatible_callee() nounwind "aarch64_pstate_sm_enabled" {
+ call void @streaming_compatible_callee()
+ ret void;
+}
+
+; CHECK: remark: <unknown>:0:0: call from call_to_function_pointer_streaming_enabled to unknown callee requires a streaming mode transition
+define void @call_to_function_pointer_streaming_enabled(ptr %p) nounwind {
+ call void %p() "aarch64_pstate_sm_enabled"
+ ret void
+}
+
+; CHECK: remark: <unknown>:0:0: call from smstart_clobber_simdfp to streaming_callee requires a streaming mode transition
+define <4 x i32> @smstart_clobber_simdfp(<4 x i32> %x) nounwind {
+ call void @streaming_callee()
+ ret <4 x i32> %x;
+}
+
+; CHECK: remark: <unknown>:0:0: call from smstart_clobber_sve to streaming_callee requires a streaming mode transition
+define <vscale x 4 x i32> @smstart_clobber_sve(<vscale x 4 x i32> %x) nounwind {
+ call void @streaming_callee()
+ ret <vscale x 4 x i32> %x;
+}
+
+; CHECK: remark: <unknown>:0:0: call from smstart_clobber_sve_duplicate to streaming_callee requires a streaming mode transition
+; CHECK: remark: <unknown>:0:0: call from smstart_clobber_sve_duplicate to streaming_callee requires a streaming mode transition
+define <vscale x 4 x i32> @smstart_clobber_sve_duplicate(<vscale x 4 x i32> %x) nounwind {
+ call void @streaming_callee()
+ call void @streaming_callee()
+ ret <vscale x 4 x i32> %x;
+}
+
+; CHECK: remark: <unknown>:0:0: call from call_to_intrinsic_without_chain to cos requires a streaming mode transition
+define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_pstate_sm_enabled" {
+entry:
+ %res = call fast double @llvm.cos.f64(double %x)
+ %res.fadd = fadd fast double %res, %x
+ ret double %res.fadd
+}
+
+declare double @llvm.cos.f64(double)
+
+; CHECK: remark: <unknown>:0:0: call from disable_tailcallopt to streaming_callee requires a streaming mode transition
+define void @disable_tailcallopt() nounwind {
+ tail call void @streaming_callee()
+ ret void;
+}
+
+; CHECK: remark: <unknown>:0:0: call from call_to_non_streaming_pass_sve_objects to foo requires a streaming mode transition
+define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone %ptr) #0 {
+entry:
+ %Data1 = alloca <vscale x 16 x i8>, align 16
+ %Data2 = alloca <vscale x 16 x i8>, align 16
+ %Data3 = alloca <vscale x 16 x i8>, align 16
+ %0 = tail call i64 @llvm.aarch64.sme.cntsb()
+ call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0)
+ %1 = load <vscale x 16 x i8>, ptr %Data1, align 16
+ %vecext = extractelement <vscale x 16 x i8> %1, i64 0
+ ret i8 %vecext
+}
+
+declare i64 @llvm.aarch64.sme.cntsb()
+
+declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef)
+
+attributes #0 = { nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" }
More information about the llvm-commits mailing list