[clang] [llvm] [AArch64][SME] Add pass remarks to the MachineSMEABIPass (PR #170277)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 16 06:47:32 PST 2025
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/170277
>From af452bdabbad1131817f3c898ff0060a4ed2e665 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 1 Dec 2025 11:33:14 +0000
Subject: [PATCH 1/4] [AArch64][SME] Add pass remarks to the MachineSMEABIPass
This patch implements SME ABI pass remarks for when:
* a lazy ZA save is set up
* a full/agnostic ZA save is set up
* a ZT0 spill is emitted
These remarks can be enabled with `--pass-remarks-analysis=sme` (like
the current SME remarks).
The remarks work by noting where the save/spill was emitted, then
pointing out calls later in the function that require the save. The
remarks stop at the first call found along any path.
For example:
```
7: void merge_paths(bool a) __arm_inout("za") {
8: if (a)
9: private_za_callee_a();
10: else
11: private_za_callee_b();
12: private_za_callee_c(); // no remark (does not require a new save)
13: }
```
Results in the following remarks:
```
remark: foo.c:8:7: lazy save of ZA emitted in 'merge_paths'
remark: foo.c:9:5: call to 'private_za_callee_a' requires ZA save
remark: foo.c:11:5: call to 'private_za_callee_b' requires ZA save
```
Change-Id: Ia7abd6f4dbeca3c9ac359df7f5251f41d1f68a8c
---
llvm/lib/Target/AArch64/MachineSMEABIPass.cpp | 124 +++++++++++++++++-
.../AArch64/sme-abi-save-call-remarks.ll | 119 +++++++++++++++++
.../AArch64/sme-lazy-save-call-remarks.ll | 25 ----
3 files changed, 239 insertions(+), 29 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/sme-abi-save-call-remarks.ll
delete mode 100644 llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
index b3e1ddbb91f79..98d14d238aad1 100644
--- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
+++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
@@ -63,6 +63,7 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -303,6 +304,7 @@ struct MachineSMEABI : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<EdgeBundlesWrapperLegacy>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
@@ -391,6 +393,19 @@ struct MachineSMEABI : public MachineFunctionPass {
return emitAllocateLazySaveBuffer(Context, MBB, MBBI);
}
+ /// Collects the reachable calls from \p MBBI marked with \p Marker. This is
+ /// intended to be used to emit lazy save remarks. Note: This stops at the
+ /// first marked call along any path.
+ void collectReachableMarkedCalls(const MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator MBBI,
+ SmallVectorImpl<const MachineInstr *> &Calls,
+ unsigned Marker) const;
+
+ void emitCallSaveRemarks(const MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator MBBI, DebugLoc DL,
+ unsigned Marker, StringRef RemarkName,
+ StringRef SaveName) const;
+
/// Save live physical registers to virtual registers.
PhysRegSave createPhysRegSave(LiveRegs PhysLiveRegs, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc DL);
@@ -406,6 +421,8 @@ struct MachineSMEABI : public MachineFunctionPass {
const AArch64RegisterInfo *TRI = nullptr;
const AArch64FunctionInfo *AFI = nullptr;
const TargetInstrInfo *TII = nullptr;
+
+ MachineOptimizationRemarkEmitter *ORE = nullptr;
MachineRegisterInfo *MRI = nullptr;
MachineLoopInfo *MLI = nullptr;
};
@@ -706,9 +723,91 @@ void MachineSMEABI::insertStateChanges(EmitContext &Context,
static DebugLoc getDebugLoc(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
- if (MBBI != MBB.end())
- return MBBI->getDebugLoc();
- return DebugLoc();
+ if (MBB.empty())
+ return DebugLoc();
+ return MBBI != MBB.end() ? MBBI->getDebugLoc() : MBB.back().getDebugLoc();
+}
+
+static bool findMarkedCall(const MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator MBBI,
+ SmallVectorImpl<const MachineInstr *> &Calls,
+ unsigned Marker, unsigned CallDestroyOpcode) {
+ auto IsMarker = [&](auto &MI) { return MI.getOpcode() == Marker; };
+ auto MarkerInst = std::find_if(MBBI, MBB.end(), IsMarker);
+ if (MarkerInst == MBB.end())
+ return false;
+ MachineBasicBlock::const_iterator I = MarkerInst;
+ while (++I != MBB.end()) {
+ if (I->isCall() || I->getOpcode() == CallDestroyOpcode)
+ break;
+ }
+ if (I != MBB.end() && I->isCall())
+ Calls.push_back(&*I);
+ // Note: This function always returns true if a "Marker" was found.
+ return true;
+}
+
+void MachineSMEABI::collectReachableMarkedCalls(
+ const MachineBasicBlock &StartMBB,
+ MachineBasicBlock::const_iterator StartInst,
+ SmallVectorImpl<const MachineInstr *> &Calls, unsigned Marker) const {
+ assert(Marker == AArch64::InOutZAUsePseudo ||
+ Marker == AArch64::RequiresZASavePseudo ||
+ Marker == AArch64::RequiresZT0SavePseudo);
+ unsigned CallDestroyOpcode = TII->getCallFrameDestroyOpcode();
+ if (findMarkedCall(StartMBB, StartInst, Calls, Marker, CallDestroyOpcode))
+ return;
+
+ SmallPtrSet<const MachineBasicBlock *, 4> Visited;
+ SmallVector<const MachineBasicBlock *> Worklist(StartMBB.succ_rbegin(),
+ StartMBB.succ_rend());
+ while (!Worklist.empty()) {
+ const MachineBasicBlock *MBB = Worklist.pop_back_val();
+ auto [_, Inserted] = Visited.insert(MBB);
+ if (!Inserted)
+ continue;
+
+ if (!findMarkedCall(*MBB, MBB->begin(), Calls, Marker, CallDestroyOpcode))
+ Worklist.append(MBB->succ_rbegin(), MBB->succ_rend());
+ }
+}
+
+static StringRef getCalleeName(const MachineInstr &CallInst) {
+ assert(CallInst.isCall() && "expected a call");
+ for (const MachineOperand &MO : CallInst.operands()) {
+ if (MO.isSymbol())
+ return MO.getSymbolName();
+ if (MO.isGlobal())
+ return MO.getGlobal()->getName();
+ }
+ return {};
+}
+
+void MachineSMEABI::emitCallSaveRemarks(const MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator MBBI,
+ DebugLoc DL, unsigned Marker,
+ StringRef RemarkName,
+ StringRef SaveName) const {
+ auto SaveRemark = [&](DebugLoc DL, const MachineBasicBlock &MBB) {
+ return MachineOptimizationRemarkAnalysis("sme", RemarkName, DL, &MBB);
+ };
+ StringRef StateName = Marker == AArch64::RequiresZT0SavePseudo ? "ZT0" : "ZA";
+ ORE->emit([&] {
+ return SaveRemark(DL, MBB) << SaveName << " of " << StateName
+ << " emitted in '" << MF->getName() << "'";
+ });
+ if (!ORE->allowExtraAnalysis("sme"))
+ return;
+ SmallVector<const MachineInstr *> CallsRequiringSaves;
+ collectReachableMarkedCalls(MBB, MBBI, CallsRequiringSaves, Marker);
+ for (const MachineInstr *CallInst : CallsRequiringSaves) {
+ auto R = SaveRemark(CallInst->getDebugLoc(), *CallInst->getParent());
+ R << "call";
+ if (StringRef CalleeName = getCalleeName(*CallInst); !CalleeName.empty())
+ R << " to '" << CalleeName << "'";
+ R << " requires " << StateName << " save";
+ ORE->emit(R);
+ }
}
void MachineSMEABI::emitSetupLazySave(EmitContext &Context,
@@ -716,6 +815,9 @@ void MachineSMEABI::emitSetupLazySave(EmitContext &Context,
MachineBasicBlock::iterator MBBI) {
DebugLoc DL = getDebugLoc(MBB, MBBI);
+ emitCallSaveRemarks(MBB, MBBI, DL, AArch64::RequiresZASavePseudo,
+ "SMELazySaveZA", "lazy save");
+
// Get pointer to TPIDR2 block.
Register TPIDR2 = MRI->createVirtualRegister(&AArch64::GPR64spRegClass);
Register TPIDR2Ptr = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
@@ -926,12 +1028,17 @@ void MachineSMEABI::emitFullZASaveRestore(EmitContext &Context,
MachineBasicBlock::iterator MBBI,
LiveRegs PhysLiveRegs, bool IsSave) {
auto *TLI = Subtarget->getTargetLowering();
+
DebugLoc DL = getDebugLoc(MBB, MBBI);
- Register BufferPtr = AArch64::X0;
+
+ if (IsSave)
+ emitCallSaveRemarks(MBB, MBBI, DL, AArch64::RequiresZASavePseudo,
+ "SMEFullZASave", "full save");
PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);
// Copy the buffer pointer into X0.
+ Register BufferPtr = AArch64::X0;
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferPtr)
.addReg(Context.getAgnosticZABufferPtr(*MF));
@@ -952,6 +1059,14 @@ void MachineSMEABI::emitZT0SaveRestore(EmitContext &Context,
MachineBasicBlock::iterator MBBI,
bool IsSave) {
DebugLoc DL = getDebugLoc(MBB, MBBI);
+
+ // Note: This will report calls that _only_ need ZT0 saved. Call that save
+ // both ZA and ZT0 will be under the SMELazySaveZA remark. This prevents
+ // reporting the same calls twice.
+ if (IsSave)
+ emitCallSaveRemarks(MBB, MBBI, DL, AArch64::RequiresZT0SavePseudo,
+ "SMEZT0Save", "spill");
+
Register ZT0Save = MRI->createVirtualRegister(&AArch64::GPR64spRegClass);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), ZT0Save)
@@ -1139,6 +1254,7 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
this->MF = &MF;
Subtarget = &MF.getSubtarget<AArch64Subtarget>();
+ ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
TII = Subtarget->getInstrInfo();
TRI = Subtarget->getRegisterInfo();
MRI = &MF.getRegInfo();
diff --git a/llvm/test/CodeGen/AArch64/sme-abi-save-call-remarks.ll b/llvm/test/CodeGen/AArch64/sme-abi-save-call-remarks.ll
new file mode 100644
index 0000000000000..2ce2231fa740f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-abi-save-call-remarks.ll
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 -mattr=+sme2 --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+sme2 --aarch64-new-sme-abi --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NEWLOWERING
+
+declare void @private_za_callee()
+declare void @private_za_callee_a()
+declare void @private_za_callee_b()
+declare void @private_za_callee_c()
+
+declare void @shared_za_callee() "aarch64_inout_za"
+declare void @shared_za_zt0_callee() "aarch64_inout_za" "aarch64_inout_zt0"
+
+; Note: These remarks are more useful with source debug info (which gives line numbers for `<unknown>:0:0`).
+
+define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" {
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_1_callee' to 'private_za_callee' sets up a lazy save for ZA
+
+; CHECK-NEWLOWERING: remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_1_callee'
+; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'private_za_callee' requires ZA save
+ call void @private_za_callee()
+ ret void
+}
+
+define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" {
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA
+
+; CHECK-NEWLOWERING: remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_2_callees'
+; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'private_za_callee' requires ZA save
+ call void @private_za_callee()
+ call void @private_za_callee()
+ ret void
+}
+
+define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inout_za" {
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_expanded_intrinsic' to 'cosf' sets up a lazy save for ZA
+
+; CHECK-NEWLOWERING: remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_expanded_intrinsic'
+; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'cosf' requires ZA save
+ %res = call float @llvm.cos.f32(float %a)
+ ret float %res
+}
+
+define void @test_lazy_save_multiple_paths(i1 %a) "aarch64_inout_za" {
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_multiple_paths' to 'private_za_callee_a' sets up a lazy save for ZA
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_multiple_paths' to 'private_za_callee_b' sets up a lazy save for ZA
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_multiple_paths' to 'private_za_callee_c' sets up a lazy save for ZA
+
+; CHECK-NEWLOWERING: remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_multiple_paths'
+; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'private_za_callee_b' requires ZA save
+; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'private_za_callee_a' requires ZA save
+entry:
+ br i1 %a, label %if.end, label %if.else
+
+if.else:
+ call void @private_za_callee_a()
+ br label %if.end
+
+if.end:
+ call void @private_za_callee_b()
+ ; The new lowering won't report this call as the save is already needed due to
+ ; the call to `private_za_callee_b()`.
+ call void @private_za_callee_c()
+
+ ret void
+}
+
+define void @test_lazy_save_with_zt0() "aarch64_inout_za" "aarch64_inout_zt0"
+{
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_with_zt0' to 'private_za_callee' sets up a lazy save for ZA
+
+; CHECK-NEWLOWERING: remark: <unknown>:0:0: spill of ZT0 emitted in 'test_lazy_save_with_zt0'
+; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'shared_za_callee' requires ZT0 save
+; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_with_zt0'
+; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'private_za_callee' requires ZA save
+ call void @shared_za_callee() ; Save ZT0 (remark ZT0 spill)
+ call void @private_za_callee() ; Save ZA (remark ZA save)
+ ret void
+}
+
+define void @test_lazy_save_with_zt0_reload() "aarch64_inout_za" "aarch64_inout_zt0"
+{
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_with_zt0_reload' to 'private_za_callee' sets up a lazy save for ZA
+
+; CHECK-NEWLOWERING: remark: <unknown>:0:0: spill of ZT0 emitted in 'test_lazy_save_with_zt0_reload'
+; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'shared_za_callee' requires ZT0 save
+; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: spill of ZT0 emitted in 'test_lazy_save_with_zt0_reload'
+; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_with_zt0_reload'
+; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'private_za_callee' requires ZA save
+ call void @shared_za_callee() ; Save ZT0 (remark ZT0 spill)
+ call void @shared_za_zt0_callee() ; Reload ZT0
+ call void @private_za_callee() ; Save ZA, ZT0 (remark ZT0 spill and ZA save)
+ ret void
+}
+
+define void @test_za_merge_paths(i1 %a) "aarch64_za_state_agnostic" {
+;; Note: The old lowering does not emit any remarks for agnostic ZA saves.
+
+; CHECK-NEWLOWERING: remark: <unknown>:0:0: full save of ZA emitted in 'test_za_merge_paths'
+; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'private_za_callee_b' requires ZA save
+; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'private_za_callee_a' requires ZA save
+entry:
+ br i1 %a, label %if.end, label %if.else
+
+if.else:
+ call void @private_za_callee_a()
+ br label %exit
+
+if.end:
+ call void @private_za_callee_b()
+ br label %exit
+
+exit:
+ ; The new lowering won't report this call as the save is already needed due to
+ ; the call to `private_za_callee_*()` calls on both paths to this BB.
+ call void @private_za_callee_c()
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
deleted file mode 100644
index 65e50842d5d78..0000000000000
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 -mattr=+sme --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s
-
-declare void @private_za_callee()
-declare float @llvm.cos.f32(float)
-
-define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" {
-; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_1_callee' to 'private_za_callee' sets up a lazy save for ZA
- call void @private_za_callee()
- ret void
-}
-
-define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" {
-; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA
- call void @private_za_callee()
-; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA
- call void @private_za_callee()
- ret void
-}
-
-define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inout_za" {
-; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_expanded_intrinsic' to 'cosf' sets up a lazy save for ZA
- %res = call float @llvm.cos.f32(float %a)
- ret float %res
-}
>From f19ed905253051aa5b5ccdef8b647f7825b02406 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 16 Dec 2025 11:46:48 +0000
Subject: [PATCH 2/4] Fixups
Change-Id: I55f2ce3ba25c7b79ed4b07d78fd0cdc5f07f2a38
---
clang/test/CodeGen/AArch64/sme-remarks.c | 69 +++++++++++++++++++
llvm/lib/Target/AArch64/MachineSMEABIPass.cpp | 4 ++
.../AArch64/sme-abi-save-call-remarks.ll | 9 +++
3 files changed, 82 insertions(+)
create mode 100644 clang/test/CodeGen/AArch64/sme-remarks.c
diff --git a/clang/test/CodeGen/AArch64/sme-remarks.c b/clang/test/CodeGen/AArch64/sme-remarks.c
new file mode 100644
index 0000000000000..6dc52a69f9d1c
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sme-remarks.c
@@ -0,0 +1,69 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -o /dev/null -Rpass-analysis=sme %s 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -o /dev/null -Rpass-analysis=sme %s -mllvm -aarch64-new-sme-abi 2>&1 | FileCheck %s --check-prefix=CHECK-NEWLOWERING
+
+void private_za_callee_a();
+void private_za_callee_b();
+void private_za_callee_c();
+
+void test_za_merge_paths(int a) __arm_inout("za") {
+ if (a != 0)
+ private_za_callee_a();
+ else
+ private_za_callee_b();
+ // The new lowering won't report this call as the save is already needed due
+ // to the call to `private_za_callee_*()` calls on both paths to this BB.
+ private_za_callee_c();
+}
+
+void test_lazy_save_multiple_paths(int a) __arm_inout("za") {
+ if (a != 0)
+ private_za_callee_a();
+ else {
+ private_za_callee_b();
+ // The new lowering won't report this call as the save is already needed due
+ // to the call to `private_za_callee_b()`.
+ private_za_callee_c();
+ }
+}
+
+// CHECK: sme-remarks.c:14:5: remark: call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA [-Rpass-analysis=sme]
+// CHECK-NEXT: 14 | private_za_callee_b();
+// CHECK-NEXT: | ^
+// CHECK-NEXT: sme-remarks.c:12:5: remark: call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA [-Rpass-analysis=sme]
+// CHECK-NEXT: 12 | private_za_callee_a();
+// CHECK-NEXT: | ^
+// CHECK-NEXT: sme-remarks.c:17:3: remark: call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA [-Rpass-analysis=sme]
+// CHECK-NEXT: 17 | private_za_callee_c();
+// CHECK-NEXT: | ^
+
+// CHECK: sme-remarks.c:24:5: remark: call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA [-Rpass-analysis=sme]
+// CHECK-NEXT: 24 | private_za_callee_b();
+// CHECK-NEXT: | ^
+// CHECK-NEXT: sme-remarks.c:27:5: remark: call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA [-Rpass-analysis=sme]
+// CHECK-NEXT: 27 | private_za_callee_c();
+// CHECK-NEXT: | ^
+// CHECK-NEXT: sme-remarks.c:22:5: remark: call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA [-Rpass-analysis=sme]
+// CHECK-NEXT: 22 | private_za_callee_a();
+// CHECK-NEXT: | ^
+
+// CHECK-NEWLOWERING: sme-remarks.c:11:9: remark: lazy save of ZA emitted in 'test_za_merge_paths' [-Rpass-analysis=sme]
+// CHECK-NEWLOWERING-NEXT: 11 | if (a != 0)
+// CHECK-NEWLOWERING-NEXT: | ^
+// CHECK-NEWLOWERING-NEXT: sme-remarks.c:12:5: remark: call to 'private_za_callee_a' requires ZA save [-Rpass-analysis=sme]
+// CHECK-NEWLOWERING-NEXT: 12 | private_za_callee_a();
+// CHECK-NEWLOWERING-NEXT: | ^
+// CHECK-NEWLOWERING-NEXT: sme-remarks.c:14:5: remark: call to 'private_za_callee_b' requires ZA save [-Rpass-analysis=sme]
+// CHECK-NEWLOWERING-NEXT: 14 | private_za_callee_b();
+// CHECK-NEWLOWERING-NEXT: | ^
+
+// CHECK-NEWLOWERING: sme-remarks.c:21:9: remark: lazy save of ZA emitted in 'test_lazy_save_multiple_paths' [-Rpass-analysis=sme]
+// CHECK-NEWLOWERING-NEXT: 21 | if (a != 0)
+// CHECK-NEWLOWERING-NEXT: | ^
+// CHECK-NEWLOWERING-NEXT: sme-remarks.c:22:5: remark: call to 'private_za_callee_a' requires ZA save [-Rpass-analysis=sme]
+// CHECK-NEWLOWERING-NEXT: 22 | private_za_callee_a();
+// CHECK-NEWLOWERING-NEXT: | ^
+// CHECK-NEWLOWERING-NEXT: sme-remarks.c:24:5: remark: call to 'private_za_callee_b' requires ZA save [-Rpass-analysis=sme]
+// CHECK-NEWLOWERING-NEXT: 24 | private_za_callee_b();
+// CHECK-NEWLOWERING-NEXT: | ^
diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
index 98d14d238aad1..823c754a0ac05 100644
--- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
+++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
@@ -728,6 +728,10 @@ static DebugLoc getDebugLoc(MachineBasicBlock &MBB,
return MBBI != MBB.end() ? MBBI->getDebugLoc() : MBB.back().getDebugLoc();
}
+/// Finds the first call (as determined by MachineInstr::isCall()) starting from
+/// \p MBBI in \p MBB marked with \p Marker (which is a marker opcode such as
+/// RequiresZASavePseudo). If a marked call is found, it is pushed to \p Calls
+/// and the function returns true.
static bool findMarkedCall(const MachineBasicBlock &MBB,
MachineBasicBlock::const_iterator MBBI,
SmallVectorImpl<const MachineInstr *> &Calls,
diff --git a/llvm/test/CodeGen/AArch64/sme-abi-save-call-remarks.ll b/llvm/test/CodeGen/AArch64/sme-abi-save-call-remarks.ll
index 2ce2231fa740f..755dcfbf17ba4 100644
--- a/llvm/test/CodeGen/AArch64/sme-abi-save-call-remarks.ll
+++ b/llvm/test/CodeGen/AArch64/sme-abi-save-call-remarks.ll
@@ -117,3 +117,12 @@ exit:
ret void
}
+
+define void @test_lazy_save_function_ptr_callee(ptr %private_za_callee) nounwind "aarch64_inout_za" {
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_function_ptr_callee' to 'unknown callee' sets up a lazy save for ZA
+
+; CHECK-NEWLOWERING: remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_function_ptr_callee'
+; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call requires ZA save
+ call void %private_za_callee()
+ ret void
+}
>From eed6e0f197e62d506c05c3ddf42b5a360cb8c35d Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 16 Dec 2025 14:22:06 +0000
Subject: [PATCH 3/4] Update checks
Change-Id: I156deeb2162bed195de6529f1d1fbe805c9a3589
---
clang/test/CodeGen/AArch64/sme-remarks.c | 60 ++++++------------------
1 file changed, 14 insertions(+), 46 deletions(-)
diff --git a/clang/test/CodeGen/AArch64/sme-remarks.c b/clang/test/CodeGen/AArch64/sme-remarks.c
index 6dc52a69f9d1c..85ddb346e0379 100644
--- a/clang/test/CodeGen/AArch64/sme-remarks.c
+++ b/clang/test/CodeGen/AArch64/sme-remarks.c
@@ -1,69 +1,37 @@
// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -o /dev/null -Rpass-analysis=sme %s 2>&1 | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -o /dev/null -Rpass-analysis=sme %s -mllvm -aarch64-new-sme-abi 2>&1 | FileCheck %s --check-prefix=CHECK-NEWLOWERING
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -Rpass-analysis=sme -verify %s -S -o /dev/null
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -mllvm -aarch64-new-sme-abi -Rpass-analysis=sme -verify=expected-new %s -S -o /dev/null %s
void private_za_callee_a();
void private_za_callee_b();
void private_za_callee_c();
void test_za_merge_paths(int a) __arm_inout("za") {
+ // expected-new-remark at +1 {{lazy save of ZA emitted in 'test_za_merge_paths'}}
if (a != 0)
+ // expected-remark at +2 {{call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA}}
+ // expected-new-remark at +1 {{call to 'private_za_callee_a' requires ZA save}}
private_za_callee_a();
else
+ // expected-remark at +2 {{call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA}}
+ // expected-new-remark at +1 {{call to 'private_za_callee_b' requires ZA save}}
private_za_callee_b();
- // The new lowering won't report this call as the save is already needed due
- // to the call to `private_za_callee_*()` calls on both paths to this BB.
+ // expected-remark at +1 {{call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA}}
private_za_callee_c();
}
void test_lazy_save_multiple_paths(int a) __arm_inout("za") {
+ // expected-new-remark at +1 {{lazy save of ZA emitted in 'test_lazy_save_multiple_paths'}}
if (a != 0)
+ // expected-remark at +2 {{call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA}}
+ // expected-new-remark at +1 {{call to 'private_za_callee_a' requires ZA save}}
private_za_callee_a();
else {
+ // expected-remark at +2 {{call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA}}
+ // expected-new-remark at +1 {{call to 'private_za_callee_b' requires ZA save}}
private_za_callee_b();
- // The new lowering won't report this call as the save is already needed due
- // to the call to `private_za_callee_b()`.
+ // expected-remark at +1 {{call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA}}
private_za_callee_c();
}
}
-
-// CHECK: sme-remarks.c:14:5: remark: call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA [-Rpass-analysis=sme]
-// CHECK-NEXT: 14 | private_za_callee_b();
-// CHECK-NEXT: | ^
-// CHECK-NEXT: sme-remarks.c:12:5: remark: call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA [-Rpass-analysis=sme]
-// CHECK-NEXT: 12 | private_za_callee_a();
-// CHECK-NEXT: | ^
-// CHECK-NEXT: sme-remarks.c:17:3: remark: call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA [-Rpass-analysis=sme]
-// CHECK-NEXT: 17 | private_za_callee_c();
-// CHECK-NEXT: | ^
-
-// CHECK: sme-remarks.c:24:5: remark: call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA [-Rpass-analysis=sme]
-// CHECK-NEXT: 24 | private_za_callee_b();
-// CHECK-NEXT: | ^
-// CHECK-NEXT: sme-remarks.c:27:5: remark: call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA [-Rpass-analysis=sme]
-// CHECK-NEXT: 27 | private_za_callee_c();
-// CHECK-NEXT: | ^
-// CHECK-NEXT: sme-remarks.c:22:5: remark: call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA [-Rpass-analysis=sme]
-// CHECK-NEXT: 22 | private_za_callee_a();
-// CHECK-NEXT: | ^
-
-// CHECK-NEWLOWERING: sme-remarks.c:11:9: remark: lazy save of ZA emitted in 'test_za_merge_paths' [-Rpass-analysis=sme]
-// CHECK-NEWLOWERING-NEXT: 11 | if (a != 0)
-// CHECK-NEWLOWERING-NEXT: | ^
-// CHECK-NEWLOWERING-NEXT: sme-remarks.c:12:5: remark: call to 'private_za_callee_a' requires ZA save [-Rpass-analysis=sme]
-// CHECK-NEWLOWERING-NEXT: 12 | private_za_callee_a();
-// CHECK-NEWLOWERING-NEXT: | ^
-// CHECK-NEWLOWERING-NEXT: sme-remarks.c:14:5: remark: call to 'private_za_callee_b' requires ZA save [-Rpass-analysis=sme]
-// CHECK-NEWLOWERING-NEXT: 14 | private_za_callee_b();
-// CHECK-NEWLOWERING-NEXT: | ^
-
-// CHECK-NEWLOWERING: sme-remarks.c:21:9: remark: lazy save of ZA emitted in 'test_lazy_save_multiple_paths' [-Rpass-analysis=sme]
-// CHECK-NEWLOWERING-NEXT: 21 | if (a != 0)
-// CHECK-NEWLOWERING-NEXT: | ^
-// CHECK-NEWLOWERING-NEXT: sme-remarks.c:22:5: remark: call to 'private_za_callee_a' requires ZA save [-Rpass-analysis=sme]
-// CHECK-NEWLOWERING-NEXT: 22 | private_za_callee_a();
-// CHECK-NEWLOWERING-NEXT: | ^
-// CHECK-NEWLOWERING-NEXT: sme-remarks.c:24:5: remark: call to 'private_za_callee_b' requires ZA save [-Rpass-analysis=sme]
-// CHECK-NEWLOWERING-NEXT: 24 | private_za_callee_b();
-// CHECK-NEWLOWERING-NEXT: | ^
>From 526f805f586ebf0b293f3a82a15d2b71571e3d9c Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 16 Dec 2025 14:46:07 +0000
Subject: [PATCH 4/4] Add comments
Change-Id: If96d9eae728b2d58e4e8c54a974e123ccb45fbd4
---
clang/test/CodeGen/AArch64/sme-remarks.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/clang/test/CodeGen/AArch64/sme-remarks.c b/clang/test/CodeGen/AArch64/sme-remarks.c
index 85ddb346e0379..fd144b8a6c425 100644
--- a/clang/test/CodeGen/AArch64/sme-remarks.c
+++ b/clang/test/CodeGen/AArch64/sme-remarks.c
@@ -17,7 +17,9 @@ void test_za_merge_paths(int a) __arm_inout("za") {
// expected-remark at +2 {{call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA}}
// expected-new-remark at +1 {{call to 'private_za_callee_b' requires ZA save}}
private_za_callee_b();
- // expected-remark at +1 {{call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA}}
+ // expected-remark at +3 {{call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA}}
+ /// The new lowering won't report this call as the save is already needed due
+ /// to the call to `private_za_callee_a/b()` calls on both paths to this call.
private_za_callee_c();
}
@@ -31,7 +33,9 @@ void test_lazy_save_multiple_paths(int a) __arm_inout("za") {
// expected-remark at +2 {{call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA}}
// expected-new-remark at +1 {{call to 'private_za_callee_b' requires ZA save}}
private_za_callee_b();
- // expected-remark at +1 {{call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA}}
+ // expected-remark at +3 {{call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA}}
+ /// The new lowering won't report this call as the save is already needed
+ /// due to the call to `private_za_callee_b()`.
private_za_callee_c();
}
}
More information about the llvm-commits
mailing list