[llvm] Reland "[AArch64][SME] Split SMECallAttrs out of SMEAttrs" (PR #138671)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Wed May 14 03:50:23 PDT 2025
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/138671
>From e349e1c7e0174f70448e77d2bd8bb8fee7ac34bf Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 25 Apr 2025 07:58:43 +0000
Subject: [PATCH 1/8] [AArch64][SME] Split SMECallAttrs out of SMEAttrs (NFC)
SMECallAttrs is a new helper class that holds all the SMEAttrs for a
call. The interfaces to query actions needed for the call (e.g. change
streaming mode) have been moved to the SMECallAttrs class.
The main motivation for this change is to make the split between caller,
callee, and callsite attributes more apparent. Places that previously
implicitly checked callsite attributes have been updated to make these
checks explicit. Similarly, places known to only check callee or
callsite attributes have also been updated to make this clear.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 74 ++++++------
.../AArch64/AArch64TargetTransformInfo.cpp | 25 +++--
.../AArch64/Utils/AArch64SMEAttributes.cpp | 58 +++++-----
.../AArch64/Utils/AArch64SMEAttributes.h | 104 ++++++++++++-----
.../Target/AArch64/SMEAttributesTest.cpp | 106 +++++++++---------
5 files changed, 206 insertions(+), 161 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 13fb6a32233fe..8c0e758b6bc7a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8641,6 +8641,16 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI,
}
}
+static SMECallAttrs
+getSMECallAttrs(const Function &Function,
+ const TargetLowering::CallLoweringInfo &CLI) {
+ if (CLI.CB)
+ return SMECallAttrs(*CLI.CB);
+ if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
+ return SMECallAttrs(SMEAttrs(Function), SMEAttrs(ES->getSymbol()));
+ return SMECallAttrs(SMEAttrs(Function), SMEAttrs(SMEAttrs::Normal));
+}
+
bool AArch64TargetLowering::isEligibleForTailCallOptimization(
const CallLoweringInfo &CLI) const {
CallingConv::ID CalleeCC = CLI.CallConv;
@@ -8659,12 +8669,10 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
// SME Streaming functions are not eligible for TCO as they may require
// the streaming mode or ZA to be restored after returning from the call.
- SMEAttrs CallerAttrs(MF.getFunction());
- auto CalleeAttrs = CLI.CB ? SMEAttrs(*CLI.CB) : SMEAttrs(SMEAttrs::Normal);
- if (CallerAttrs.requiresSMChange(CalleeAttrs) ||
- CallerAttrs.requiresLazySave(CalleeAttrs) ||
- CallerAttrs.requiresPreservingAllZAState(CalleeAttrs) ||
- CallerAttrs.hasStreamingBody())
+ SMECallAttrs CallAttrs = getSMECallAttrs(CallerF, CLI);
+ if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
+ CallAttrs.requiresPreservingAllZAState() ||
+ CallAttrs.caller().hasStreamingBody())
return false;
// Functions using the C or Fast calling convention that have an SVE signature
@@ -8956,14 +8964,13 @@ static SDValue emitSMEStateSaveRestore(const AArch64TargetLowering &TLI,
return TLI.LowerCallTo(CLI).second;
}
-static unsigned getSMCondition(const SMEAttrs &CallerAttrs,
- const SMEAttrs &CalleeAttrs) {
- if (!CallerAttrs.hasStreamingCompatibleInterface() ||
- CallerAttrs.hasStreamingBody())
+static unsigned getSMCondition(const SMECallAttrs &CallAttrs) {
+ if (!CallAttrs.caller().hasStreamingCompatibleInterface() ||
+ CallAttrs.caller().hasStreamingBody())
return AArch64SME::Always;
- if (CalleeAttrs.hasNonStreamingInterface())
+ if (CallAttrs.calleeOrCallsite().hasNonStreamingInterface())
return AArch64SME::IfCallerIsStreaming;
- if (CalleeAttrs.hasStreamingInterface())
+ if (CallAttrs.calleeOrCallsite().hasStreamingInterface())
return AArch64SME::IfCallerIsNonStreaming;
llvm_unreachable("Unsupported attributes");
@@ -9096,11 +9103,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
// Determine whether we need any streaming mode changes.
- SMEAttrs CalleeAttrs, CallerAttrs(MF.getFunction());
- if (CLI.CB)
- CalleeAttrs = SMEAttrs(*CLI.CB);
- else if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
- CalleeAttrs = SMEAttrs(ES->getSymbol());
+ SMECallAttrs CallAttrs = getSMECallAttrs(MF.getFunction(), CLI);
auto DescribeCallsite =
[&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & {
@@ -9115,9 +9118,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
return R;
};
- bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs);
- bool RequiresSaveAllZA =
- CallerAttrs.requiresPreservingAllZAState(CalleeAttrs);
+ bool RequiresLazySave = CallAttrs.requiresLazySave();
+ bool RequiresSaveAllZA = CallAttrs.requiresPreservingAllZAState();
if (RequiresLazySave) {
const TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
MachinePointerInfo MPI =
@@ -9145,18 +9147,18 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
return DescribeCallsite(R) << " sets up a lazy save for ZA";
});
} else if (RequiresSaveAllZA) {
- assert(!CalleeAttrs.hasSharedZAInterface() &&
+ assert(!CallAttrs.calleeOrCallsite().hasSharedZAInterface() &&
"Cannot share state that may not exist");
Chain = emitSMEStateSaveRestore(*this, DAG, FuncInfo, DL, Chain,
/*IsSave=*/true);
}
SDValue PStateSM;
- bool RequiresSMChange = CallerAttrs.requiresSMChange(CalleeAttrs);
+ bool RequiresSMChange = CallAttrs.requiresSMChange();
if (RequiresSMChange) {
- if (CallerAttrs.hasStreamingInterfaceOrBody())
+ if (CallAttrs.caller().hasStreamingInterfaceOrBody())
PStateSM = DAG.getConstant(1, DL, MVT::i64);
- else if (CallerAttrs.hasNonStreamingInterface())
+ else if (CallAttrs.caller().hasNonStreamingInterface())
PStateSM = DAG.getConstant(0, DL, MVT::i64);
else
PStateSM = getRuntimePStateSM(DAG, Chain, DL, MVT::i64);
@@ -9173,7 +9175,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue ZTFrameIdx;
MachineFrameInfo &MFI = MF.getFrameInfo();
- bool ShouldPreserveZT0 = CallerAttrs.requiresPreservingZT0(CalleeAttrs);
+ bool ShouldPreserveZT0 = CallAttrs.requiresPreservingZT0();
// If the caller has ZT0 state which will not be preserved by the callee,
// spill ZT0 before the call.
@@ -9189,7 +9191,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// If caller shares ZT0 but the callee is not shared ZA, we need to stop
// PSTATE.ZA before the call if there is no lazy-save active.
- bool DisableZA = CallerAttrs.requiresDisablingZABeforeCall(CalleeAttrs);
+ bool DisableZA = CallAttrs.requiresDisablingZABeforeCall();
assert((!DisableZA || !RequiresLazySave) &&
"Lazy-save should have PSTATE.SM=1 on entry to the function");
@@ -9472,8 +9474,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
SDValue NewChain = changeStreamingMode(
- DAG, DL, CalleeAttrs.hasStreamingInterface(), Chain, InGlue,
- getSMCondition(CallerAttrs, CalleeAttrs), PStateSM);
+ DAG, DL, CallAttrs.calleeOrCallsite().hasStreamingInterface(), Chain,
+ InGlue, getSMCondition(CallAttrs), PStateSM);
Chain = NewChain.getValue(0);
InGlue = NewChain.getValue(1);
}
@@ -9659,8 +9661,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
if (RequiresSMChange) {
assert(PStateSM && "Expected a PStateSM to be set");
Result = changeStreamingMode(
- DAG, DL, !CalleeAttrs.hasStreamingInterface(), Result, InGlue,
- getSMCondition(CallerAttrs, CalleeAttrs), PStateSM);
+ DAG, DL, !CallAttrs.calleeOrCallsite().hasStreamingInterface(), Result,
+ InGlue, getSMCondition(CallAttrs), PStateSM);
if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
InGlue = Result.getValue(1);
@@ -9670,7 +9672,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
}
- if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs))
+ if (CallAttrs.requiresEnablingZAAfterCall())
// Unconditionally resume ZA.
Result = DAG.getNode(
AArch64ISD::SMSTART, DL, MVT::Other, Result,
@@ -28559,12 +28561,10 @@ bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
// Checks to allow the use of SME instructions
if (auto *Base = dyn_cast<CallBase>(&Inst)) {
- auto CallerAttrs = SMEAttrs(*Inst.getFunction());
- auto CalleeAttrs = SMEAttrs(*Base);
- if (CallerAttrs.requiresSMChange(CalleeAttrs) ||
- CallerAttrs.requiresLazySave(CalleeAttrs) ||
- CallerAttrs.requiresPreservingZT0(CalleeAttrs) ||
- CallerAttrs.requiresPreservingAllZAState(CalleeAttrs))
+ auto CallAttrs = SMECallAttrs(*Base);
+ if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
+ CallAttrs.requiresPreservingZT0() ||
+ CallAttrs.requiresPreservingAllZAState())
return true;
}
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 97e4993d52b4f..031d41d6ffdce 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -268,22 +268,21 @@ const FeatureBitset AArch64TTIImpl::InlineInverseFeatures = {
bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
- SMEAttrs CallerAttrs(*Caller), CalleeAttrs(*Callee);
+ SMECallAttrs CallAttrs(*Caller, *Callee);
// When inlining, we should consider the body of the function, not the
// interface.
- if (CalleeAttrs.hasStreamingBody()) {
- CalleeAttrs.set(SMEAttrs::SM_Compatible, false);
- CalleeAttrs.set(SMEAttrs::SM_Enabled, true);
+ if (CallAttrs.callee().hasStreamingBody()) {
+ CallAttrs.callee().set(SMEAttrs::SM_Compatible, false);
+ CallAttrs.callee().set(SMEAttrs::SM_Enabled, true);
}
- if (CalleeAttrs.isNewZA() || CalleeAttrs.isNewZT0())
+ if (CallAttrs.callee().isNewZA() || CallAttrs.callee().isNewZT0())
return false;
- if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
- CallerAttrs.requiresSMChange(CalleeAttrs) ||
- CallerAttrs.requiresPreservingZT0(CalleeAttrs) ||
- CallerAttrs.requiresPreservingAllZAState(CalleeAttrs)) {
+ if (CallAttrs.requiresLazySave() || CallAttrs.requiresSMChange() ||
+ CallAttrs.requiresPreservingZT0() ||
+ CallAttrs.requiresPreservingAllZAState()) {
if (hasPossibleIncompatibleOps(Callee))
return false;
}
@@ -349,12 +348,14 @@ AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call,
// streaming-mode change, and the call to G from F would also require a
// streaming-mode change, then there is benefit to do the streaming-mode
// change only once and avoid inlining of G into F.
+
SMEAttrs FAttrs(*F);
- SMEAttrs CalleeAttrs(Call);
- if (FAttrs.requiresSMChange(CalleeAttrs)) {
+ SMECallAttrs CallAttrs(Call);
+
+ if (SMECallAttrs(FAttrs, CallAttrs.calleeOrCallsite()).requiresSMChange()) {
if (F == Call.getCaller()) // (1)
return CallPenaltyChangeSM * DefaultCallPenalty;
- if (FAttrs.requiresSMChange(SMEAttrs(*Call.getCaller()))) // (2)
+ if (SMECallAttrs(FAttrs, CallAttrs.caller()).requiresSMChange()) // (2)
return InlineCallPenaltyChangeSM * DefaultCallPenalty;
}
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
index 76d2ac6a601e5..1085d618116eb 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
@@ -27,15 +27,14 @@ void SMEAttrs::set(unsigned M, bool Enable) {
"ZA_New and SME_ABI_Routine are mutually exclusive");
assert(
- (!sharesZA() ||
- (isNewZA() ^ isInZA() ^ isInOutZA() ^ isOutZA() ^ isPreservesZA())) &&
+ (isNewZA() + isInZA() + isOutZA() + isInOutZA() + isPreservesZA()) <= 1 &&
"Attributes 'aarch64_new_za', 'aarch64_in_za', 'aarch64_out_za', "
"'aarch64_inout_za' and 'aarch64_preserves_za' are mutually exclusive");
// ZT0 Attrs
assert(
- (!sharesZT0() || (isNewZT0() ^ isInZT0() ^ isInOutZT0() ^ isOutZT0() ^
- isPreservesZT0())) &&
+ (isNewZT0() + isInZT0() + isOutZT0() + isInOutZT0() + isPreservesZT0()) <=
+ 1 &&
"Attributes 'aarch64_new_zt0', 'aarch64_in_zt0', 'aarch64_out_zt0', "
"'aarch64_inout_zt0' and 'aarch64_preserves_zt0' are mutually exclusive");
@@ -44,27 +43,6 @@ void SMEAttrs::set(unsigned M, bool Enable) {
"interface");
}
-SMEAttrs::SMEAttrs(const CallBase &CB) {
- *this = SMEAttrs(CB.getAttributes());
- if (auto *F = CB.getCalledFunction()) {
- set(SMEAttrs(*F).Bitmask | SMEAttrs(F->getName()).Bitmask);
- }
-}
-
-SMEAttrs::SMEAttrs(StringRef FuncName) : Bitmask(0) {
- if (FuncName == "__arm_tpidr2_save" || FuncName == "__arm_sme_state")
- Bitmask |= (SMEAttrs::SM_Compatible | SMEAttrs::SME_ABI_Routine);
- if (FuncName == "__arm_tpidr2_restore")
- Bitmask |= SMEAttrs::SM_Compatible | encodeZAState(StateValue::In) |
- SMEAttrs::SME_ABI_Routine;
- if (FuncName == "__arm_sc_memcpy" || FuncName == "__arm_sc_memset" ||
- FuncName == "__arm_sc_memmove" || FuncName == "__arm_sc_memchr")
- Bitmask |= SMEAttrs::SM_Compatible;
- if (FuncName == "__arm_sme_save" || FuncName == "__arm_sme_restore" ||
- FuncName == "__arm_sme_state_size")
- Bitmask |= SMEAttrs::SM_Compatible | SMEAttrs::SME_ABI_Routine;
-}
-
SMEAttrs::SMEAttrs(const AttributeList &Attrs) {
Bitmask = 0;
if (Attrs.hasFnAttr("aarch64_pstate_sm_enabled"))
@@ -99,17 +77,39 @@ SMEAttrs::SMEAttrs(const AttributeList &Attrs) {
Bitmask |= encodeZT0State(StateValue::New);
}
-bool SMEAttrs::requiresSMChange(const SMEAttrs &Callee) const {
- if (Callee.hasStreamingCompatibleInterface())
+void SMEAttrs::addKnownFunctionAttrs(StringRef FuncName) {
+ unsigned KnownAttrs = SMEAttrs::Normal;
+ if (FuncName == "__arm_tpidr2_save" || FuncName == "__arm_sme_state")
+ KnownAttrs |= (SMEAttrs::SM_Compatible | SMEAttrs::SME_ABI_Routine);
+ if (FuncName == "__arm_tpidr2_restore")
+ KnownAttrs |= SMEAttrs::SM_Compatible | encodeZAState(StateValue::In) |
+ SMEAttrs::SME_ABI_Routine;
+ if (FuncName == "__arm_sc_memcpy" || FuncName == "__arm_sc_memset" ||
+ FuncName == "__arm_sc_memmove" || FuncName == "__arm_sc_memchr")
+ KnownAttrs |= SMEAttrs::SM_Compatible;
+ if (FuncName == "__arm_sme_save" || FuncName == "__arm_sme_restore" ||
+ FuncName == "__arm_sme_state_size")
+ KnownAttrs |= SMEAttrs::SM_Compatible | SMEAttrs::SME_ABI_Routine;
+ set(KnownAttrs, /*Enable=*/true);
+}
+
+bool SMECallAttrs::requiresSMChange() const {
+ if ((Callsite | Callee).hasStreamingCompatibleInterface())
return false;
// Both non-streaming
- if (hasNonStreamingInterfaceAndBody() && Callee.hasNonStreamingInterface())
+ if (Caller.hasNonStreamingInterfaceAndBody() &&
+ (Callsite | Callee).hasNonStreamingInterface())
return false;
// Both streaming
- if (hasStreamingInterfaceOrBody() && Callee.hasStreamingInterface())
+ if (Caller.hasStreamingInterfaceOrBody() &&
+ (Callsite | Callee).hasStreamingInterface())
return false;
return true;
}
+
+SMECallAttrs::SMECallAttrs(const CallBase &CB)
+ : SMECallAttrs(*CB.getFunction(), CB.getCalledFunction(),
+ CB.getAttributes()) {}
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
index 1691d4fec8b68..791bb891a18b7 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
@@ -18,12 +18,9 @@ class CallBase;
class AttributeList;
/// SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
-/// It helps determine a function's requirements for PSTATE.ZA and PSTATE.SM. It
-/// has interfaces to query whether a streaming mode change or lazy-save
-/// mechanism is required when going from one function to another (e.g. through
-/// a call).
+/// It helps determine a function's requirements for PSTATE.ZA and PSTATE.SM.
class SMEAttrs {
- unsigned Bitmask;
+ unsigned Bitmask = Normal;
public:
enum class StateValue {
@@ -43,18 +40,23 @@ class SMEAttrs {
SM_Body = 1 << 2, // aarch64_pstate_sm_body
SME_ABI_Routine = 1 << 3, // Used for SME ABI routines to avoid lazy saves
ZA_State_Agnostic = 1 << 4,
- ZT0_Undef = 1 << 5, // Use to mark ZT0 as undef to avoid spills
+ ZT0_Undef = 1 << 5, // Use to mark ZT0 as undef to avoid spills
ZA_Shift = 6,
ZA_Mask = 0b111 << ZA_Shift,
ZT0_Shift = 9,
ZT0_Mask = 0b111 << ZT0_Shift
};
- SMEAttrs(unsigned Mask = Normal) : Bitmask(0) { set(Mask); }
- SMEAttrs(const Function &F) : SMEAttrs(F.getAttributes()) {}
- SMEAttrs(const CallBase &CB);
+ SMEAttrs() = default;
+ SMEAttrs(unsigned Mask) { set(Mask); }
+ SMEAttrs(const Function *F)
+ : SMEAttrs(F ? F->getAttributes() : AttributeList()) {
+ if (F)
+ addKnownFunctionAttrs(F->getName());
+ }
+ SMEAttrs(const Function &F) : SMEAttrs(&F) {}
SMEAttrs(const AttributeList &L);
- SMEAttrs(StringRef FuncName);
+ SMEAttrs(StringRef FuncName) { addKnownFunctionAttrs(FuncName); };
void set(unsigned M, bool Enable = true);
@@ -74,10 +76,6 @@ class SMEAttrs {
return hasNonStreamingInterface() && !hasStreamingBody();
}
- /// \return true if a call from Caller -> Callee requires a change in
- /// streaming mode.
- bool requiresSMChange(const SMEAttrs &Callee) const;
-
// Interfaces to query ZA
static StateValue decodeZAState(unsigned Bitmask) {
return static_cast<StateValue>((Bitmask & ZA_Mask) >> ZA_Shift);
@@ -104,10 +102,7 @@ class SMEAttrs {
return !hasSharedZAInterface() && !hasAgnosticZAInterface();
}
bool hasZAState() const { return isNewZA() || sharesZA(); }
- bool requiresLazySave(const SMEAttrs &Callee) const {
- return hasZAState() && Callee.hasPrivateZAInterface() &&
- !(Callee.Bitmask & SME_ABI_Routine);
- }
+ bool isSMEABIRoutine() const { return Bitmask & SME_ABI_Routine; }
// Interfaces to query ZT0 State
static StateValue decodeZT0State(unsigned Bitmask) {
@@ -126,27 +121,76 @@ class SMEAttrs {
bool isPreservesZT0() const {
return decodeZT0State(Bitmask) == StateValue::Preserved;
}
- bool isUndefZT0() const { return Bitmask & ZT0_Undef; }
+ bool hasUndefZT0() const { return Bitmask & ZT0_Undef; }
bool sharesZT0() const {
StateValue State = decodeZT0State(Bitmask);
return State == StateValue::In || State == StateValue::Out ||
State == StateValue::InOut || State == StateValue::Preserved;
}
bool hasZT0State() const { return isNewZT0() || sharesZT0(); }
- bool requiresPreservingZT0(const SMEAttrs &Callee) const {
- return hasZT0State() && !Callee.isUndefZT0() && !Callee.sharesZT0() &&
- !Callee.hasAgnosticZAInterface();
+
+ SMEAttrs operator|(SMEAttrs Other) const {
+ SMEAttrs Merged(*this);
+ Merged.set(Other.Bitmask, /*Enable=*/true);
+ return Merged;
}
- bool requiresDisablingZABeforeCall(const SMEAttrs &Callee) const {
- return hasZT0State() && !hasZAState() && Callee.hasPrivateZAInterface() &&
- !(Callee.Bitmask & SME_ABI_Routine);
+
+private:
+ void addKnownFunctionAttrs(StringRef FuncName);
+};
+
+/// SMECallAttrs is a utility class to hold the SMEAttrs for a callsite. It has
+/// interfaces to query whether a streaming mode change or lazy-save mechanism
+/// is required when going from one function to another (e.g. through a call).
+class SMECallAttrs {
+ SMEAttrs Caller;
+ SMEAttrs Callee;
+ SMEAttrs Callsite;
+
+public:
+ SMECallAttrs(SMEAttrs Caller, SMEAttrs Callee,
+ SMEAttrs Callsite = SMEAttrs::Normal)
+ : Caller(Caller), Callee(Callee), Callsite(Callsite) {}
+
+ SMECallAttrs(const CallBase &CB);
+
+ SMEAttrs &caller() { return Caller; }
+ SMEAttrs &callee() { return Callee; }
+ SMEAttrs &callsite() { return Callsite; }
+ SMEAttrs const &caller() const { return Caller; }
+ SMEAttrs const &callee() const { return Callee; }
+ SMEAttrs const &callsite() const { return Callsite; }
+ SMEAttrs calleeOrCallsite() const { return Callsite | Callee; }
+
+ /// \return true if a call from Caller -> Callee requires a change in
+ /// streaming mode.
+ bool requiresSMChange() const;
+
+ bool requiresLazySave() const {
+ return Caller.hasZAState() && (Callsite | Callee).hasPrivateZAInterface() &&
+ !Callee.isSMEABIRoutine();
}
- bool requiresEnablingZAAfterCall(const SMEAttrs &Callee) const {
- return requiresLazySave(Callee) || requiresDisablingZABeforeCall(Callee);
+
+ bool requiresPreservingZT0() const {
+ return Caller.hasZT0State() && !Callsite.hasUndefZT0() &&
+ !(Callsite | Callee).sharesZT0() &&
+ !(Callsite | Callee).hasAgnosticZAInterface();
}
- bool requiresPreservingAllZAState(const SMEAttrs &Callee) const {
- return hasAgnosticZAInterface() && !Callee.hasAgnosticZAInterface() &&
- !(Callee.Bitmask & SME_ABI_Routine);
+
+ bool requiresDisablingZABeforeCall() const {
+ return Caller.hasZT0State() && !Caller.hasZAState() &&
+ (Callsite | Callee).hasPrivateZAInterface() &&
+ !Callee.isSMEABIRoutine();
+ }
+
+ bool requiresEnablingZAAfterCall() const {
+ return requiresLazySave() || requiresDisablingZABeforeCall();
+ }
+
+ bool requiresPreservingAllZAState() const {
+ return Caller.hasAgnosticZAInterface() &&
+ !(Callsite | Callee).hasAgnosticZAInterface() &&
+ !Callee.isSMEABIRoutine();
}
};
diff --git a/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp b/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp
index f8c77fcba19cf..f13252f3a4c28 100644
--- a/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp
+++ b/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp
@@ -9,6 +9,7 @@
using namespace llvm;
using SA = SMEAttrs;
+using CA = SMECallAttrs;
std::unique_ptr<Module> parseIR(const char *IR) {
static LLVMContext C;
@@ -70,15 +71,14 @@ TEST(SMEAttributes, Constructors) {
ASSERT_TRUE(SA(*parseIR("declare void @foo() \"aarch64_new_zt0\"")
->getFunction("foo"))
.isNewZT0());
- ASSERT_TRUE(
- SA(cast<CallBase>((parseIR("declare void @callee()\n"
- "define void @foo() {"
- "call void @callee() \"aarch64_zt0_undef\"\n"
- "ret void\n}")
- ->getFunction("foo")
- ->begin()
- ->front())))
- .isUndefZT0());
+
+ auto CallModule = parseIR("declare void @callee()\n"
+ "define void @foo() {"
+ "call void @callee() \"aarch64_zt0_undef\"\n"
+ "ret void\n}");
+ CallBase &Call =
+ cast<CallBase>((CallModule->getFunction("foo")->begin()->front()));
+ ASSERT_TRUE(SMECallAttrs(Call).callsite().hasUndefZT0());
// Invalid combinations.
EXPECT_DEBUG_DEATH(SA(SA::SM_Enabled | SA::SM_Compatible),
@@ -235,7 +235,7 @@ TEST(SMEAttributes, Basics) {
ASSERT_TRUE(ZT0_Undef.hasZT0State());
ASSERT_FALSE(ZT0_Undef.hasSharedZAInterface());
ASSERT_TRUE(ZT0_Undef.hasPrivateZAInterface());
- ASSERT_TRUE(ZT0_Undef.isUndefZT0());
+ ASSERT_TRUE(ZT0_Undef.hasUndefZT0());
ASSERT_FALSE(SA(SA::Normal).isInZT0());
ASSERT_FALSE(SA(SA::Normal).isOutZT0());
@@ -248,59 +248,57 @@ TEST(SMEAttributes, Basics) {
TEST(SMEAttributes, Transitions) {
// Normal -> Normal
- ASSERT_FALSE(SA(SA::Normal).requiresSMChange(SA(SA::Normal)));
- ASSERT_FALSE(SA(SA::Normal).requiresPreservingZT0(SA(SA::Normal)));
- ASSERT_FALSE(SA(SA::Normal).requiresDisablingZABeforeCall(SA(SA::Normal)));
- ASSERT_FALSE(SA(SA::Normal).requiresEnablingZAAfterCall(SA(SA::Normal)));
+ ASSERT_FALSE(CA(SA::Normal, SA::Normal).requiresSMChange());
+ ASSERT_FALSE(CA(SA::Normal, SA::Normal).requiresPreservingZT0());
+ ASSERT_FALSE(CA(SA::Normal, SA::Normal).requiresDisablingZABeforeCall());
+ ASSERT_FALSE(CA(SA::Normal, SA::Normal).requiresEnablingZAAfterCall());
// Normal -> Normal + LocallyStreaming
- ASSERT_FALSE(SA(SA::Normal).requiresSMChange(SA(SA::Normal | SA::SM_Body)));
+ ASSERT_FALSE(CA(SA::Normal, SA::Normal | SA::SM_Body).requiresSMChange());
// Normal -> Streaming
- ASSERT_TRUE(SA(SA::Normal).requiresSMChange(SA(SA::SM_Enabled)));
+ ASSERT_TRUE(CA(SA::Normal, SA::SM_Enabled).requiresSMChange());
// Normal -> Streaming + LocallyStreaming
- ASSERT_TRUE(
- SA(SA::Normal).requiresSMChange(SA(SA::SM_Enabled | SA::SM_Body)));
+ ASSERT_TRUE(CA(SA::Normal, SA::SM_Enabled | SA::SM_Body).requiresSMChange());
// Normal -> Streaming-compatible
- ASSERT_FALSE(SA(SA::Normal).requiresSMChange(SA(SA::SM_Compatible)));
+ ASSERT_FALSE(CA(SA::Normal, SA::SM_Compatible).requiresSMChange());
// Normal -> Streaming-compatible + LocallyStreaming
ASSERT_FALSE(
- SA(SA::Normal).requiresSMChange(SA(SA::SM_Compatible | SA::SM_Body)));
+ CA(SA::Normal, SA::SM_Compatible | SA::SM_Body).requiresSMChange());
// Streaming -> Normal
- ASSERT_TRUE(SA(SA::SM_Enabled).requiresSMChange(SA(SA::Normal)));
+ ASSERT_TRUE(CA(SA::SM_Enabled, SA::Normal).requiresSMChange());
// Streaming -> Normal + LocallyStreaming
- ASSERT_TRUE(
- SA(SA::SM_Enabled).requiresSMChange(SA(SA::Normal | SA::SM_Body)));
+ ASSERT_TRUE(CA(SA::SM_Enabled, SA::Normal | SA::SM_Body).requiresSMChange());
// Streaming -> Streaming
- ASSERT_FALSE(SA(SA::SM_Enabled).requiresSMChange(SA(SA::SM_Enabled)));
+ ASSERT_FALSE(CA(SA::SM_Enabled, SA::SM_Enabled).requiresSMChange());
// Streaming -> Streaming + LocallyStreaming
ASSERT_FALSE(
- SA(SA::SM_Enabled).requiresSMChange(SA(SA::SM_Enabled | SA::SM_Body)));
+ CA(SA::SM_Enabled, SA::SM_Enabled | SA::SM_Body).requiresSMChange());
// Streaming -> Streaming-compatible
- ASSERT_FALSE(SA(SA::SM_Enabled).requiresSMChange(SA(SA::SM_Compatible)));
+ ASSERT_FALSE(CA(SA::SM_Enabled, SA::SM_Compatible).requiresSMChange());
// Streaming -> Streaming-compatible + LocallyStreaming
ASSERT_FALSE(
- SA(SA::SM_Enabled).requiresSMChange(SA(SA::SM_Compatible | SA::SM_Body)));
+ CA(SA::SM_Enabled, SA::SM_Compatible | SA::SM_Body).requiresSMChange());
// Streaming-compatible -> Normal
- ASSERT_TRUE(SA(SA::SM_Compatible).requiresSMChange(SA(SA::Normal)));
+ ASSERT_TRUE(CA(SA::SM_Compatible, SA::Normal).requiresSMChange());
ASSERT_TRUE(
- SA(SA::SM_Compatible).requiresSMChange(SA(SA::Normal | SA::SM_Body)));
+ CA(SA::SM_Compatible, SA::Normal | SA::SM_Body).requiresSMChange());
// Streaming-compatible -> Streaming
- ASSERT_TRUE(SA(SA::SM_Compatible).requiresSMChange(SA(SA::SM_Enabled)));
+ ASSERT_TRUE(CA(SA::SM_Compatible, SA::SM_Enabled).requiresSMChange());
// Streaming-compatible -> Streaming + LocallyStreaming
ASSERT_TRUE(
- SA(SA::SM_Compatible).requiresSMChange(SA(SA::SM_Enabled | SA::SM_Body)));
+ CA(SA::SM_Compatible, SA::SM_Enabled | SA::SM_Body).requiresSMChange());
// Streaming-compatible -> Streaming-compatible
- ASSERT_FALSE(SA(SA::SM_Compatible).requiresSMChange(SA(SA::SM_Compatible)));
+ ASSERT_FALSE(CA(SA::SM_Compatible, SA::SM_Compatible).requiresSMChange());
// Streaming-compatible -> Streaming-compatible + LocallyStreaming
- ASSERT_FALSE(SA(SA::SM_Compatible)
- .requiresSMChange(SA(SA::SM_Compatible | SA::SM_Body)));
+ ASSERT_FALSE(CA(SA::SM_Compatible, SA::SM_Compatible | SA::SM_Body)
+ .requiresSMChange());
SA Private_ZA = SA(SA::Normal);
SA ZA_Shared = SA(SA::encodeZAState(SA::StateValue::In));
@@ -310,37 +308,39 @@ TEST(SMEAttributes, Transitions) {
SA Undef_ZT0 = SA(SA::ZT0_Undef);
// Shared ZA -> Private ZA Interface
- ASSERT_FALSE(ZA_Shared.requiresDisablingZABeforeCall(Private_ZA));
- ASSERT_TRUE(ZA_Shared.requiresEnablingZAAfterCall(Private_ZA));
+ ASSERT_FALSE(CA(ZA_Shared, Private_ZA).requiresDisablingZABeforeCall());
+ ASSERT_TRUE(CA(ZA_Shared, Private_ZA).requiresEnablingZAAfterCall());
// Shared ZT0 -> Private ZA Interface
- ASSERT_TRUE(ZT0_Shared.requiresDisablingZABeforeCall(Private_ZA));
- ASSERT_TRUE(ZT0_Shared.requiresPreservingZT0(Private_ZA));
- ASSERT_TRUE(ZT0_Shared.requiresEnablingZAAfterCall(Private_ZA));
+ ASSERT_TRUE(CA(ZT0_Shared, Private_ZA).requiresDisablingZABeforeCall());
+ ASSERT_TRUE(CA(ZT0_Shared, Private_ZA).requiresPreservingZT0());
+ ASSERT_TRUE(CA(ZT0_Shared, Private_ZA).requiresEnablingZAAfterCall());
// Shared Undef ZT0 -> Private ZA Interface
// Note: "Undef ZT0" is a callsite attribute that means ZT0 is undefined at
the point of the call.
- ASSERT_TRUE(ZT0_Shared.requiresDisablingZABeforeCall(Undef_ZT0));
- ASSERT_FALSE(ZT0_Shared.requiresPreservingZT0(Undef_ZT0));
- ASSERT_TRUE(ZT0_Shared.requiresEnablingZAAfterCall(Undef_ZT0));
+ ASSERT_TRUE(
+ CA(ZT0_Shared, Private_ZA, Undef_ZT0).requiresDisablingZABeforeCall());
+ ASSERT_FALSE(CA(ZT0_Shared, Private_ZA, Undef_ZT0).requiresPreservingZT0());
+ ASSERT_TRUE(
+ CA(ZT0_Shared, Private_ZA, Undef_ZT0).requiresEnablingZAAfterCall());
// Shared ZA & ZT0 -> Private ZA Interface
- ASSERT_FALSE(ZA_ZT0_Shared.requiresDisablingZABeforeCall(Private_ZA));
- ASSERT_TRUE(ZA_ZT0_Shared.requiresPreservingZT0(Private_ZA));
- ASSERT_TRUE(ZA_ZT0_Shared.requiresEnablingZAAfterCall(Private_ZA));
+ ASSERT_FALSE(CA(ZA_ZT0_Shared, Private_ZA).requiresDisablingZABeforeCall());
+ ASSERT_TRUE(CA(ZA_ZT0_Shared, Private_ZA).requiresPreservingZT0());
+ ASSERT_TRUE(CA(ZA_ZT0_Shared, Private_ZA).requiresEnablingZAAfterCall());
// Shared ZA -> Shared ZA Interface
- ASSERT_FALSE(ZA_Shared.requiresDisablingZABeforeCall(ZT0_Shared));
- ASSERT_FALSE(ZA_Shared.requiresEnablingZAAfterCall(ZT0_Shared));
+ ASSERT_FALSE(CA(ZA_Shared, ZT0_Shared).requiresDisablingZABeforeCall());
+ ASSERT_FALSE(CA(ZA_Shared, ZT0_Shared).requiresEnablingZAAfterCall());
// Shared ZT0 -> Shared ZA Interface
- ASSERT_FALSE(ZT0_Shared.requiresDisablingZABeforeCall(ZT0_Shared));
- ASSERT_FALSE(ZT0_Shared.requiresPreservingZT0(ZT0_Shared));
- ASSERT_FALSE(ZT0_Shared.requiresEnablingZAAfterCall(ZT0_Shared));
+ ASSERT_FALSE(CA(ZT0_Shared, ZT0_Shared).requiresDisablingZABeforeCall());
+ ASSERT_FALSE(CA(ZT0_Shared, ZT0_Shared).requiresPreservingZT0());
+ ASSERT_FALSE(CA(ZT0_Shared, ZT0_Shared).requiresEnablingZAAfterCall());
// Shared ZA & ZT0 -> Shared ZA Interface
- ASSERT_FALSE(ZA_ZT0_Shared.requiresDisablingZABeforeCall(ZT0_Shared));
- ASSERT_FALSE(ZA_ZT0_Shared.requiresPreservingZT0(ZT0_Shared));
- ASSERT_FALSE(ZA_ZT0_Shared.requiresEnablingZAAfterCall(ZT0_Shared));
+ ASSERT_FALSE(CA(ZA_ZT0_Shared, ZT0_Shared).requiresDisablingZABeforeCall());
+ ASSERT_FALSE(CA(ZA_ZT0_Shared, ZT0_Shared).requiresPreservingZT0());
+ ASSERT_FALSE(CA(ZA_ZT0_Shared, ZT0_Shared).requiresEnablingZAAfterCall());
}
>From 5a489c08b166de9bf81667b0bf03d6b6bf15ae25 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 25 Apr 2025 11:34:59 +0000
Subject: [PATCH 2/8] [AArch64][SME] Disallow SME attributes on direct function
calls
This was only used in a handful of tests (mainly to avoid making
multiple function declarations). These tests can easily be updated to
use indirect calls or attributes on declarations.
This allows us to remove checks that looked at both the "callee" and
"callsite" attributes, which makes the API of SMECallAttrs a clearer
and less error-prone (as you can't accidentally use .callee() when you
should have used .calleeOrCallsite()).
Note: This currently still allows non-conflicting attributes on direct
calls (as clang currently duplicates streaming mode attributes at each
callsite).
---
.../Target/AArch64/AArch64ISelLowering.cpp | 16 ++---
.../AArch64/AArch64TargetTransformInfo.cpp | 2 +-
.../AArch64/Utils/AArch64SMEAttributes.cpp | 20 ++++--
.../AArch64/Utils/AArch64SMEAttributes.h | 52 ++++++++------
.../test/CodeGen/AArch64/sme-peephole-opts.ll | 23 ++++---
llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll | 4 +-
llvm/test/CodeGen/AArch64/sme-zt0-state.ll | 68 +++++++++----------
7 files changed, 101 insertions(+), 84 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8c0e758b6bc7a..20dfc16313f04 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8968,9 +8968,9 @@ static unsigned getSMCondition(const SMECallAttrs &CallAttrs) {
if (!CallAttrs.caller().hasStreamingCompatibleInterface() ||
CallAttrs.caller().hasStreamingBody())
return AArch64SME::Always;
- if (CallAttrs.calleeOrCallsite().hasNonStreamingInterface())
+ if (CallAttrs.callee().hasNonStreamingInterface())
return AArch64SME::IfCallerIsStreaming;
- if (CallAttrs.calleeOrCallsite().hasStreamingInterface())
+ if (CallAttrs.callee().hasStreamingInterface())
return AArch64SME::IfCallerIsNonStreaming;
llvm_unreachable("Unsupported attributes");
@@ -9147,7 +9147,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
return DescribeCallsite(R) << " sets up a lazy save for ZA";
});
} else if (RequiresSaveAllZA) {
- assert(!CallAttrs.calleeOrCallsite().hasSharedZAInterface() &&
+ assert(!CallAttrs.callee().hasSharedZAInterface() &&
"Cannot share state that may not exist");
Chain = emitSMEStateSaveRestore(*this, DAG, FuncInfo, DL, Chain,
/*IsSave=*/true);
@@ -9473,9 +9473,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
InGlue = Chain.getValue(1);
}
- SDValue NewChain = changeStreamingMode(
- DAG, DL, CallAttrs.calleeOrCallsite().hasStreamingInterface(), Chain,
- InGlue, getSMCondition(CallAttrs), PStateSM);
+ SDValue NewChain =
+ changeStreamingMode(DAG, DL, CallAttrs.callee().hasStreamingInterface(),
+ Chain, InGlue, getSMCondition(CallAttrs), PStateSM);
Chain = NewChain.getValue(0);
InGlue = NewChain.getValue(1);
}
@@ -9661,8 +9661,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
if (RequiresSMChange) {
assert(PStateSM && "Expected a PStateSM to be set");
Result = changeStreamingMode(
- DAG, DL, !CallAttrs.calleeOrCallsite().hasStreamingInterface(), Result,
- InGlue, getSMCondition(CallAttrs), PStateSM);
+ DAG, DL, !CallAttrs.callee().hasStreamingInterface(), Result, InGlue,
+ getSMCondition(CallAttrs), PStateSM);
if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
InGlue = Result.getValue(1);
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 031d41d6ffdce..97f8569b63529 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -352,7 +352,7 @@ AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call,
SMEAttrs FAttrs(*F);
SMECallAttrs CallAttrs(Call);
- if (SMECallAttrs(FAttrs, CallAttrs.calleeOrCallsite()).requiresSMChange()) {
+ if (SMECallAttrs(FAttrs, CallAttrs.callee()).requiresSMChange()) {
if (F == Call.getCaller()) // (1)
return CallPenaltyChangeSM * DefaultCallPenalty;
if (SMECallAttrs(FAttrs, CallAttrs.caller()).requiresSMChange()) // (2)
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
index 1085d618116eb..16ae5434e596a 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
@@ -94,22 +94,28 @@ void SMEAttrs::addKnownFunctionAttrs(StringRef FuncName) {
}
bool SMECallAttrs::requiresSMChange() const {
- if ((Callsite | Callee).hasStreamingCompatibleInterface())
+ if (callee().hasStreamingCompatibleInterface())
return false;
// Both non-streaming
- if (Caller.hasNonStreamingInterfaceAndBody() &&
- (Callsite | Callee).hasNonStreamingInterface())
+ if (caller().hasNonStreamingInterfaceAndBody() &&
+ callee().hasNonStreamingInterface())
return false;
// Both streaming
- if (Caller.hasStreamingInterfaceOrBody() &&
- (Callsite | Callee).hasStreamingInterface())
+ if (caller().hasStreamingInterfaceOrBody() &&
+ callee().hasStreamingInterface())
return false;
return true;
}
SMECallAttrs::SMECallAttrs(const CallBase &CB)
- : SMECallAttrs(*CB.getFunction(), CB.getCalledFunction(),
- CB.getAttributes()) {}
+ : CallerFn(*CB.getFunction()), CalledFn(CB.getCalledFunction()),
+ Callsite(CB.getAttributes()), IsIndirect(CB.isIndirectCall()) {
+ // FIXME: We probably should not allow SME attributes on direct calls but
+ // clang duplicates streaming mode attributes at each callsite.
+ assert((IsIndirect ||
+ ((Callsite.withoutPerCallsiteFlags() | CalledFn) == CalledFn)) &&
+ "SME attributes at callsite do not match declaration");
+}
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
index 791bb891a18b7..628c55ce3cbaa 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
@@ -44,7 +44,8 @@ class SMEAttrs {
ZA_Shift = 6,
ZA_Mask = 0b111 << ZA_Shift,
ZT0_Shift = 9,
- ZT0_Mask = 0b111 << ZT0_Shift
+ ZT0_Mask = 0b111 << ZT0_Shift,
+ Callsite_Flags = ZT0_Undef
};
SMEAttrs() = default;
@@ -135,6 +136,14 @@ class SMEAttrs {
return Merged;
}
+ SMEAttrs withoutPerCallsiteFlags() const {
+ return (Bitmask & ~Callsite_Flags);
+ }
+
+ bool operator==(SMEAttrs const &Other) const {
+ return Bitmask == Other.Bitmask;
+ }
+
private:
void addKnownFunctionAttrs(StringRef FuncName);
};
@@ -143,44 +152,48 @@ class SMEAttrs {
/// interfaces to query whether a streaming mode change or lazy-save mechanism
/// is required when going from one function to another (e.g. through a call).
class SMECallAttrs {
- SMEAttrs Caller;
- SMEAttrs Callee;
+ SMEAttrs CallerFn;
+ SMEAttrs CalledFn;
SMEAttrs Callsite;
+ bool IsIndirect = false;
public:
SMECallAttrs(SMEAttrs Caller, SMEAttrs Callee,
SMEAttrs Callsite = SMEAttrs::Normal)
- : Caller(Caller), Callee(Callee), Callsite(Callsite) {}
+ : CallerFn(Caller), CalledFn(Callee), Callsite(Callsite) {}
SMECallAttrs(const CallBase &CB);
- SMEAttrs &caller() { return Caller; }
- SMEAttrs &callee() { return Callee; }
+ SMEAttrs &caller() { return CallerFn; }
+ SMEAttrs &callee() {
+ if (IsIndirect)
+ return Callsite;
+ return CalledFn;
+ }
SMEAttrs &callsite() { return Callsite; }
- SMEAttrs const &caller() const { return Caller; }
- SMEAttrs const &callee() const { return Callee; }
+ SMEAttrs const &caller() const { return CallerFn; }
+ SMEAttrs const &callee() const {
+ return const_cast<SMECallAttrs *>(this)->callee();
+ }
SMEAttrs const &callsite() const { return Callsite; }
- SMEAttrs calleeOrCallsite() const { return Callsite | Callee; }
/// \return true if a call from Caller -> Callee requires a change in
/// streaming mode.
bool requiresSMChange() const;
bool requiresLazySave() const {
- return Caller.hasZAState() && (Callsite | Callee).hasPrivateZAInterface() &&
- !Callee.isSMEABIRoutine();
+ return caller().hasZAState() && callee().hasPrivateZAInterface() &&
+ !callee().isSMEABIRoutine();
}
bool requiresPreservingZT0() const {
- return Caller.hasZT0State() && !Callsite.hasUndefZT0() &&
- !(Callsite | Callee).sharesZT0() &&
- !(Callsite | Callee).hasAgnosticZAInterface();
+ return caller().hasZT0State() && !callsite().hasUndefZT0() &&
+ !callee().sharesZT0() && !callee().hasAgnosticZAInterface();
}
bool requiresDisablingZABeforeCall() const {
- return Caller.hasZT0State() && !Caller.hasZAState() &&
- (Callsite | Callee).hasPrivateZAInterface() &&
- !Callee.isSMEABIRoutine();
+ return caller().hasZT0State() && !caller().hasZAState() &&
+ callee().hasPrivateZAInterface() && !callee().isSMEABIRoutine();
}
bool requiresEnablingZAAfterCall() const {
@@ -188,9 +201,8 @@ class SMECallAttrs {
}
bool requiresPreservingAllZAState() const {
- return Caller.hasAgnosticZAInterface() &&
- !(Callsite | Callee).hasAgnosticZAInterface() &&
- !Callee.isSMEABIRoutine();
+ return caller().hasAgnosticZAInterface() &&
+ !callee().hasAgnosticZAInterface() && !callee().isSMEABIRoutine();
}
};
diff --git a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
index 6ea2267cd22e6..130a316bcc2ba 100644
--- a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
+++ b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
@@ -2,11 +2,12 @@
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve,+sme2 < %s | FileCheck %s
declare void @callee()
+declare void @callee_sm() "aarch64_pstate_sm_enabled"
declare void @callee_farg(float)
declare float @callee_farg_fret(float)
; normal caller -> streaming callees
-define void @test0() nounwind {
+define void @test0(ptr %callee) nounwind {
; CHECK-LABEL: test0:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
@@ -16,8 +17,8 @@ define void @test0() nounwind {
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: bl callee
-; CHECK-NEXT: bl callee
+; CHECK-NEXT: bl callee_sm
+; CHECK-NEXT: bl callee_sm
; CHECK-NEXT: smstop sm
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
@@ -25,8 +26,8 @@ define void @test0() nounwind {
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ret
- call void @callee() "aarch64_pstate_sm_enabled"
- call void @callee() "aarch64_pstate_sm_enabled"
+ call void @callee_sm()
+ call void @callee_sm()
ret void
}
@@ -118,7 +119,7 @@ define void @test3() nounwind "aarch64_pstate_sm_compatible" {
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB3_2:
-; CHECK-NEXT: bl callee
+; CHECK-NEXT: bl callee_sm
; CHECK-NEXT: tbnz w19, #0, .LBB3_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstop sm
@@ -140,7 +141,7 @@ define void @test3() nounwind "aarch64_pstate_sm_compatible" {
; CHECK-NEXT: // %bb.9:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB3_10:
-; CHECK-NEXT: bl callee
+; CHECK-NEXT: bl callee_sm
; CHECK-NEXT: tbnz w19, #0, .LBB3_12
; CHECK-NEXT: // %bb.11:
; CHECK-NEXT: smstop sm
@@ -152,9 +153,9 @@ define void @test3() nounwind "aarch64_pstate_sm_compatible" {
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret
- call void @callee() "aarch64_pstate_sm_enabled"
+ call void @callee_sm()
call void @callee()
- call void @callee() "aarch64_pstate_sm_enabled"
+ call void @callee_sm()
ret void
}
@@ -342,7 +343,7 @@ define void @test10() "aarch64_pstate_sm_body" {
; CHECK-NEXT: bl callee
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .cfi_restore vg
-; CHECK-NEXT: bl callee
+; CHECK-NEXT: bl callee_sm
; CHECK-NEXT: .cfi_offset vg, -24
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl callee
@@ -363,7 +364,7 @@ define void @test10() "aarch64_pstate_sm_body" {
; CHECK-NEXT: .cfi_restore b15
; CHECK-NEXT: ret
call void @callee()
- call void @callee() "aarch64_pstate_sm_enabled"
+ call void @callee_sm()
call void @callee()
ret void
}
diff --git a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
index 17d689d2c9eb5..0853325e449af 100644
--- a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
+++ b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
@@ -1098,11 +1098,11 @@ define void @test_rdsvl_right_after_prologue(i64 %x0) nounwind {
; NO-SVE-CHECK-NEXT: ret
%some_alloc = alloca i64, align 8
%rdsvl = tail call i64 @llvm.aarch64.sme.cntsd()
- call void @bar(i64 %rdsvl, i64 %x0) "aarch64_pstate_sm_enabled"
+ call void @bar(i64 %rdsvl, i64 %x0)
ret void
}
-declare void @bar(i64, i64)
+declare void @bar(i64, i64) "aarch64_pstate_sm_enabled"
; Ensure we still emit async unwind information with -fno-asynchronous-unwind-tables
; if the function contains a streaming-mode change.
diff --git a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
index 7361e850d713e..63577e4d217a8 100644
--- a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
+++ b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
@@ -1,15 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
-declare void @callee();
-
;
; Private-ZA Callee
;
; Expect spill & fill of ZT0 around call
; Expect smstop/smstart za around call
-define void @zt0_in_caller_no_state_callee() "aarch64_in_zt0" nounwind {
+define void @zt0_in_caller_no_state_callee(ptr %callee) "aarch64_in_zt0" nounwind {
; CHECK-LABEL: zt0_in_caller_no_state_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #80
@@ -17,20 +15,20 @@ define void @zt0_in_caller_no_state_callee() "aarch64_in_zt0" nounwind {
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: str zt0, [x19]
; CHECK-NEXT: smstop za
-; CHECK-NEXT: bl callee
+; CHECK-NEXT: blr x0
; CHECK-NEXT: smstart za
; CHECK-NEXT: ldr zt0, [x19]
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
- call void @callee();
+ call void %callee();
ret void;
}
; Expect spill & fill of ZT0 around call
; Expect setup and restore lazy-save around call
; Expect smstart za after call
-define void @za_zt0_shared_caller_no_state_callee() "aarch64_inout_za" "aarch64_in_zt0" nounwind {
+define void @za_zt0_shared_caller_no_state_callee(ptr %callee) "aarch64_inout_za" "aarch64_in_zt0" nounwind {
; CHECK-LABEL: za_zt0_shared_caller_no_state_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
@@ -49,7 +47,7 @@ define void @za_zt0_shared_caller_no_state_callee() "aarch64_inout_za" "aarch64_
; CHECK-NEXT: sturh w8, [x29, #-8]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: str zt0, [x19]
-; CHECK-NEXT: bl callee
+; CHECK-NEXT: blr x0
; CHECK-NEXT: smstart za
; CHECK-NEXT: ldr zt0, [x19]
; CHECK-NEXT: mrs x8, TPIDR2_EL0
@@ -63,7 +61,7 @@ define void @za_zt0_shared_caller_no_state_callee() "aarch64_inout_za" "aarch64_
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
- call void @callee();
+ call void %callee();
ret void;
}
@@ -72,43 +70,43 @@ define void @za_zt0_shared_caller_no_state_callee() "aarch64_inout_za" "aarch64_
;
; Caller and callee have shared ZT0 state, no spill/fill of ZT0 required
-define void @zt0_shared_caller_zt0_shared_callee() "aarch64_in_zt0" nounwind {
+define void @zt0_shared_caller_zt0_shared_callee(ptr %callee) "aarch64_in_zt0" nounwind {
; CHECK-LABEL: zt0_shared_caller_zt0_shared_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: bl callee
+; CHECK-NEXT: blr x0
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
- call void @callee() "aarch64_in_zt0";
+ call void %callee() "aarch64_in_zt0";
ret void;
}
; Expect spill & fill of ZT0 around call
-define void @za_zt0_shared_caller_za_shared_callee() "aarch64_inout_za" "aarch64_in_zt0" nounwind {
+define void @za_zt0_shared_caller_za_shared_callee(ptr %callee) "aarch64_inout_za" "aarch64_in_zt0" nounwind {
; CHECK-LABEL: za_zt0_shared_caller_za_shared_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: str zt0, [x19]
-; CHECK-NEXT: bl callee
+; CHECK-NEXT: blr x0
; CHECK-NEXT: ldr zt0, [x19]
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
- call void @callee() "aarch64_inout_za";
+ call void %callee() "aarch64_inout_za";
ret void;
}
; Caller and callee have shared ZA & ZT0
-define void @za_zt0_shared_caller_za_zt0_shared_callee() "aarch64_inout_za" "aarch64_in_zt0" nounwind {
+define void @za_zt0_shared_caller_za_zt0_shared_callee(ptr %callee) "aarch64_inout_za" "aarch64_in_zt0" nounwind {
; CHECK-LABEL: za_zt0_shared_caller_za_zt0_shared_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: bl callee
+; CHECK-NEXT: blr x0
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
- call void @callee() "aarch64_inout_za" "aarch64_in_zt0";
+ call void %callee() "aarch64_inout_za" "aarch64_in_zt0";
ret void;
}
@@ -116,7 +114,7 @@ define void @za_zt0_shared_caller_za_zt0_shared_callee() "aarch64_inout_za" "aar
; Expect spill & fill of ZT0 around call
; Expect smstop/smstart za around call
-define void @zt0_in_caller_zt0_new_callee() "aarch64_in_zt0" nounwind {
+define void @zt0_in_caller_zt0_new_callee(ptr %callee) "aarch64_in_zt0" nounwind {
; CHECK-LABEL: zt0_in_caller_zt0_new_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #80
@@ -124,13 +122,13 @@ define void @zt0_in_caller_zt0_new_callee() "aarch64_in_zt0" nounwind {
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: str zt0, [x19]
; CHECK-NEXT: smstop za
-; CHECK-NEXT: bl callee
+; CHECK-NEXT: blr x0
; CHECK-NEXT: smstart za
; CHECK-NEXT: ldr zt0, [x19]
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
- call void @callee() "aarch64_new_zt0";
+ call void %callee() "aarch64_new_zt0";
ret void;
}
@@ -140,7 +138,7 @@ define void @zt0_in_caller_zt0_new_callee() "aarch64_in_zt0" nounwind {
; Expect smstart ZA & clear ZT0
; Expect spill & fill of ZT0 around call
; Before return, expect smstop ZA
-define void @zt0_new_caller_zt0_new_callee() "aarch64_new_zt0" nounwind {
+define void @zt0_new_caller_zt0_new_callee(ptr %callee) "aarch64_new_zt0" nounwind {
; CHECK-LABEL: zt0_new_caller_zt0_new_callee:
; CHECK: // %bb.0: // %prelude
; CHECK-NEXT: sub sp, sp, #80
@@ -156,14 +154,14 @@ define void @zt0_new_caller_zt0_new_callee() "aarch64_new_zt0" nounwind {
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: str zt0, [x19]
; CHECK-NEXT: smstop za
-; CHECK-NEXT: bl callee
+; CHECK-NEXT: blr x0
; CHECK-NEXT: smstart za
; CHECK-NEXT: ldr zt0, [x19]
; CHECK-NEXT: smstop za
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
- call void @callee() "aarch64_new_zt0";
+ call void %callee() "aarch64_new_zt0";
ret void;
}
@@ -207,7 +205,7 @@ declare {i64, i64} @__arm_sme_state()
; Expect commit of lazy-save if ZA is dormant
; Expect smstart ZA & clear ZT0
; Before return, expect smstop ZA
-define void @zt0_new_caller() "aarch64_new_zt0" nounwind {
+define void @zt0_new_caller(ptr %callee) "aarch64_new_zt0" nounwind {
; CHECK-LABEL: zt0_new_caller:
; CHECK: // %bb.0: // %prelude
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -219,18 +217,18 @@ define void @zt0_new_caller() "aarch64_new_zt0" nounwind {
; CHECK-NEXT: .LBB8_2:
; CHECK-NEXT: smstart za
; CHECK-NEXT: zero { zt0 }
-; CHECK-NEXT: bl callee
+; CHECK-NEXT: blr x0
; CHECK-NEXT: smstop za
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
- call void @callee() "aarch64_in_zt0";
+ call void %callee() "aarch64_in_zt0";
ret void;
}
; Expect commit of lazy-save if ZA is dormant
; Expect smstart ZA, clear ZA & clear ZT0
; Before return, expect smstop ZA
-define void @new_za_zt0_caller() "aarch64_new_za" "aarch64_new_zt0" nounwind {
+define void @new_za_zt0_caller(ptr %callee) "aarch64_new_za" "aarch64_new_zt0" nounwind {
; CHECK-LABEL: new_za_zt0_caller:
; CHECK: // %bb.0: // %prelude
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -243,36 +241,36 @@ define void @new_za_zt0_caller() "aarch64_new_za" "aarch64_new_zt0" nounwind {
; CHECK-NEXT: smstart za
; CHECK-NEXT: zero {za}
; CHECK-NEXT: zero { zt0 }
-; CHECK-NEXT: bl callee
+; CHECK-NEXT: blr x0
; CHECK-NEXT: smstop za
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
- call void @callee() "aarch64_inout_za" "aarch64_in_zt0";
+ call void %callee() "aarch64_inout_za" "aarch64_in_zt0";
ret void;
}
; Expect clear ZA on entry
-define void @new_za_shared_zt0_caller() "aarch64_new_za" "aarch64_in_zt0" nounwind {
+define void @new_za_shared_zt0_caller(ptr %callee) "aarch64_new_za" "aarch64_in_zt0" nounwind {
; CHECK-LABEL: new_za_shared_zt0_caller:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: zero {za}
-; CHECK-NEXT: bl callee
+; CHECK-NEXT: blr x0
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
- call void @callee() "aarch64_inout_za" "aarch64_in_zt0";
+ call void %callee() "aarch64_inout_za" "aarch64_in_zt0";
ret void;
}
; Expect clear ZT0 on entry
-define void @shared_za_new_zt0() "aarch64_inout_za" "aarch64_new_zt0" nounwind {
+define void @shared_za_new_zt0(ptr %callee) "aarch64_inout_za" "aarch64_new_zt0" nounwind {
; CHECK-LABEL: shared_za_new_zt0:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: zero { zt0 }
-; CHECK-NEXT: bl callee
+; CHECK-NEXT: blr x0
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
- call void @callee() "aarch64_inout_za" "aarch64_in_zt0";
+ call void %callee() "aarch64_inout_za" "aarch64_in_zt0";
ret void;
}
>From 109d321416b6dc063ff8532fdf09bc1478c88e0a Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 25 Apr 2025 14:24:21 +0000
Subject: [PATCH 3/8] Use ternary
---
llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
index 628c55ce3cbaa..c4f132ba6ddf1 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
@@ -165,11 +165,7 @@ class SMECallAttrs {
SMECallAttrs(const CallBase &CB);
SMEAttrs &caller() { return CallerFn; }
- SMEAttrs &callee() {
- if (IsIndirect)
- return Callsite;
- return CalledFn;
- }
+ SMEAttrs &callee() { return IsIndirect ? Callsite : CalledFn; }
SMEAttrs &callsite() { return Callsite; }
SMEAttrs const &caller() const { return CallerFn; }
SMEAttrs const &callee() const {
>From 7b931f5c771f9a0e486fbf300b70bf3142c10c16 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 6 May 2025 10:17:50 +0000
Subject: [PATCH 4/8] Only infer attributes for callees (not definitions)
This matches the old behaviour and allows stubbing the ABI routines
in C++.
---
llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp | 7 ++++++-
llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h | 10 ++++------
2 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
index 16ae5434e596a..23de691fafef2 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
@@ -111,8 +111,13 @@ bool SMECallAttrs::requiresSMChange() const {
}
SMECallAttrs::SMECallAttrs(const CallBase &CB)
- : CallerFn(*CB.getFunction()), CalledFn(CB.getCalledFunction()),
+ : CallerFn(*CB.getFunction()), CalledFn(SMEAttrs::Normal),
Callsite(CB.getAttributes()), IsIndirect(CB.isIndirectCall()) {
+ if (auto *CalledFunction = CB.getCalledFunction()) {
+ CalledFn = SMEAttrs(*CalledFunction);
+ CalledFn.addKnownFunctionAttrs(CalledFunction->getName());
+ }
+
// FIXME: We probably should not allow SME attributes on direct calls but
// clang duplicates streaming mode attributes at each callsite.
assert((IsIndirect ||
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
index c4f132ba6ddf1..369caaef47eca 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
@@ -16,6 +16,7 @@ namespace llvm {
class Function;
class CallBase;
class AttributeList;
+class SMECallAttrs;
/// SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
/// It helps determine a function's requirements for PSTATE.ZA and PSTATE.SM.
@@ -50,12 +51,7 @@ class SMEAttrs {
SMEAttrs() = default;
SMEAttrs(unsigned Mask) { set(Mask); }
- SMEAttrs(const Function *F)
- : SMEAttrs(F ? F->getAttributes() : AttributeList()) {
- if (F)
- addKnownFunctionAttrs(F->getName());
- }
- SMEAttrs(const Function &F) : SMEAttrs(&F) {}
+ SMEAttrs(const Function &F) : SMEAttrs(F.getAttributes()) {}
SMEAttrs(const AttributeList &L);
SMEAttrs(StringRef FuncName) { addKnownFunctionAttrs(FuncName); };
@@ -144,6 +140,8 @@ class SMEAttrs {
return Bitmask == Other.Bitmask;
}
+ friend SMECallAttrs;
+
private:
void addKnownFunctionAttrs(StringRef FuncName);
};
>From 57efde040b2a331bd98e202a4b8e4070915fd3e8 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 6 May 2025 11:06:47 +0000
Subject: [PATCH 5/8] Add test
---
.../test/CodeGen/AArch64/aarch64-sme-stubs.ll | 47 +++++++++++++++++++
1 file changed, 47 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll
diff --git a/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll b/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll
new file mode 100644
index 0000000000000..ed3690b4f9d64
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s
+
+; Checks SME ABI routines can be implemented as stubs without +sme.
+
+define i1 @__aarch64_sme_accessible() {
+; CHECK-LABEL: __aarch64_sme_accessible:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: ret
+entry:
+ ret i1 true
+}
+
+define [2 x i64] @__arm_sme_state() {
+; CHECK-LABEL: __arm_sme_state:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x0, xzr
+; CHECK-NEXT: mov x1, xzr
+; CHECK-NEXT: ret
+entry:
+ ret [2 x i64] zeroinitializer
+}
+
+define void @__arm_tpidr2_restore() {
+; CHECK-LABEL: __arm_tpidr2_restore:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+entry:
+ ret void
+}
+
+define void @__arm_tpidr2_save() {
+; CHECK-LABEL: __arm_tpidr2_save:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+entry:
+ ret void
+}
+
+define void @__arm_za_disable() {
+; CHECK-LABEL: __arm_za_disable:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
+gientry:
+ ret void
+}
>From 94e52293ff62a6c1eb92e7e370390a5ce76987ee Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 6 May 2025 11:10:40 +0000
Subject: [PATCH 6/8] Tidy
---
.../lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp | 6 ++----
llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h | 11 +++++++----
2 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
index 23de691fafef2..a67cc25e2c9a5 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
@@ -113,10 +113,8 @@ bool SMECallAttrs::requiresSMChange() const {
SMECallAttrs::SMECallAttrs(const CallBase &CB)
: CallerFn(*CB.getFunction()), CalledFn(SMEAttrs::Normal),
Callsite(CB.getAttributes()), IsIndirect(CB.isIndirectCall()) {
- if (auto *CalledFunction = CB.getCalledFunction()) {
- CalledFn = SMEAttrs(*CalledFunction);
- CalledFn.addKnownFunctionAttrs(CalledFunction->getName());
- }
+ if (auto *CalledFunction = CB.getCalledFunction())
+ CalledFn = SMEAttrs(*CalledFunction, SMEAttrs::InferAttrsFromName::Yes);
// FIXME: We probably should not allow SME attributes on direct calls but
// clang duplicates streaming mode attributes at each callsite.
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
index 369caaef47eca..e611a744f0589 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
@@ -16,7 +16,6 @@ namespace llvm {
class Function;
class CallBase;
class AttributeList;
-class SMECallAttrs;
/// SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
/// It helps determine a function's requirements for PSTATE.ZA and PSTATE.SM.
@@ -49,9 +48,15 @@ class SMEAttrs {
Callsite_Flags = ZT0_Undef
};
+ enum class InferAttrsFromName { No, Yes };
+
SMEAttrs() = default;
SMEAttrs(unsigned Mask) { set(Mask); }
- SMEAttrs(const Function &F) : SMEAttrs(F.getAttributes()) {}
+ SMEAttrs(const Function &F, InferAttrsFromName Infer = InferAttrsFromName::No)
+ : SMEAttrs(F.getAttributes()) {
+ if (Infer == InferAttrsFromName::Yes)
+ addKnownFunctionAttrs(F.getName());
+ }
SMEAttrs(const AttributeList &L);
SMEAttrs(StringRef FuncName) { addKnownFunctionAttrs(FuncName); };
@@ -140,8 +145,6 @@ class SMEAttrs {
return Bitmask == Other.Bitmask;
}
- friend SMECallAttrs;
-
private:
void addKnownFunctionAttrs(StringRef FuncName);
};
>From 2427417e55a208bc3b8adb758cea4743794bac59 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 6 May 2025 11:14:29 +0000
Subject: [PATCH 7/8] Fix typo
---
llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll b/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll
index ed3690b4f9d64..f7182e2a166a5 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll
@@ -42,6 +42,6 @@ define void @__arm_za_disable() {
; CHECK-LABEL: __arm_za_disable:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ret
-gientry:
+entry:
ret void
}
>From 9ddaf096b9696cf75eaffa9fee55582cc96a415d Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Wed, 14 May 2025 10:49:02 +0000
Subject: [PATCH 8/8] Fixups
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 17 +++++++++--------
.../AArch64/Utils/AArch64SMEAttributes.cpp | 2 +-
.../Target/AArch64/Utils/AArch64SMEAttributes.h | 6 +++---
3 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 20dfc16313f04..d60f02be63495 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8642,13 +8642,13 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI,
}
static SMECallAttrs
-getSMECallAttrs(const Function &Function,
+getSMECallAttrs(const Function &Caller,
const TargetLowering::CallLoweringInfo &CLI) {
if (CLI.CB)
return SMECallAttrs(*CLI.CB);
if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
- return SMECallAttrs(SMEAttrs(Function), SMEAttrs(ES->getSymbol()));
- return SMECallAttrs(SMEAttrs(Function), SMEAttrs(SMEAttrs::Normal));
+ return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(ES->getSymbol()));
+ return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(SMEAttrs::Normal));
}
bool AArch64TargetLowering::isEligibleForTailCallOptimization(
@@ -8964,7 +8964,8 @@ static SDValue emitSMEStateSaveRestore(const AArch64TargetLowering &TLI,
return TLI.LowerCallTo(CLI).second;
}
-static unsigned getSMCondition(const SMECallAttrs &CallAttrs) {
+static AArch64SME::ToggleCondition
+getSMToggleCondition(const SMECallAttrs &CallAttrs) {
if (!CallAttrs.caller().hasStreamingCompatibleInterface() ||
CallAttrs.caller().hasStreamingBody())
return AArch64SME::Always;
@@ -9473,9 +9474,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
InGlue = Chain.getValue(1);
}
- SDValue NewChain =
- changeStreamingMode(DAG, DL, CallAttrs.callee().hasStreamingInterface(),
- Chain, InGlue, getSMCondition(CallAttrs), PStateSM);
+ SDValue NewChain = changeStreamingMode(
+ DAG, DL, CallAttrs.callee().hasStreamingInterface(), Chain, InGlue,
+ getSMToggleCondition(CallAttrs), PStateSM);
Chain = NewChain.getValue(0);
InGlue = NewChain.getValue(1);
}
@@ -9662,7 +9663,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
assert(PStateSM && "Expected a PStateSM to be set");
Result = changeStreamingMode(
DAG, DL, !CallAttrs.callee().hasStreamingInterface(), Result, InGlue,
- getSMCondition(CallAttrs), PStateSM);
+ getSMToggleCondition(CallAttrs), PStateSM);
if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
InGlue = Result.getValue(1);
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
index a67cc25e2c9a5..271094f935e0e 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
@@ -90,7 +90,7 @@ void SMEAttrs::addKnownFunctionAttrs(StringRef FuncName) {
if (FuncName == "__arm_sme_save" || FuncName == "__arm_sme_restore" ||
FuncName == "__arm_sme_state_size")
KnownAttrs |= SMEAttrs::SM_Compatible | SMEAttrs::SME_ABI_Routine;
- set(KnownAttrs, /*Enable=*/true);
+ set(KnownAttrs);
}
bool SMECallAttrs::requiresSMChange() const {
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
index e611a744f0589..f1be0ecbee7ed 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
@@ -45,7 +45,7 @@ class SMEAttrs {
ZA_Mask = 0b111 << ZA_Shift,
ZT0_Shift = 9,
ZT0_Mask = 0b111 << ZT0_Shift,
- Callsite_Flags = ZT0_Undef
+ CallSiteFlags_Mask = ZT0_Undef
};
enum class InferAttrsFromName { No, Yes };
@@ -133,12 +133,12 @@ class SMEAttrs {
SMEAttrs operator|(SMEAttrs Other) const {
SMEAttrs Merged(*this);
- Merged.set(Other.Bitmask, /*Enable=*/true);
+ Merged.set(Other.Bitmask);
return Merged;
}
SMEAttrs withoutPerCallsiteFlags() const {
- return (Bitmask & ~Callsite_Flags);
+ return (Bitmask & ~CallSiteFlags_Mask);
}
bool operator==(SMEAttrs const &Other) const {
More information about the llvm-commits
mailing list