[llvm] 831527a - [FMV][GlobalOpt] Statically resolve calls to versioned functions. (#87939)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 17 02:49:47 PST 2025
Author: Alexandros Lamprineas
Date: 2025-01-17T10:49:43Z
New Revision: 831527a5ef63d24d056afc92509caf5ceb1d3682
URL: https://github.com/llvm/llvm-project/commit/831527a5ef63d24d056afc92509caf5ceb1d3682
DIFF: https://github.com/llvm/llvm-project/commit/831527a5ef63d24d056afc92509caf5ceb1d3682.diff
LOG: [FMV][GlobalOpt] Statically resolve calls to versioned functions. (#87939)
To deduce whether the optimization is legal we need to compare the target
features between caller and callee versions. The criteria for bypassing
the resolver are the following:
* If the callee's feature set is a subset of the caller's feature set,
then the callee is a candidate for direct call.
* Among such candidates the one of highest priority is the best match
and it shall be picked, unless there is a version of the callee with
higher priority than the best match which cannot be picked from a
higher priority caller (directly or through the resolver).
* For every higher priority callee version than the best match, there
is a higher priority caller version whose feature set availability
is implied by the callee's feature set.
Example:
Callers and Callees are ordered in decreasing priority.
The arrows indicate successful call redirections.
Caller Callee Explanation
=========================================================================
mops+sve2 --+--> mops all the callee versions are subsets of the
| caller but mops has the highest priority
|
mops --+ sve2 between mops and default callees, mops wins
sve sve between sve and default callees, sve wins
but sve2 does not have a high priority caller
default -----> default sve (callee) implies sve (caller),
sve2(callee) implies sve (caller),
mops(callee) implies mops(caller)
Added:
llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll
Modified:
llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/include/llvm/TargetParser/AArch64TargetParser.h
llvm/lib/Analysis/TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/lib/TargetParser/AArch64TargetParser.cpp
llvm/lib/Transforms/IPO/GlobalOpt.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index fe13fc676e3031..71b204f9c3fec7 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1870,6 +1870,13 @@ class TargetTransformInfo {
/// false, but it shouldn't matter what it returns anyway.
bool hasArmWideBranch(bool Thumb) const;
+ /// Returns a bitmask constructed from the target-features or fmv-features
+ /// metadata of a function.
+ uint64_t getFeatureMask(const Function &F) const;
+
+ /// Returns true if this is an instance of a function with multiple versions.
+ bool isMultiversionedFunction(const Function &F) const;
+
/// \return The maximum number of function arguments the target supports.
unsigned getMaxNumArgs() const;
@@ -2312,6 +2319,8 @@ class TargetTransformInfo::Concept {
virtual VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
virtual bool hasArmWideBranch(bool Thumb) const = 0;
+ virtual uint64_t getFeatureMask(const Function &F) const = 0;
+ virtual bool isMultiversionedFunction(const Function &F) const = 0;
virtual unsigned getMaxNumArgs() const = 0;
virtual unsigned getNumBytesToPadGlobalArray(unsigned Size,
Type *ArrayType) const = 0;
@@ -3144,6 +3153,14 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
return Impl.hasArmWideBranch(Thumb);
}
+ uint64_t getFeatureMask(const Function &F) const override {
+ return Impl.getFeatureMask(F);
+ }
+
+ bool isMultiversionedFunction(const Function &F) const override {
+ return Impl.isMultiversionedFunction(F);
+ }
+
unsigned getMaxNumArgs() const override {
return Impl.getMaxNumArgs();
}
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 7ac3063ca9a37f..dcef4a1abcfa3d 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1039,6 +1039,10 @@ class TargetTransformInfoImplBase {
bool hasArmWideBranch(bool) const { return false; }
+ uint64_t getFeatureMask(const Function &F) const { return 0; }
+
+ bool isMultiversionedFunction(const Function &F) const { return false; }
+
unsigned getMaxNumArgs() const { return UINT_MAX; }
unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const {
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 63f06a3a692982..0338770593bc4b 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -270,13 +270,16 @@ void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values);
bool isX18ReservedByDefault(const Triple &TT);
-// Return the priority for a given set of FMV features.
+// For a given set of feature names, which can be either target-features, or
+// fmv-features metadata, expand their dependencies and then return a bitmask
+// corresponding to the entries of AArch64::FeatPriorities.
uint64_t getFMVPriority(ArrayRef<StringRef> Features);
-// For given feature names, return a bitmask corresponding to the entries of
-// AArch64::CPUFeatures. The values in CPUFeatures are not bitmasks themselves,
-// they are sequential (0, 1, 2, 3, ...). The resulting bitmask is used at
-// runtime to test whether a certain FMV feature is available on the host.
+// For a given set of FMV feature names, expand their dependencies and then
+// return a bitmask corresponding to the entries of AArch64::CPUFeatures.
+// The values in CPUFeatures are not bitmasks themselves, they are sequential
+// (0, 1, 2, 3, ...). The resulting bitmask is used at runtime to test whether
+// a certain FMV feature is available on the host.
uint64_t getCpuSupportsMask(ArrayRef<StringRef> Features);
void PrintSupportedExtensions();
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index df42dc2746dafc..8b9722d047edc7 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1383,6 +1383,14 @@ bool TargetTransformInfo::hasArmWideBranch(bool Thumb) const {
return TTIImpl->hasArmWideBranch(Thumb);
}
+uint64_t TargetTransformInfo::getFeatureMask(const Function &F) const {
+ return TTIImpl->getFeatureMask(F);
+}
+
+bool TargetTransformInfo::isMultiversionedFunction(const Function &F) const {
+ return TTIImpl->isMultiversionedFunction(F);
+}
+
unsigned TargetTransformInfo::getMaxNumArgs() const {
return TTIImpl->getMaxNumArgs();
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 932a6f9ce23fd2..7f10bfed739b41 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
+#include "llvm/TargetParser/AArch64TargetParser.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
@@ -248,6 +249,19 @@ static bool hasPossibleIncompatibleOps(const Function *F) {
return false;
}
+uint64_t AArch64TTIImpl::getFeatureMask(const Function &F) const {
+ StringRef AttributeStr =
+ isMultiversionedFunction(F) ? "fmv-features" : "target-features";
+ StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();
+ SmallVector<StringRef, 8> Features;
+ FeatureStr.split(Features, ",");
+ return AArch64::getFMVPriority(Features);
+}
+
+bool AArch64TTIImpl::isMultiversionedFunction(const Function &F) const {
+ return F.hasFnAttribute("fmv-features");
+}
+
bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
SMEAttrs CallerAttrs(*Caller), CalleeAttrs(*Callee);
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 8e7e590c173ff2..1eb805ae00b1bb 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -89,6 +89,10 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
unsigned DefaultCallPenalty) const;
+ uint64_t getFeatureMask(const Function &F) const;
+
+ bool isMultiversionedFunction(const Function &F) const;
+
/// \name Scalar TTI Implementations
/// @{
diff --git a/llvm/lib/TargetParser/AArch64TargetParser.cpp b/llvm/lib/TargetParser/AArch64TargetParser.cpp
index 34ca03a47e0a4a..e13c6e6d28c2bb 100644
--- a/llvm/lib/TargetParser/AArch64TargetParser.cpp
+++ b/llvm/lib/TargetParser/AArch64TargetParser.cpp
@@ -48,12 +48,33 @@ std::optional<AArch64::ArchInfo> AArch64::ArchInfo::findBySubArch(StringRef SubA
return {};
}
+std::optional<AArch64::FMVInfo> lookupFMVByID(AArch64::ArchExtKind ExtID) {
+ for (const AArch64::FMVInfo &Info : AArch64::getFMVInfo())
+ if (Info.ID && *Info.ID == ExtID)
+ return Info;
+ return {};
+}
+
uint64_t AArch64::getFMVPriority(ArrayRef<StringRef> Features) {
- uint64_t Priority = 0;
- for (StringRef Feature : Features)
- if (std::optional<FMVInfo> Info = parseFMVExtension(Feature))
- Priority |= (1ULL << Info->PriorityBit);
- return Priority;
+ // Transitively enable the Arch Extensions which correspond to each feature.
+ ExtensionSet FeatureBits;
+ for (const StringRef Feature : Features) {
+ std::optional<FMVInfo> FMV = parseFMVExtension(Feature);
+ if (!FMV) {
+ if (std::optional<ExtensionInfo> Info = targetFeatureToExtension(Feature))
+ FMV = lookupFMVByID(Info->ID);
+ }
+ if (FMV && FMV->ID)
+ FeatureBits.enable(*FMV->ID);
+ }
+
+ // Construct a bitmask for all the transitively enabled Arch Extensions.
+ uint64_t PriorityMask = 0;
+ for (const FMVInfo &Info : getFMVInfo())
+ if (Info.ID && FeatureBits.Enabled.test(*Info.ID))
+ PriorityMask |= (1ULL << Info.PriorityBit);
+
+ return PriorityMask;
}
uint64_t AArch64::getCpuSupportsMask(ArrayRef<StringRef> Features) {
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 78cd249c9c16a7..bf0cacc6224be8 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2641,6 +2641,165 @@ DeleteDeadIFuncs(Module &M,
return Changed;
}
+// Follows the use-def chain of \p V backwards until it finds a Function,
+// in which case it collects in \p Versions. Return true on successful
+// use-def chain traversal, false otherwise.
+static bool collectVersions(TargetTransformInfo &TTI, Value *V,
+ SmallVectorImpl<Function *> &Versions) {
+ if (auto *F = dyn_cast<Function>(V)) {
+ if (!TTI.isMultiversionedFunction(*F))
+ return false;
+ Versions.push_back(F);
+ } else if (auto *Sel = dyn_cast<SelectInst>(V)) {
+ if (!collectVersions(TTI, Sel->getTrueValue(), Versions))
+ return false;
+ if (!collectVersions(TTI, Sel->getFalseValue(), Versions))
+ return false;
+ } else if (auto *Phi = dyn_cast<PHINode>(V)) {
+ for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
+ if (!collectVersions(TTI, Phi->getIncomingValue(I), Versions))
+ return false;
+ } else {
+ // Unknown instruction type. Bail.
+ return false;
+ }
+ return true;
+}
+
+// Bypass the IFunc Resolver of MultiVersioned functions when possible. To
+// deduce whether the optimization is legal we need to compare the target
+// features between caller and callee versions. The criteria for bypassing
+// the resolver are the following:
+//
+// * If the callee's feature set is a subset of the caller's feature set,
+// then the callee is a candidate for direct call.
+//
+// * Among such candidates the one of highest priority is the best match
+// and it shall be picked, unless there is a version of the callee with
+// higher priority than the best match which cannot be picked from a
+// higher priority caller (directly or through the resolver).
+//
+// * For every higher priority callee version than the best match, there
+// is a higher priority caller version whose feature set availability
+// is implied by the callee's feature set.
+//
+static bool OptimizeNonTrivialIFuncs(
+ Module &M, function_ref<TargetTransformInfo &(Function &)> GetTTI) {
+ bool Changed = false;
+
+ // Cache containing the mask constructed from a function's target features.
+ DenseMap<Function *, uint64_t> FeatureMask;
+
+ for (GlobalIFunc &IF : M.ifuncs()) {
+ if (IF.isInterposable())
+ continue;
+
+ Function *Resolver = IF.getResolverFunction();
+ if (!Resolver)
+ continue;
+
+ if (Resolver->isInterposable())
+ continue;
+
+ TargetTransformInfo &TTI = GetTTI(*Resolver);
+
+ // Discover the callee versions.
+ SmallVector<Function *> Callees;
+ if (any_of(*Resolver, [&TTI, &Callees](BasicBlock &BB) {
+ if (auto *Ret = dyn_cast_or_null<ReturnInst>(BB.getTerminator()))
+ if (!collectVersions(TTI, Ret->getReturnValue(), Callees))
+ return true;
+ return false;
+ }))
+ continue;
+
+ assert(!Callees.empty() && "Expecting successful collection of versions");
+
+ // Cache the feature mask for each callee.
+ for (Function *Callee : Callees) {
+ auto [It, Inserted] = FeatureMask.try_emplace(Callee);
+ if (Inserted)
+ It->second = TTI.getFeatureMask(*Callee);
+ }
+
+ // Sort the callee versions in decreasing priority order.
+ sort(Callees, [&](auto *LHS, auto *RHS) {
+ return FeatureMask[LHS] > FeatureMask[RHS];
+ });
+
+ // Find the callsites and cache the feature mask for each caller.
+ SmallVector<Function *> Callers;
+ DenseMap<Function *, SmallVector<CallBase *>> CallSites;
+ for (User *U : IF.users()) {
+ if (auto *CB = dyn_cast<CallBase>(U)) {
+ if (CB->getCalledOperand() == &IF) {
+ Function *Caller = CB->getFunction();
+ auto [FeatIt, FeatInserted] = FeatureMask.try_emplace(Caller);
+ if (FeatInserted)
+ FeatIt->second = TTI.getFeatureMask(*Caller);
+ auto [CallIt, CallInserted] = CallSites.try_emplace(Caller);
+ if (CallInserted)
+ Callers.push_back(Caller);
+ CallIt->second.push_back(CB);
+ }
+ }
+ }
+
+ // Sort the caller versions in decreasing priority order.
+ sort(Callers, [&](auto *LHS, auto *RHS) {
+ return FeatureMask[LHS] > FeatureMask[RHS];
+ });
+
+ auto implies = [](uint64_t A, uint64_t B) { return (A & B) == B; };
+
+ // Index to the highest priority candidate.
+ unsigned I = 0;
+ // Now try to redirect calls starting from higher priority callers.
+ for (Function *Caller : Callers) {
+ assert(I < Callees.size() && "Found callers of equal priority");
+
+ Function *Callee = Callees[I];
+ uint64_t CallerBits = FeatureMask[Caller];
+ uint64_t CalleeBits = FeatureMask[Callee];
+
+ // In the case of FMV callers, we know that all higher priority callers
+ // than the current one did not get selected at runtime, which helps
+ // reason about the callees (if they have versions that mandate presence
+ // of the features which we already know are unavailable on this target).
+ if (TTI.isMultiversionedFunction(*Caller)) {
+ // If the feature set of the caller implies the feature set of the
+ // highest priority candidate then it shall be picked. In case of
+ // identical sets advance the candidate index one position.
+ if (CallerBits == CalleeBits)
+ ++I;
+ else if (!implies(CallerBits, CalleeBits)) {
+ // Keep advancing the candidate index as long as the caller's
+ // features are a subset of the current candidate's.
+ while (implies(CalleeBits, CallerBits)) {
+ if (++I == Callees.size())
+ break;
+ CalleeBits = FeatureMask[Callees[I]];
+ }
+ continue;
+ }
+ } else {
+ // We can't reason much about non-FMV callers. Just pick the highest
+ // priority callee if it matches, otherwise bail.
+ if (I > 0 || !implies(CallerBits, CalleeBits))
+ continue;
+ }
+ auto &Calls = CallSites[Caller];
+ for (CallBase *CS : Calls)
+ CS->setCalledOperand(Callee);
+ Changed = true;
+ }
+ if (IF.use_empty() ||
+ all_of(IF.users(), [](User *U) { return isa<GlobalAlias>(U); }))
+ NumIFuncsResolved++;
+ }
+ return Changed;
+}
+
static bool
optimizeGlobalsInModule(Module &M, const DataLayout &DL,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
@@ -2707,6 +2866,9 @@ optimizeGlobalsInModule(Module &M, const DataLayout &DL,
// Optimize IFuncs whose callee's are statically known.
LocalChange |= OptimizeStaticIFuncs(M);
+ // Optimize IFuncs based on the target features of the caller.
+ LocalChange |= OptimizeNonTrivialIFuncs(M, GetTTI);
+
// Remove any IFuncs that are now dead.
LocalChange |= DeleteDeadIFuncs(M, NotDiscardableComdats);
diff --git a/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll b/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll
new file mode 100644
index 00000000000000..90bd98a9b0d381
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll
@@ -0,0 +1,365 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call i32 @(test_single_bb_resolver|test_multi_bb_resolver|test_caller_feats_not_implied|test_non_fmv_caller|test_priority|test_alternative_names)" --version 4
+; RUN: opt --passes=globalopt -o - -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+$test_single_bb_resolver.resolver = comdat any
+$test_multi_bb_resolver.resolver = comdat any
+$test_caller_feats_not_implied.resolver = comdat any
+$test_non_fmv_caller.resolver = comdat any
+$test_priority.resolver = comdat any
+$test_alternative_names.resolver = comdat any
+
+@__aarch64_cpu_features = external local_unnamed_addr global { i64 }
+
+@test_single_bb_resolver = weak_odr ifunc i32 (), ptr @test_single_bb_resolver.resolver
+@test_multi_bb_resolver = weak_odr ifunc i32 (), ptr @test_multi_bb_resolver.resolver
+@test_caller_feats_not_implied = weak_odr ifunc i32 (), ptr @test_caller_feats_not_implied.resolver
+@test_non_fmv_caller = weak_odr ifunc i32 (), ptr @test_non_fmv_caller.resolver
+@test_priority = weak_odr ifunc i32 (), ptr @test_priority.resolver
+@test_alternative_names = weak_odr ifunc i32 (), ptr @test_alternative_names.resolver
+
+declare void @__init_cpu_features_resolver() local_unnamed_addr
+
+declare i32 @test_single_bb_resolver.default() #0
+declare i32 @test_single_bb_resolver._Msve() #1
+declare i32 @test_single_bb_resolver._Msve2() #2
+
+define weak_odr ptr @test_single_bb_resolver.resolver() comdat {
+; CHECK-LABEL: define weak_odr ptr @test_single_bb_resolver.resolver() comdat {
+resolver_entry:
+ tail call void @__init_cpu_features_resolver()
+ %0 = load i64, ptr @__aarch64_cpu_features, align 8
+ %1 = and i64 %0, 68719476736
+ %.not = icmp eq i64 %1, 0
+ %2 = and i64 %0, 1073741824
+ %.not3 = icmp eq i64 %2, 0
+ %test_single_bb_resolver._Msve.test_single_bb_resolver.default = select i1 %.not3, ptr @test_single_bb_resolver.default, ptr @test_single_bb_resolver._Msve
+ %common.ret.op = select i1 %.not, ptr %test_single_bb_resolver._Msve.test_single_bb_resolver.default, ptr @test_single_bb_resolver._Msve2
+ ret ptr %common.ret.op
+}
+
+define i32 @caller1._Msve() #1 {
+; CHECK-LABEL: define i32 @caller1._Msve(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_single_bb_resolver._Msve()
+;
+entry:
+ %call = tail call i32 @test_single_bb_resolver()
+ ret i32 %call
+}
+
+define i32 @caller1._Msve2() #2 {
+; CHECK-LABEL: define i32 @caller1._Msve2(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_single_bb_resolver._Msve2()
+;
+entry:
+ %call = tail call i32 @test_single_bb_resolver()
+ ret i32 %call
+}
+
+define i32 @caller1.default() #0 {
+; CHECK-LABEL: define i32 @caller1.default(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_single_bb_resolver.default()
+;
+entry:
+ %call = tail call i32 @test_single_bb_resolver()
+ ret i32 %call
+}
+
+declare i32 @test_multi_bb_resolver._Mmops() #3
+declare i32 @test_multi_bb_resolver._Msve2() #2
+declare i32 @test_multi_bb_resolver._Msve() #1
+declare i32 @test_multi_bb_resolver.default() #0
+
+define weak_odr ptr @test_multi_bb_resolver.resolver() comdat {
+; CHECK-LABEL: define weak_odr ptr @test_multi_bb_resolver.resolver() comdat {
+resolver_entry:
+ tail call void @__init_cpu_features_resolver()
+ %0 = load i64, ptr @__aarch64_cpu_features, align 8
+ %1 = and i64 %0, 576460752303423488
+ %.not = icmp eq i64 %1, 0
+ br i1 %.not, label %resolver_else, label %common.ret
+
+common.ret: ; preds = %resolver_else2, %resolver_else, %resolver_entry
+ %common.ret.op = phi ptr [ @test_multi_bb_resolver._Mmops, %resolver_entry ], [ @test_multi_bb_resolver._Msve2, %resolver_else ], [ %test_multi_bb_resolver._Msve.test_multi_bb_resolver.default, %resolver_else2 ]
+ ret ptr %common.ret.op
+
+resolver_else: ; preds = %resolver_entry
+ %2 = and i64 %0, 68719476736
+ %.not5 = icmp eq i64 %2, 0
+ br i1 %.not5, label %resolver_else2, label %common.ret
+
+resolver_else2: ; preds = %resolver_else
+ %3 = and i64 %0, 1073741824
+ %.not6 = icmp eq i64 %3, 0
+ %test_multi_bb_resolver._Msve.test_multi_bb_resolver.default = select i1 %.not6, ptr @test_multi_bb_resolver.default, ptr @test_multi_bb_resolver._Msve
+ br label %common.ret
+}
+
+define i32 @caller2._MmopsMsve2() #4 {
+; CHECK-LABEL: define i32 @caller2._MmopsMsve2(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR4:[0-9]+]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_multi_bb_resolver._Mmops()
+;
+entry:
+ %call = tail call i32 @test_multi_bb_resolver()
+ ret i32 %call
+}
+
+define i32 @caller2._Mmops() #3 {
+; CHECK-LABEL: define i32 @caller2._Mmops(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR3:[0-9]+]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_multi_bb_resolver._Mmops()
+;
+entry:
+ %call = tail call i32 @test_multi_bb_resolver()
+ ret i32 %call
+}
+
+define i32 @caller2._Msve() #1 {
+; CHECK-LABEL: define i32 @caller2._Msve(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_multi_bb_resolver()
+;
+entry:
+ %call = tail call i32 @test_multi_bb_resolver()
+ ret i32 %call
+}
+
+define i32 @caller2.default() #0 {
+; CHECK-LABEL: define i32 @caller2.default(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_multi_bb_resolver.default()
+;
+entry:
+ %call = tail call i32 @test_multi_bb_resolver()
+ ret i32 %call
+}
+
+declare i32 @test_caller_feats_not_implied._Mmops() #3
+declare i32 @test_caller_feats_not_implied._Msme() #5
+declare i32 @test_caller_feats_not_implied._Msve() #1
+declare i32 @test_caller_feats_not_implied.default() #0
+
+define weak_odr ptr @test_caller_feats_not_implied.resolver() comdat {
+; CHECK-LABEL: define weak_odr ptr @test_caller_feats_not_implied.resolver() comdat {
+resolver_entry:
+ tail call void @__init_cpu_features_resolver()
+ %0 = load i64, ptr @__aarch64_cpu_features, align 8
+ %1 = and i64 %0, 576460752303423488
+ %.not = icmp eq i64 %1, 0
+ br i1 %.not, label %resolver_else, label %common.ret
+
+common.ret: ; preds = %resolver_else2, %resolver_else, %resolver_entry
+ %common.ret.op = phi ptr [ @test_caller_feats_not_implied._Mmops, %resolver_entry ], [ @test_caller_feats_not_implied._Msme, %resolver_else ], [ %test_caller_feats_not_implied._Msve.test_caller_feats_not_implied.default, %resolver_else2 ]
+ ret ptr %common.ret.op
+
+resolver_else: ; preds = %resolver_entry
+ %2 = and i64 %0, 4398046511104
+ %.not5 = icmp eq i64 %2, 0
+ br i1 %.not5, label %resolver_else2, label %common.ret
+
+resolver_else2: ; preds = %resolver_else
+ %3 = and i64 %0, 1073741824
+ %.not6 = icmp eq i64 %3, 0
+ %test_caller_feats_not_implied._Msve.test_caller_feats_not_implied.default = select i1 %.not6, ptr @test_caller_feats_not_implied.default, ptr @test_caller_feats_not_implied._Msve
+ br label %common.ret
+}
+
+define i32 @caller3._Mmops() #3 {
+; CHECK-LABEL: define i32 @caller3._Mmops(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR3]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_caller_feats_not_implied._Mmops()
+;
+entry:
+ %call = tail call i32 @test_caller_feats_not_implied()
+ ret i32 %call
+}
+
+define i32 @caller3._Msve() #1 {
+; CHECK-LABEL: define i32 @caller3._Msve(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_caller_feats_not_implied()
+;
+entry:
+ %call = tail call i32 @test_caller_feats_not_implied()
+ ret i32 %call
+}
+
+define i32 @caller3.default() #0 {
+; CHECK-LABEL: define i32 @caller3.default(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_caller_feats_not_implied()
+;
+entry:
+ %call = tail call i32 @test_caller_feats_not_implied()
+ ret i32 %call
+}
+
+declare i32 @test_non_fmv_caller._Maes() #6
+declare i32 @test_non_fmv_caller._Msm4() #7
+declare i32 @test_non_fmv_caller.default() #0
+
+define weak_odr ptr @test_non_fmv_caller.resolver() comdat {
+; CHECK-LABEL: define weak_odr ptr @test_non_fmv_caller.resolver() comdat {
+resolver_entry:
+ tail call void @__init_cpu_features_resolver()
+ %0 = load i64, ptr @__aarch64_cpu_features, align 8
+ %1 = and i64 %0, 32768
+ %.not = icmp eq i64 %1, 0
+ %test_non_fmv_caller._Maes.test_non_fmv_caller.default = select i1 %.not, ptr @test_non_fmv_caller.default, ptr @test_non_fmv_caller._Maes
+ ret ptr %test_non_fmv_caller._Maes.test_non_fmv_caller.default
+}
+
+define i32 @caller4() #8 {
+; CHECK-LABEL: define i32 @caller4(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR7:[0-9]+]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_non_fmv_caller._Maes()
+;
+entry:
+ %call = tail call i32 @test_non_fmv_caller()
+ ret i32 %call
+}
+
+define i32 @caller5() #9 {
+; CHECK-LABEL: define i32 @caller5(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR8:[0-9]+]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_non_fmv_caller()
+;
+entry:
+ %call = tail call i32 @test_non_fmv_caller()
+ ret i32 %call
+}
+
+declare i32 @test_priority._Msve2-sha3() #10
+declare i32 @test_priority._Mls64Mssbs() #11
+declare i32 @test_priority._MflagmMlseMrng() #12
+declare i32 @test_priority.default() #0
+
+define weak_odr ptr @test_priority.resolver() comdat {
+; CHECK-LABEL: define weak_odr ptr @test_priority.resolver() comdat {
+resolver_entry:
+ tail call void @__init_cpu_features_resolver()
+ %0 = load i64, ptr @__aarch64_cpu_features, align 8
+ %1 = and i64 %0, 131
+ %2 = icmp eq i64 %1, 131
+ br i1 %2, label %common.ret, label %resolver_else
+
+common.ret: ; preds = %resolver_else2, %resolver_else, %resolver_entry
+ %common.ret.op = phi ptr [ @test_priority._MflagmMlseMrng, %resolver_entry ], [ @test_priority._Mls64Mssbs, %resolver_else ], [ %test_priority._Msve2-sha3.test_priority.default, %resolver_else2 ]
+ ret ptr %common.ret.op
+
+resolver_else: ; preds = %resolver_entry
+ %3 = and i64 %0, 9570149208162304
+ %4 = icmp eq i64 %3, 9570149208162304
+ br i1 %4, label %common.ret, label %resolver_else2
+
+resolver_else2: ; preds = %resolver_else
+ %5 = and i64 %0, 1099511627776
+ %.not = icmp eq i64 %5, 0
+ %test_priority._Msve2-sha3.test_priority.default = select i1 %.not, ptr @test_priority.default, ptr @test_priority._Msve2-sha3
+ br label %common.ret
+}
+
+define i32 @caller6._MflagmMls64MlseMrngMssbsMsve2-sha3() #13 {
+; CHECK-LABEL: define i32 @caller6._MflagmMls64MlseMrngMssbsMsve2-sha3(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR12:[0-9]+]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_priority._Mls64Mssbs()
+;
+entry:
+ %call = tail call i32 @test_priority()
+ ret i32 %call
+}
+
+declare i32 @test_alternative_names._Mdpb2Mfrintts() #14
+declare i32 @test_alternative_names._Mflagm2Mfrintts() #15
+declare i32 @test_alternative_names._Mrcpc2() #16
+declare i32 @test_alternative_names.default() #0
+
+define weak_odr ptr @test_alternative_names.resolver() comdat {
+; CHECK-LABEL: define weak_odr ptr @test_alternative_names.resolver() comdat {
+resolver_entry:
+ tail call void @__init_cpu_features_resolver()
+ %0 = load i64, ptr @__aarch64_cpu_features, align 8
+ %1 = and i64 %0, 17563904
+ %2 = icmp eq i64 %1, 17563904
+ br i1 %2, label %common.ret, label %resolver_else
+
+common.ret: ; preds = %resolver_else2, %resolver_else, %resolver_entry
+ %common.ret.op = phi ptr [ @test_alternative_names._Mdpb2Mfrintts, %resolver_entry ], [ @test_alternative_names._Mflagm2Mfrintts, %resolver_else ], [ %test_alternative_names._Mrcpc2.test_alternative_names.default, %resolver_else2 ]
+ ret ptr %common.ret.op
+
+resolver_else: ; preds = %resolver_entry
+ %3 = and i64 %0, 16777478
+ %4 = icmp eq i64 %3, 16777478
+ br i1 %4, label %common.ret, label %resolver_else2
+
+resolver_else2: ; preds = %resolver_else
+ %5 = and i64 %0, 12582912
+ %6 = icmp eq i64 %5, 12582912
+ %test_alternative_names._Mrcpc2.test_alternative_names.default = select i1 %6, ptr @test_alternative_names._Mrcpc2, ptr @test_alternative_names.default
+ br label %common.ret
+}
+
+define i32 @caller7._Mdpb2Mfrintts() #14 {
+; CHECK-LABEL: define i32 @caller7._Mdpb2Mfrintts(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR13:[0-9]+]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_alternative_names._Mdpb2Mfrintts()
+;
+entry:
+ %call = tail call i32 @test_alternative_names()
+ ret i32 %call
+}
+
+define i32 @caller7._Mfrintts() #17 {
+; CHECK-LABEL: define i32 @caller7._Mfrintts(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR16:[0-9]+]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_alternative_names()
+;
+entry:
+ %call = tail call i32 @test_alternative_names()
+ ret i32 %call
+}
+
+define i32 @caller7._Mrcpc2() #16 {
+; CHECK-LABEL: define i32 @caller7._Mrcpc2(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR15:[0-9]+]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_alternative_names._Mrcpc2()
+;
+entry:
+ %call = tail call i32 @test_alternative_names()
+ ret i32 %call
+}
+
+define i32 @caller7.default() #0 {
+; CHECK-LABEL: define i32 @caller7.default(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_alternative_names.default()
+;
+entry:
+ %call = tail call i32 @test_alternative_names()
+ ret i32 %call
+}
+
+attributes #0 = { "fmv-features" }
+attributes #1 = { "fmv-features"="sve" }
+attributes #2 = { "fmv-features"="sve2" }
+attributes #3 = { "fmv-features"="mops" }
+attributes #4 = { "fmv-features"="mops,sve2" }
+attributes #5 = { "fmv-features"="sme" }
+attributes #6 = { "fmv-features"="aes" }
+attributes #7 = { "fmv-features"="sm4" }
+attributes #8 = { "target-features"="+aes,+fp-armv8,+neon,+outline-atomics,+v8a" }
+attributes #9 = { "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,+sm4" }
+attributes #10 = { "fmv-features"="sve2-sha3" }
+attributes #11 = { "fmv-features"="ls64,ssbs" }
+attributes #12 = { "fmv-features"="flagm,lse,rng" }
+attributes #13 = { "fmv-features"="flagm,ls64,lse,rng,ssbs,sve2-sha3" }
+attributes #14 = { "fmv-features"="dpb2,frintts" }
+attributes #15 = { "fmv-features"="flagm2,frintts" }
+attributes #16 = { "fmv-features"="rcpc2" }
+attributes #17 = { "fmv-features"="frintts" }
More information about the llvm-commits
mailing list