[clang] e43924a - [AArch64] FMV support and necessary target features dependencies.
Pavel Iliin via cfe-commits
cfe-commits at lists.llvm.org
Tue Dec 20 07:42:49 PST 2022
Author: Pavel Iliin
Date: 2022-12-20T15:42:25Z
New Revision: e43924a75145d2f9e722f74b673145c3e62bfd07
URL: https://github.com/llvm/llvm-project/commit/e43924a75145d2f9e722f74b673145c3e62bfd07
DIFF: https://github.com/llvm/llvm-project/commit/e43924a75145d2f9e722f74b673145c3e62bfd07.diff
LOG: [AArch64] FMV support and necessary target features dependencies.
This is the Function Multi Versioning (FMV) implementation for the AArch64 target, in
accordance with the Beta Arm C Language Extensions specification:
https://github.com/ARM-software/acle/blob/main/main/acle.md#function-multi-versioning
It supports the new "target_version" function attribute and extends the existing
"target_clones" one. Missing dependencies for target features were also added.
Differential Revision: https://reviews.llvm.org/D127812
Added:
clang/test/AST/attr-target-version.c
clang/test/CodeGen/attr-target-clones-aarch64.c
clang/test/CodeGen/attr-target-version.c
clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp
clang/test/CodeGenCXX/attr-target-version.cpp
clang/test/Sema/attr-target-clones-aarch64.c
clang/test/Sema/attr-target-version.c
clang/test/SemaCXX/attr-target-clones-aarch64.cpp
clang/test/SemaCXX/attr-target-version.cpp
Modified:
clang/include/clang/AST/ASTContext.h
clang/include/clang/AST/Decl.h
clang/include/clang/Basic/Attr.td
clang/include/clang/Basic/AttrDocs.td
clang/include/clang/Basic/DiagnosticSemaKinds.td
clang/include/clang/Basic/TargetInfo.h
clang/include/clang/Driver/Options.td
clang/include/clang/Sema/Sema.h
clang/lib/AST/ASTContext.cpp
clang/lib/AST/Decl.cpp
clang/lib/Basic/Targets/AArch64.cpp
clang/lib/Basic/Targets/AArch64.h
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/CodeGen/CGOpenMPRuntime.cpp
clang/lib/CodeGen/CodeGenFunction.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/Driver/ToolChains/Clang.cpp
clang/lib/Sema/Sema.cpp
clang/lib/Sema/SemaDecl.cpp
clang/lib/Sema/SemaDeclAttr.cpp
clang/lib/Sema/SemaOverload.cpp
clang/test/CodeGen/aarch64-targetattr.c
clang/test/Driver/aarch64-features.c
clang/test/Misc/pragma-attribute-supported-attributes-list.test
clang/test/Preprocessor/aarch64-target-features.c
clang/test/Preprocessor/init-aarch64.c
compiler-rt/lib/builtins/cpu_model.c
llvm/include/llvm/TargetParser/AArch64TargetParser.def
llvm/include/llvm/TargetParser/AArch64TargetParser.h
llvm/lib/Target/AArch64/AArch64.td
llvm/lib/TargetParser/AArch64TargetParser.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index f45c84406d2a2..326f115f1bda0 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -3116,6 +3116,9 @@ class ASTContext : public RefCountedBase<ASTContext> {
/// valid feature names.
ParsedTargetAttr filterFunctionTargetAttrs(const TargetAttr *TD) const;
+ std::vector<std::string>
+ filterFunctionTargetVersionAttrs(const TargetVersionAttr *TV) const;
+
void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
const FunctionDecl *) const;
void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index cd33fcef56619..858cd00efc4ec 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -1891,7 +1891,8 @@ enum class MultiVersionKind {
Target,
CPUSpecific,
CPUDispatch,
- TargetClones
+ TargetClones,
+ TargetVersion
};
/// Represents a function declaration or definition.
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index aef3356ed405b..65b62422785c0 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -2740,6 +2740,31 @@ def Target : InheritableAttr {
}];
}
+def TargetVersion : InheritableAttr {
+ let Spellings = [GCC<"target_version">];
+ let Args = [StringArgument<"NamesStr">];
+ let Subjects = SubjectList<[Function], ErrorDiag>;
+ let Documentation = [TargetVersionDocs];
+ let AdditionalMembers = [{
+ StringRef getName() const { return getNamesStr().trim(); }
+ bool isDefaultVersion() const {
+ return getName() == "default";
+ }
+ void getFeatures(llvm::SmallVectorImpl<StringRef> &Out) const {
+ if (isDefaultVersion()) return;
+ StringRef Features = getName();
+
+ SmallVector<StringRef, 8> AttrFeatures;
+ Features.split(AttrFeatures, "+");
+
+ for (auto &Feature : AttrFeatures) {
+ Feature = Feature.trim();
+ Out.push_back(Feature);
+ }
+ }
+ }];
+}
+
def TargetClones : InheritableAttr {
let Spellings = [GCC<"target_clones">];
let Args = [VariadicStringArgument<"featuresStrs">];
@@ -2773,11 +2798,12 @@ def TargetClones : InheritableAttr {
return 0 == std::count_if(
featuresStrs_begin(), featuresStrs_begin() + Index,
[FeatureStr](StringRef S) { return S == FeatureStr; });
+
}
}];
}
-def : MutualExclusions<[TargetClones, Target, CPUDispatch, CPUSpecific]>;
+def : MutualExclusions<[TargetClones, TargetVersion, Target, CPUDispatch, CPUSpecific]>;
def MinVectorWidth : InheritableAttr {
let Spellings = [Clang<"min_vector_width">];
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 77f72d2b2bd4e..2dc5e7d61817a 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -2377,6 +2377,19 @@ Additionally, a function may not become multiversioned after its first use.
}];
}
+def TargetVersionDocs : Documentation {
+ let Category = DocCatFunction;
+ let Content = [{
+For the AArch64 target, clang supports function multiversioning via the
+``__attribute__((target_version("OPTIONS")))`` attribute. When applied to a
+function, it instructs the compiler to emit multiple function versions based on
+the ``target_version`` attribute strings; the version to call is resolved at runtime
+according to priority and target feature availability. One of the versions is always
+(implicitly or explicitly) the ``default`` (fallback). Attribute strings can
+contain dependent feature names joined by the "+" sign.
+}];
+}
+
def TargetClonesDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
@@ -2387,6 +2400,19 @@ generation options. Additionally, these versions will be resolved at runtime
based on the priority of their attribute options. All ``target_clone`` functions
are considered multiversioned functions.
+For AArch64 target:
+The attribute contains comma-separated strings of target features joined by "+"
+sign. For example:
+
+ .. code-block:: c++
+
+ __attribute__((target_clones("sha2+memtag2", "fcma+sve2-pmull128")))
+ void foo() {}
+
+For every multiversioned function, a ``default`` (fallback) implementation is
+always generated if not specified directly.
+
+For x86/x86-64 targets:
All multiversioned functions must contain a ``default`` (fallback)
implementation, otherwise usages of the function are considered invalid.
Additionally, a function may not become multiversioned after its first use.
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 53ec661285395..ff68514621327 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3056,8 +3056,8 @@ def warn_unsupported_branch_protection_spec : Warning<
def warn_unsupported_target_attribute
: Warning<"%select{unsupported|duplicate|unknown}0%select{| CPU|"
- " tune CPU}1 '%2' in the '%select{target|target_clones}3' "
- "attribute string; '%select{target|target_clones}3' "
+ " tune CPU}1 '%2' in the '%select{target|target_clones|target_version}3' "
+ "attribute string; '%select{target|target_clones|target_version}3' "
"attribute ignored">,
InGroup<IgnoredAttributes>;
def err_attribute_unsupported
@@ -11511,7 +11511,7 @@ def note_shadow_field : Note<"declared here">;
def err_multiversion_required_in_redecl : Error<
"function declaration is missing %select{'target'|'cpu_specific' or "
- "'cpu_dispatch'}0 attribute in a multiversioned function">;
+ "'cpu_dispatch'|'target_version'}0 attribute in a multiversioned function">;
def note_multiversioning_caused_here : Note<
"function multiversioning caused by this declaration">;
def err_multiversion_after_used : Error<
@@ -11526,7 +11526,7 @@ def err_multiversion_noproto : Error<
"multiversioned function must have a prototype">;
def err_multiversion_disallowed_other_attr
: Error<"attribute "
- "'%select{|target|cpu_specific|cpu_dispatch|target_clones}0' "
+ "'%select{|target|cpu_specific|cpu_dispatch|target_clones|target_version}0' "
"multiversioning cannot be combined"
" with attribute %1">;
def err_multiversion_diff : Error<
@@ -11535,7 +11535,7 @@ def err_multiversion_diff : Error<
"language linkage}0">;
def err_multiversion_doesnt_support
: Error<"attribute "
- "'%select{|target|cpu_specific|cpu_dispatch|target_clones}0' "
+ "'%select{|target|cpu_specific|cpu_dispatch|target_clones|target_version}0' "
"multiversioned functions do not "
"yet support %select{function templates|virtual functions|"
"deduced return types|constructors|destructors|deleted functions|"
@@ -11570,6 +11570,9 @@ def warn_target_clone_mixed_values
def warn_target_clone_duplicate_options
: Warning<"version list contains duplicate entries">,
InGroup<FunctionMultiVersioning>;
+def warn_target_clone_no_impact_options
+ : Warning<"version list contains entries that don't impact code generation">,
+ InGroup<FunctionMultiVersioning>;
// three-way comparison operator diagnostics
def err_implied_comparison_category_type_not_found : Error<
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 746e0f4ea918b..9e310f6473a67 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1340,6 +1340,13 @@ class TargetInfo : public virtual TransferrableTargetInfo,
return true;
}
+ /// Returns true if feature has an impact on target code
+ /// generation and get its dependent options in second argument.
+ virtual bool getFeatureDepOptions(StringRef Feature,
+ std::string &Options) const {
+ return true;
+ }
+
struct BranchProtectionInfo {
LangOptions::SignReturnAddressScopeKind SignReturnAddr =
LangOptions::SignReturnAddressScopeKind::None;
@@ -1386,7 +1393,9 @@ class TargetInfo : public virtual TransferrableTargetInfo,
/// Identify whether this target supports multiversioning of functions,
/// which requires support for cpu_supports and cpu_is functionality.
- bool supportsMultiVersioning() const { return getTriple().isX86(); }
+ bool supportsMultiVersioning() const {
+ return getTriple().isX86() || getTriple().isAArch64();
+ }
/// Identify whether this target supports IFuncs.
bool supportsIFunc() const {
@@ -1403,6 +1412,10 @@ class TargetInfo : public virtual TransferrableTargetInfo,
return 0;
}
+ // Return the target-specific cost for feature
+ // that taken into account in priority sorting.
+ virtual unsigned multiVersionFeatureCost() const { return 0; }
+
// Validate the contents of the __builtin_cpu_is(const char*)
// argument.
virtual bool validateCpuIs(StringRef Name) const { return false; }
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index f1fd45d8394ab..833794e9efa35 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3858,6 +3858,8 @@ def msmall_data_threshold_EQ : Joined <["-"], "msmall-data-threshold=">,
def msoft_float : Flag<["-"], "msoft-float">, Group<m_Group>, Flags<[CC1Option]>,
HelpText<"Use software floating point">,
MarshallingInfoFlag<CodeGenOpts<"SoftFloat">>;
+def mno_fmv : Flag<["-"], "mno-fmv">, Group<f_clang_Group>, Flags<[CC1Option]>,
+ HelpText<"Disable function multiversioning">;
def moutline_atomics : Flag<["-"], "moutline-atomics">, Group<f_clang_Group>, Flags<[CC1Option]>,
HelpText<"Generate local calls to out-of-line atomic operations">;
def mno_outline_atomics : Flag<["-"], "mno-outline-atomics">, Group<f_clang_Group>, Flags<[CC1Option]>,
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 67ba41ffa8464..9a110b13583b8 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -4657,10 +4657,14 @@ class Sema final {
llvm::Error isValidSectionSpecifier(StringRef Str);
bool checkSectionName(SourceLocation LiteralLoc, StringRef Str);
bool checkTargetAttr(SourceLocation LiteralLoc, StringRef Str);
- bool checkTargetClonesAttrString(SourceLocation LiteralLoc, StringRef Str,
- const StringLiteral *Literal,
- bool &HasDefault, bool &HasCommas,
- SmallVectorImpl<StringRef> &Strings);
+ bool checkTargetVersionAttr(SourceLocation LiteralLoc, StringRef &Str,
+ bool &isDefault);
+ bool
+ checkTargetClonesAttrString(SourceLocation LiteralLoc, StringRef Str,
+ const StringLiteral *Literal, bool &HasDefault,
+ bool &HasCommas, bool &HasNotDefault,
+ SmallVectorImpl<StringRef> &Strings,
+ SmallVectorImpl<SmallString<64>> &StringsBuffer);
bool checkMSInheritanceAttrOnDefinition(
CXXRecordDecl *RD, SourceRange Range, bool BestCase,
MSInheritanceModel SemanticSpelling);
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index c2491f872a58e..d90d59380534e 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -13291,6 +13291,18 @@ QualType ASTContext::getCorrespondingSignedFixedPointType(QualType Ty) const {
}
}
+std::vector<std::string> ASTContext::filterFunctionTargetVersionAttrs(
+ const TargetVersionAttr *TV) const {
+ assert(TV != nullptr);
+ llvm::SmallVector<StringRef, 8> Feats;
+ std::vector<std::string> ResFeats;
+ TV->getFeatures(Feats);
+ for (auto &Feature : Feats)
+ if (Target->validateCpuSupports(Feature.str()))
+ ResFeats.push_back("?" + Feature.str());
+ return ResFeats;
+}
+
ParsedTargetAttr
ASTContext::filterFunctionTargetAttrs(const TargetAttr *TD) const {
assert(TD != nullptr);
@@ -13349,12 +13361,32 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
} else if (const auto *TC = FD->getAttr<TargetClonesAttr>()) {
std::vector<std::string> Features;
StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex());
- if (VersionStr.startswith("arch="))
- TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1);
- else if (VersionStr != "default")
- Features.push_back((StringRef{"+"} + VersionStr).str());
-
+ if (Target->getTriple().isAArch64()) {
+ // TargetClones for AArch64
+ if (VersionStr != "default") {
+ SmallVector<StringRef, 1> VersionFeatures;
+ VersionStr.split(VersionFeatures, "+");
+ for (auto &VFeature : VersionFeatures) {
+ VFeature = VFeature.trim();
+ Features.push_back((StringRef{"?"} + VFeature).str());
+ }
+ }
+ Features.insert(Features.begin(),
+ Target->getTargetOpts().FeaturesAsWritten.begin(),
+ Target->getTargetOpts().FeaturesAsWritten.end());
+ } else {
+ if (VersionStr.startswith("arch="))
+ TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1);
+ else if (VersionStr != "default")
+ Features.push_back((StringRef{"+"} + VersionStr).str());
+ }
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
+ } else if (const auto *TV = FD->getAttr<TargetVersionAttr>()) {
+ std::vector<std::string> Feats = filterFunctionTargetVersionAttrs(TV);
+ Feats.insert(Feats.begin(),
+ Target->getTargetOpts().FeaturesAsWritten.begin(),
+ Target->getTargetOpts().FeaturesAsWritten.end());
+ Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Feats);
} else {
FeatureMap = Target->getTargetOpts().FeatureMap;
}
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index b1fdc897bf27e..ccf5d71538e9f 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -3347,6 +3347,8 @@ bool FunctionDecl::isNoReturn() const {
MultiVersionKind FunctionDecl::getMultiVersionKind() const {
if (hasAttr<TargetAttr>())
return MultiVersionKind::Target;
+ if (hasAttr<TargetVersionAttr>())
+ return MultiVersionKind::TargetVersion;
if (hasAttr<CPUDispatchAttr>())
return MultiVersionKind::CPUDispatch;
if (hasAttr<CPUSpecificAttr>())
@@ -3365,7 +3367,8 @@ bool FunctionDecl::isCPUSpecificMultiVersion() const {
}
bool FunctionDecl::isTargetMultiVersion() const {
- return isMultiVersion() && hasAttr<TargetAttr>();
+ return isMultiVersion() &&
+ (hasAttr<TargetAttr>() || hasAttr<TargetVersionAttr>());
}
bool FunctionDecl::isTargetClonesMultiVersion() const {
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 90a83f62574fc..7b3dab7997405 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -58,6 +58,91 @@ static StringRef getArchVersionString(llvm::AArch64::ArchKind Kind) {
}
}
+void AArch64TargetInfo::setArchFeatures() {
+ switch (ArchKind) {
+ case llvm::AArch64::ArchKind::ARMV8_9A:
+ case llvm::AArch64::ArchKind::ARMV8_8A:
+ case llvm::AArch64::ArchKind::ARMV8_7A:
+ HasWFxT = true;
+ LLVM_FALLTHROUGH;
+ case llvm::AArch64::ArchKind::ARMV8_6A:
+ HasBFloat16 = true;
+ HasMatMul = true;
+ LLVM_FALLTHROUGH;
+ case llvm::AArch64::ArchKind::ARMV8_5A:
+ HasAlternativeNZCV = true;
+ HasFRInt3264 = true;
+ HasSSBS = true;
+ HasSB = true;
+ HasPredRes = true;
+ HasBTI = true;
+ LLVM_FALLTHROUGH;
+ case llvm::AArch64::ArchKind::ARMV8_4A:
+ HasDotProd = true;
+ HasDIT = true;
+ HasFlagM = true;
+ LLVM_FALLTHROUGH;
+ case llvm::AArch64::ArchKind::ARMV8_3A:
+ HasRCPC = true;
+ FPU |= NeonMode;
+ LLVM_FALLTHROUGH;
+ case llvm::AArch64::ArchKind::ARMV8_2A:
+ HasCCPP = true;
+ LLVM_FALLTHROUGH;
+ case llvm::AArch64::ArchKind::ARMV8_1A:
+ HasCRC = true;
+ HasLSE = true;
+ HasRDM = true;
+ return;
+ default:
+ break;
+ }
+ switch (ArchKind) {
+ case llvm::AArch64::ArchKind::ARMV9_4A:
+ case llvm::AArch64::ArchKind::ARMV9_3A:
+ case llvm::AArch64::ArchKind::ARMV9_2A:
+ HasWFxT = true;
+ LLVM_FALLTHROUGH;
+ case llvm::AArch64::ArchKind::ARMV9_1A:
+ HasBFloat16 = true;
+ HasMatMul = true;
+ LLVM_FALLTHROUGH;
+ case llvm::AArch64::ArchKind::ARMV9A:
+ FPU |= SveMode;
+ HasSVE2 = true;
+ HasFullFP16 = true;
+ HasAlternativeNZCV = true;
+ HasFRInt3264 = true;
+ HasSSBS = true;
+ HasSB = true;
+ HasPredRes = true;
+ HasBTI = true;
+ HasDotProd = true;
+ HasDIT = true;
+ HasFlagM = true;
+ HasRCPC = true;
+ FPU |= NeonMode;
+ HasCCPP = true;
+ HasCRC = true;
+ HasLSE = true;
+ HasRDM = true;
+ return;
+ default:
+ break;
+ }
+ if (ArchKind == llvm::AArch64::ArchKind::ARMV8R) {
+ HasDotProd = true;
+ HasDIT = true;
+ HasFlagM = true;
+ HasRCPC = true;
+ FPU |= NeonMode;
+ HasCCPP = true;
+ HasCRC = true;
+ HasLSE = true;
+ HasRDM = true;
+ }
+}
+
StringRef AArch64TargetInfo::getArchProfile() const {
switch (ArchKind) {
case llvm::AArch64::ArchKind::ARMV8R:
@@ -366,6 +451,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
if (HasRCPC)
Builder.defineMacro("__ARM_FEATURE_RCPC", "1");
+ if (HasFMV)
+ Builder.defineMacro("__HAVE_FUNCTION_MULTI_VERSIONING", "1");
+
// The __ARM_FEATURE_CRYPTO is deprecated in favor of finer grained feature
// macros for AES, SHA2, SHA3 and SM4
if (HasAES && HasSHA2)
@@ -552,13 +640,95 @@ AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts) const {
return std::nullopt;
}
+unsigned AArch64TargetInfo::multiVersionSortPriority(StringRef Name) const {
+ if (Name == "default")
+ return 0;
+ unsigned Priority = llvm::StringSwitch<unsigned>(Name)
+#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE, FMV_ID, \
+ DEP_FEATURES, FMV_PRIORITY) \
+ .Case(NAME, FMV_PRIORITY)
+#include "../../../../llvm/include/llvm/TargetParser/AArch64TargetParser.def"
+ ;
+ assert((Name == "none" || Priority < multiVersionFeatureCost()) &&
+ "FMV priority is out of bounds!");
+ return Priority;
+}
+
+unsigned AArch64TargetInfo::multiVersionFeatureCost() const {
+ // Take the maximum priority as per feature cost, so more features win.
+ // AARCH64_ARCH_EXT_NAME "none" feature must have top priority, use it.
+ return multiVersionSortPriority("none");
+}
+
+bool AArch64TargetInfo::getFeatureDepOptions(StringRef Name,
+ std::string &FeatureVec) const {
+ FeatureVec = llvm::StringSwitch<std::string>(Name)
+#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE, FMV_ID, \
+ DEP_FEATURES, FMV_PRIORITY) \
+ .Case(NAME, DEP_FEATURES)
+#include "../../../../llvm/include/llvm/TargetParser/AArch64TargetParser.def"
+ .Default("");
+ return FeatureVec != "";
+}
+
+bool AArch64TargetInfo::validateCpuSupports(StringRef FeatureStr) const {
+ unsigned Feat = llvm::StringSwitch<unsigned>(FeatureStr)
+#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE, FMV_ID, \
+ DEP_FEATURES, FMV_PRIORITY) \
+ .Case(NAME, llvm::AArch64::FEAT_##FMV_ID)
+#include "../../../../llvm/include/llvm/TargetParser/AArch64TargetParser.def"
+ .Default(llvm::AArch64::FEAT_MAX);
+ return Feat != llvm::AArch64::FEAT_MAX;
+}
+
bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
return llvm::StringSwitch<bool>(Feature)
- .Cases("aarch64", "arm64", "arm", true)
- .Case("neon", FPU & NeonMode)
- .Cases("sve", "sve2", "sve2-bitperm", "sve2-aes", "sve2-sha3", "sve2-sm4", "f64mm", "f32mm", "i8mm", "bf16", FPU & SveMode)
- .Case("ls64", HasLS64)
- .Default(false);
+ .Cases("aarch64", "arm64", "arm", true)
+ .Case("fmv", HasFMV)
+ .Cases("neon", "fp", "simd", FPU & NeonMode)
+ .Case("jscvt", HasJSCVT)
+ .Case("fcma", HasFCMA)
+ .Case("rng", HasRandGen)
+ .Case("flagm", HasFlagM)
+ .Case("flagm2", HasAlternativeNZCV)
+ .Case("fp16fml", HasFP16FML)
+ .Case("dotprod", HasDotProd)
+ .Case("sm4", HasSM4)
+ .Case("rdm", HasRDM)
+ .Case("lse", HasLSE)
+ .Case("crc", HasCRC)
+ .Case("sha2", HasSHA2)
+ .Case("sha3", HasSHA3)
+ .Cases("aes", "pmull", HasAES)
+ .Cases("fp16", "fullfp16", HasFullFP16)
+ .Case("dit", HasDIT)
+ .Case("dpb", HasCCPP)
+ .Case("dpb2", HasCCDP)
+ .Case("rcpc", HasRCPC)
+ .Case("frintts", HasFRInt3264)
+ .Case("i8mm", HasMatMul)
+ .Case("bf16", HasBFloat16)
+ .Case("sve", FPU & SveMode)
+ .Case("sve-bf16", FPU & SveMode && HasBFloat16)
+ .Case("sve-i8mm", FPU & SveMode && HasMatMul)
+ .Case("f32mm", FPU & SveMode && HasMatmulFP32)
+ .Case("f64mm", FPU & SveMode && HasMatmulFP64)
+ .Case("sve2", FPU & SveMode && HasSVE2)
+ .Case("sve2-pmull128", FPU & SveMode && HasSVE2AES)
+ .Case("sve2-bitperm", FPU & SveMode && HasSVE2BitPerm)
+ .Case("sve2-sha3", FPU & SveMode && HasSVE2SHA3)
+ .Case("sve2-sm4", FPU & SveMode && HasSVE2SM4)
+ .Case("sme", HasSME)
+ .Case("sme-f64f64", HasSMEF64)
+ .Case("sme-i16i64", HasSMEI64)
+ .Cases("memtag", "memtag2", HasMTE)
+ .Case("sb", HasSB)
+ .Case("predres", HasPredRes)
+ .Cases("ssbs", "ssbs2", HasSSBS)
+ .Case("bti", HasBTI)
+ .Cases("ls64", "ls64_v", "ls64_accdata", HasLS64)
+ .Case("wfxt", HasWFxT)
+ .Default(false);
}
void AArch64TargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
@@ -580,63 +750,134 @@ void AArch64TargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) {
for (const auto &Feature : Features) {
- if (Feature == "+neon")
+ if (Feature == "-neon")
+ HasNoNeon = true;
+ if (Feature == "-sve")
+ HasNoSVE = true;
+
+ if (Feature == "+neon" || Feature == "+fp-armv8")
+ FPU |= NeonMode;
+ if (Feature == "+jscvt") {
+ HasJSCVT = true;
FPU |= NeonMode;
+ }
+ if (Feature == "+fcma") {
+ HasFCMA = true;
+ FPU |= NeonMode;
+ }
+
if (Feature == "+sve") {
+ FPU |= NeonMode;
FPU |= SveMode;
HasFullFP16 = true;
}
if (Feature == "+sve2") {
+ FPU |= NeonMode;
FPU |= SveMode;
HasFullFP16 = true;
HasSVE2 = true;
}
if (Feature == "+sve2-aes") {
+ FPU |= NeonMode;
FPU |= SveMode;
HasFullFP16 = true;
HasSVE2 = true;
HasSVE2AES = true;
}
if (Feature == "+sve2-sha3") {
+ FPU |= NeonMode;
FPU |= SveMode;
HasFullFP16 = true;
HasSVE2 = true;
HasSVE2SHA3 = true;
}
if (Feature == "+sve2-sm4") {
+ FPU |= NeonMode;
FPU |= SveMode;
HasFullFP16 = true;
HasSVE2 = true;
HasSVE2SM4 = true;
}
if (Feature == "+sve2-bitperm") {
+ FPU |= NeonMode;
FPU |= SveMode;
HasFullFP16 = true;
HasSVE2 = true;
HasSVE2BitPerm = true;
}
if (Feature == "+f32mm") {
+ FPU |= NeonMode;
FPU |= SveMode;
+ HasFullFP16 = true;
HasMatmulFP32 = true;
}
if (Feature == "+f64mm") {
+ FPU |= NeonMode;
FPU |= SveMode;
+ HasFullFP16 = true;
HasMatmulFP64 = true;
}
+ if (Feature == "+sme") {
+ HasSME = true;
+ HasBFloat16 = true;
+ }
+ if (Feature == "+sme-f64f64") {
+ HasSME = true;
+ HasSMEF64 = true;
+ HasBFloat16 = true;
+ }
+ if (Feature == "+sme-i16i64") {
+ HasSME = true;
+ HasSMEI64 = true;
+ HasBFloat16 = true;
+ }
+ if (Feature == "+sb")
+ HasSB = true;
+ if (Feature == "+predres")
+ HasPredRes = true;
+ if (Feature == "+ssbs")
+ HasSSBS = true;
+ if (Feature == "+bti")
+ HasBTI = true;
+ if (Feature == "+wfxt")
+ HasWFxT = true;
+ if (Feature == "-fmv")
+ HasFMV = false;
if (Feature == "+crc")
HasCRC = true;
if (Feature == "+rcpc")
HasRCPC = true;
- if (Feature == "+aes")
+ if (Feature == "+aes") {
+ FPU |= NeonMode;
HasAES = true;
- if (Feature == "+sha2")
+ }
+ if (Feature == "+sha2") {
+ FPU |= NeonMode;
HasSHA2 = true;
+ }
if (Feature == "+sha3") {
+ FPU |= NeonMode;
HasSHA2 = true;
HasSHA3 = true;
}
- if (Feature == "+sm4")
+ if (Feature == "+rdm") {
+ FPU |= NeonMode;
+ HasRDM = true;
+ }
+ if (Feature == "+dit")
+ HasDIT = true;
+ if (Feature == "+cccp")
+ HasCCPP = true;
+ if (Feature == "+ccdp") {
+ HasCCPP = true;
+ HasCCDP = true;
+ }
+ if (Feature == "+fptoint")
+ HasFRInt3264 = true;
+ if (Feature == "+sm4") {
+ FPU |= NeonMode;
HasSM4 = true;
+ }
if (Feature == "+strict-align")
HasUnaligned = false;
// All predecessor archs are added but select the latest one for ArchKind.
@@ -672,12 +913,19 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
ArchKind = llvm::AArch64::ArchKind::ARMV9_4A;
if (Feature == "+v8r")
ArchKind = llvm::AArch64::ArchKind::ARMV8R;
- if (Feature == "+fullfp16")
+ if (Feature == "+fullfp16") {
+ FPU |= NeonMode;
HasFullFP16 = true;
- if (Feature == "+dotprod")
+ }
+ if (Feature == "+dotprod") {
+ FPU |= NeonMode;
HasDotProd = true;
- if (Feature == "+fp16fml")
+ }
+ if (Feature == "+fp16fml") {
+ FPU |= NeonMode;
+ HasFullFP16 = true;
HasFP16FML = true;
+ }
if (Feature == "+mte")
HasMTE = true;
if (Feature == "+tme")
@@ -696,6 +944,10 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasRandGen = true;
if (Feature == "+flagm")
HasFlagM = true;
+ if (Feature == "+altnzcv") {
+ HasFlagM = true;
+ HasAlternativeNZCV = true;
+ }
if (Feature == "+mops")
HasMOPS = true;
if (Feature == "+d128")
@@ -711,6 +963,14 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
}
setDataLayout();
+ setArchFeatures();
+
+ if (HasNoNeon) {
+ FPU &= ~NeonMode;
+ FPU &= ~SveMode;
+ }
+ if (HasNoSVE)
+ FPU &= ~SveMode;
return true;
}
@@ -718,6 +978,7 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
bool AArch64TargetInfo::initFeatureMap(
llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
const std::vector<std::string> &FeaturesVec) const {
+ std::vector<std::string> UpdatedFeaturesVec;
// Parse the CPU and add any implied features.
llvm::AArch64::ArchKind Arch = llvm::AArch64::parseCPUArch(CPU);
if (Arch != llvm::AArch64::ArchKind::INVALID) {
@@ -726,11 +987,33 @@ bool AArch64TargetInfo::initFeatureMap(
llvm::AArch64::getExtensionFeatures(Exts, CPUFeats);
for (auto F : CPUFeats) {
assert((F[0] == '+' || F[0] == '-') && "Expected +/- in target feature!");
- setFeatureEnabled(Features, F.drop_front(), F[0] == '+');
+ UpdatedFeaturesVec.push_back(F.str());
}
}
- return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);
+ // Process target and dependent features. This is done in two loops collecting
+ // them into UpdatedFeaturesVec: first to add dependent '+'features,
+ // second to add target '+/-'features that can later disable some of
+ // features added on the first loop.
+ for (const auto &Feature : FeaturesVec)
+ if ((Feature[0] == '?' || Feature[0] == '+')) {
+ std::string Options;
+ if (AArch64TargetInfo::getFeatureDepOptions(Feature.substr(1), Options)) {
+ SmallVector<StringRef, 1> AttrFeatures;
+ StringRef(Options).split(AttrFeatures, ",");
+ for (auto F : AttrFeatures)
+ UpdatedFeaturesVec.push_back(F.str());
+ }
+ }
+ for (const auto &Feature : FeaturesVec)
+ if (Feature[0] == '+') {
+ std::string F;
+ llvm::AArch64::getFeatureOption(Feature, F);
+ UpdatedFeaturesVec.push_back(F);
+ } else if (Feature[0] != '?')
+ UpdatedFeaturesVec.push_back(Feature);
+
+ return TargetInfo::initFeatureMap(Features, Diags, CPU, UpdatedFeaturesVec);
}
// Parse AArch64 Target attributes, which are a comma separated list of:
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index b971c56ec9cda..b92696e98f6f7 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -43,6 +43,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
bool HasLS64 = false;
bool HasRandGen = false;
bool HasMatMul = false;
+ bool HasBFloat16 = false;
bool HasSVE2 = false;
bool HasSVE2AES = false;
bool HasSVE2SHA3 = false;
@@ -52,9 +53,28 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
bool HasMatmulFP32 = false;
bool HasLSE = false;
bool HasFlagM = false;
+ bool HasAlternativeNZCV = false;
bool HasMOPS = false;
bool HasD128 = false;
bool HasRCPC = false;
+ bool HasRDM = false;
+ bool HasDIT = false;
+ bool HasCCPP = false;
+ bool HasCCDP = false;
+ bool HasFRInt3264 = false;
+ bool HasSME = false;
+ bool HasSMEF64 = false;
+ bool HasSMEI64 = false;
+ bool HasSB = false;
+ bool HasPredRes = false;
+ bool HasSSBS = false;
+ bool HasBTI = false;
+ bool HasWFxT = false;
+ bool HasJSCVT = false;
+ bool HasFCMA = false;
+ bool HasNoNeon = false;
+ bool HasNoSVE = false;
+ bool HasFMV = true;
llvm::AArch64::ArchKind ArchKind = llvm::AArch64::ArchKind::INVALID;
@@ -77,10 +97,19 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
bool setCPU(const std::string &Name) override;
+ unsigned multiVersionSortPriority(StringRef Name) const override;
+ unsigned multiVersionFeatureCost() const override;
+
+ bool
+ initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags,
+ StringRef CPU,
+ const std::vector<std::string> &FeaturesVec) const override;
bool useFP16ConversionIntrinsics() const override {
return false;
}
+ void setArchFeatures();
+
void getTargetDefinesARMV81A(const LangOptions &Opts,
MacroBuilder &Builder) const;
void getTargetDefinesARMV82A(const LangOptions &Opts,
@@ -117,15 +146,14 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
Optional<std::pair<unsigned, unsigned>>
getVScaleRange(const LangOptions &LangOpts) const override;
+ bool getFeatureDepOptions(StringRef Feature,
+ std::string &Options) const override;
+ bool validateCpuSupports(StringRef FeatureStr) const override;
bool hasFeature(StringRef Feature) const override;
void setFeatureEnabled(llvm::StringMap<bool> &Features, StringRef Name,
bool Enabled) const override;
bool handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) override;
- bool
- initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags,
- StringRef CPU,
- const std::vector<std::string> &FeaturesVec) const override;
ParsedTargetAttr parseTargetAttr(StringRef Str) const override;
bool supportsTargetAttributeTune() const override { return true; }
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a9f7d501fc902..7cf7bd2c21f4a 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -52,6 +52,7 @@
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/MatrixBuilder.h"
+#include "llvm/Support/AArch64TargetParser.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/X86TargetParser.h"
@@ -13034,6 +13035,16 @@ llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
return Result;
}
+Value *CodeGenFunction::EmitAArch64CpuInit() {
+ llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
+ llvm::FunctionCallee Func =
+ CGM.CreateRuntimeFunction(FTy, "init_cpu_features_resolver");
+ cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
+ cast<llvm::GlobalValue>(Func.getCallee())
+ ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
+ return Builder.CreateCall(Func);
+}
+
Value *CodeGenFunction::EmitX86CpuInit() {
llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
/*Variadic*/ false);
@@ -13045,6 +13056,32 @@ Value *CodeGenFunction::EmitX86CpuInit() {
return Builder.CreateCall(Func);
}
+llvm::Value *
+CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
+ uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
+ Value *Result = Builder.getTrue();
+ if (FeaturesMask != 0) {
+ // Get features from structure in runtime library
+ // struct {
+ // unsigned long long features;
+ // } __aarch64_cpu_features;
+ llvm::Type *STy = llvm::StructType::get(Int64Ty);
+ llvm::Constant *AArch64CPUFeatures =
+ CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
+ cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
+ llvm::Value *CpuFeatures = Builder.CreateGEP(
+ STy, AArch64CPUFeatures,
+ {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
+ Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
+ CharUnits::fromQuantity(8));
+ Value *Mask = Builder.getInt64(FeaturesMask);
+ Value *Bitset = Builder.CreateAnd(Features, Mask);
+ Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
+ Result = Builder.CreateAnd(Result, Cmp);
+ }
+ return Result;
+}
+
Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
if (BuiltinID == X86::BI__builtin_cpu_is)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 4a5cb4dfa965c..3b62beddcaa58 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -11497,7 +11497,7 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
if (CGM.getTarget().hasFeature("sve"))
emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
MangledName, 's', 128, Fn, ExprLoc);
- if (CGM.getTarget().hasFeature("neon"))
+ else if (CGM.getTarget().hasFeature("neon"))
emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
MangledName, 'n', 128, Fn, ExprLoc);
}
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 874a0835f212f..412573028984d 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -2627,8 +2627,22 @@ void CodeGenFunction::EmitKCFIOperandBundle(
Bundles.emplace_back("kcfi", CGM.CreateKCFITypeId(FP->desugar()));
}
-llvm::Value *
-CodeGenFunction::FormResolverCondition(const MultiVersionResolverOption &RO) {
+llvm::Value *CodeGenFunction::FormAArch64ResolverCondition(
+ const MultiVersionResolverOption &RO) {
+ llvm::SmallVector<StringRef, 8> CondFeatures;
+ for (const StringRef &Feature : RO.Conditions.Features) {
+ // Form condition for features which are not yet enabled in target
+ if (!getContext().getTargetInfo().hasFeature(Feature))
+ CondFeatures.push_back(Feature);
+ }
+ if (!CondFeatures.empty()) {
+ return EmitAArch64CpuSupports(CondFeatures);
+ }
+ return nullptr;
+}
+
+llvm::Value *CodeGenFunction::FormX86ResolverCondition(
+ const MultiVersionResolverOption &RO) {
llvm::Value *Condition = nullptr;
if (!RO.Conditions.Architecture.empty())
@@ -2666,8 +2680,72 @@ static void CreateMultiVersionResolverReturn(CodeGenModule &CGM,
void CodeGenFunction::EmitMultiVersionResolver(
llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) {
- assert(getContext().getTargetInfo().getTriple().isX86() &&
- "Only implemented for x86 targets");
+
+ llvm::Triple::ArchType ArchType =
+ getContext().getTargetInfo().getTriple().getArch();
+
+ switch (ArchType) {
+ case llvm::Triple::x86:
+ case llvm::Triple::x86_64:
+ EmitX86MultiVersionResolver(Resolver, Options);
+ return;
+ case llvm::Triple::aarch64:
+ EmitAArch64MultiVersionResolver(Resolver, Options);
+ return;
+
+ default:
+ assert(false && "Only implemented for x86 and AArch64 targets");
+ }
+}
+
+void CodeGenFunction::EmitAArch64MultiVersionResolver(
+ llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) {
+ assert(!Options.empty() && "No multiversion resolver options found");
+ assert(Options.back().Conditions.Features.size() == 0 &&
+ "Default case must be last");
+ bool SupportsIFunc = getContext().getTargetInfo().supportsIFunc();
+ assert(SupportsIFunc &&
+ "Multiversion resolver requires target IFUNC support");
+ bool AArch64CpuInitialized = false;
+ llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver);
+
+ for (const MultiVersionResolverOption &RO : Options) {
+ Builder.SetInsertPoint(CurBlock);
+ llvm::Value *Condition = FormAArch64ResolverCondition(RO);
+
+ // The 'default' or 'all features enabled' case.
+ if (!Condition) {
+ CreateMultiVersionResolverReturn(CGM, Resolver, Builder, RO.Function,
+ SupportsIFunc);
+ return;
+ }
+
+ if (!AArch64CpuInitialized) {
+ Builder.SetInsertPoint(CurBlock, CurBlock->begin());
+ EmitAArch64CpuInit();
+ AArch64CpuInitialized = true;
+ Builder.SetInsertPoint(CurBlock);
+ }
+
+ llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver);
+ CGBuilderTy RetBuilder(*this, RetBlock);
+ CreateMultiVersionResolverReturn(CGM, Resolver, RetBuilder, RO.Function,
+ SupportsIFunc);
+ CurBlock = createBasicBlock("resolver_else", Resolver);
+ Builder.CreateCondBr(Condition, RetBlock, CurBlock);
+ }
+
+ // If no default, emit an unreachable.
+ Builder.SetInsertPoint(CurBlock);
+ llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap);
+ TrapCall->setDoesNotReturn();
+ TrapCall->setDoesNotThrow();
+ Builder.CreateUnreachable();
+ Builder.ClearInsertionPoint();
+}
+
+void CodeGenFunction::EmitX86MultiVersionResolver(
+ llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) {
bool SupportsIFunc = getContext().getTargetInfo().supportsIFunc();
@@ -2678,7 +2756,7 @@ void CodeGenFunction::EmitMultiVersionResolver(
for (const MultiVersionResolverOption &RO : Options) {
Builder.SetInsertPoint(CurBlock);
- llvm::Value *Condition = FormResolverCondition(RO);
+ llvm::Value *Condition = FormX86ResolverCondition(RO);
// The 'default' or 'generic' case.
if (!Condition) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index d1ea27a607953..f1d7cc7108a8e 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4813,6 +4813,12 @@ class CodeGenFunction : public CodeGenTypeCache {
// last (if it exists).
void EmitMultiVersionResolver(llvm::Function *Resolver,
ArrayRef<MultiVersionResolverOption> Options);
+ void
+ EmitX86MultiVersionResolver(llvm::Function *Resolver,
+ ArrayRef<MultiVersionResolverOption> Options);
+ void
+ EmitAArch64MultiVersionResolver(llvm::Function *Resolver,
+ ArrayRef<MultiVersionResolverOption> Options);
private:
QualType getVarArgType(const Expr *Arg);
@@ -4831,7 +4837,11 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs);
llvm::Value *EmitX86CpuSupports(uint64_t Mask);
llvm::Value *EmitX86CpuInit();
- llvm::Value *FormResolverCondition(const MultiVersionResolverOption &RO);
+ llvm::Value *FormX86ResolverCondition(const MultiVersionResolverOption &RO);
+ llvm::Value *EmitAArch64CpuInit();
+ llvm::Value *
+ FormAArch64ResolverCondition(const MultiVersionResolverOption &RO);
+ llvm::Value *EmitAArch64CpuSupports(ArrayRef<StringRef> FeatureStrs);
};
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c8783ea8578d5..e89ac0778c79c 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1337,6 +1337,20 @@ static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM,
Out << ".resolver";
}
+static void AppendTargetVersionMangling(const CodeGenModule &CGM,
+ const TargetVersionAttr *Attr,
+ raw_ostream &Out) {
+ if (Attr->isDefaultVersion())
+ return;
+ Out << "._";
+ llvm::SmallVector<StringRef, 8> Feats;
+ Attr->getFeatures(Feats);
+ for (const auto &Feat : Feats) {
+ Out << 'M';
+ Out << Feat;
+ }
+}
+
static void AppendTargetMangling(const CodeGenModule &CGM,
const TargetAttr *Attr, raw_ostream &Out) {
if (Attr->isDefaultVersion())
@@ -1382,14 +1396,27 @@ static void AppendTargetClonesMangling(const CodeGenModule &CGM,
const TargetClonesAttr *Attr,
unsigned VersionIndex,
raw_ostream &Out) {
- Out << '.';
- StringRef FeatureStr = Attr->getFeatureStr(VersionIndex);
- if (FeatureStr.startswith("arch="))
- Out << "arch_" << FeatureStr.substr(sizeof("arch=") - 1);
- else
- Out << FeatureStr;
+ if (CGM.getTarget().getTriple().isAArch64()) {
+ StringRef FeatureStr = Attr->getFeatureStr(VersionIndex);
+ if (FeatureStr == "default")
+ return;
+ Out << "._";
+ SmallVector<StringRef, 8> Features;
+ FeatureStr.split(Features, "+");
+ for (auto &Feat : Features) {
+ Out << 'M';
+ Out << Feat;
+ }
+ } else {
+ Out << '.';
+ StringRef FeatureStr = Attr->getFeatureStr(VersionIndex);
+ if (FeatureStr.startswith("arch="))
+ Out << "arch_" << FeatureStr.substr(sizeof("arch=") - 1);
+ else
+ Out << FeatureStr;
- Out << '.' << Attr->getMangledIndex(VersionIndex);
+ Out << '.' << Attr->getMangledIndex(VersionIndex);
+ }
}
static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
@@ -1445,6 +1472,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
case MultiVersionKind::Target:
AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out);
break;
+ case MultiVersionKind::TargetVersion:
+ AppendTargetVersionMangling(CGM, FD->getAttr<TargetVersionAttr>(), Out);
+ break;
case MultiVersionKind::TargetClones:
AppendTargetClonesMangling(CGM, FD->getAttr<TargetClonesAttr>(),
GD.getMultiVersionIndex(), Out);
@@ -2166,10 +2196,12 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
const auto *FD = dyn_cast_or_null<FunctionDecl>(GD.getDecl());
FD = FD ? FD->getMostRecentDecl() : FD;
const auto *TD = FD ? FD->getAttr<TargetAttr>() : nullptr;
+ const auto *TV = FD ? FD->getAttr<TargetVersionAttr>() : nullptr;
+ assert((!TD || !TV) && "both target_version and target specified");
const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr;
const auto *TC = FD ? FD->getAttr<TargetClonesAttr>() : nullptr;
bool AddedAttr = false;
- if (TD || SD || TC) {
+ if (TD || TV || SD || TC) {
llvm::StringMap<bool> FeatureMap;
getContext().getFunctionFeatureMap(FeatureMap, GD);
@@ -3623,12 +3655,18 @@ static unsigned
TargetMVPriority(const TargetInfo &TI,
const CodeGenFunction::MultiVersionResolverOption &RO) {
unsigned Priority = 0;
- for (StringRef Feat : RO.Conditions.Features)
+ unsigned NumFeatures = 0;
+ for (StringRef Feat : RO.Conditions.Features) {
Priority = std::max(Priority, TI.multiVersionSortPriority(Feat));
+ NumFeatures++;
+ }
if (!RO.Conditions.Architecture.empty())
Priority = std::max(
Priority, TI.multiVersionSortPriority(RO.Conditions.Architecture));
+
+ Priority += TI.multiVersionFeatureCost() * NumFeatures;
+
return Priority;
}
@@ -3673,13 +3711,19 @@ void CodeGenModule::emitMultiVersionFunctions() {
}
assert(Func && "This should have just been created");
}
-
- const auto *TA = CurFD->getAttr<TargetAttr>();
- llvm::SmallVector<StringRef, 8> Feats;
- TA->getAddedFeatures(Feats);
-
- Options.emplace_back(cast<llvm::Function>(Func),
- TA->getArchitecture(), Feats);
+ if (CurFD->getMultiVersionKind() == MultiVersionKind::Target) {
+ const auto *TA = CurFD->getAttr<TargetAttr>();
+ llvm::SmallVector<StringRef, 8> Feats;
+ TA->getAddedFeatures(Feats);
+ Options.emplace_back(cast<llvm::Function>(Func),
+ TA->getArchitecture(), Feats);
+ } else {
+ const auto *TVA = CurFD->getAttr<TargetVersionAttr>();
+ llvm::SmallVector<StringRef, 8> Feats;
+ TVA->getFeatures(Feats);
+ Options.emplace_back(cast<llvm::Function>(Func),
+ /*Architecture*/ "", Feats);
+ }
});
} else if (FD->isTargetClonesMultiVersion()) {
const auto *TC = FD->getAttr<TargetClonesAttr>();
@@ -3709,10 +3753,19 @@ void CodeGenModule::emitMultiVersionFunctions() {
StringRef Architecture;
llvm::SmallVector<StringRef, 1> Feature;
- if (Version.startswith("arch="))
- Architecture = Version.drop_front(sizeof("arch=") - 1);
- else if (Version != "default")
- Feature.push_back(Version);
+ if (getTarget().getTriple().isAArch64()) {
+ if (Version != "default") {
+ llvm::SmallVector<StringRef, 8> VerFeats;
+ Version.split(VerFeats, "+");
+ for (auto &CurFeat : VerFeats)
+ Feature.push_back(CurFeat.trim());
+ }
+ } else {
+ if (Version.startswith("arch="))
+ Architecture = Version.drop_front(sizeof("arch=") - 1);
+ else if (Version != "default")
+ Feature.push_back(Version);
+ }
Options.emplace_back(cast<llvm::Function>(Func), Architecture, Feature);
}
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 61294a8dfd2ac..bbb0cca2736a3 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7228,6 +7228,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("+outline-atomics");
}
+ if (Triple.isAArch64() &&
+ (Args.hasArg(options::OPT_mno_fmv) ||
+ getToolChain().GetRuntimeLibType(Args) != ToolChain::RLT_CompilerRT)) {
+ // Disable Function Multiversioning on AArch64 target.
+ CmdArgs.push_back("-target-feature");
+ CmdArgs.push_back("-fmv");
+ }
+
if (Args.hasFlag(options::OPT_faddrsig, options::OPT_fno_addrsig,
(TC.getTriple().isOSBinFormatELF() ||
TC.getTriple().isOSBinFormatCOFF()) &&
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 4666809ce86f0..ec13dc46e8c08 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -2539,6 +2539,9 @@ static void noteOverloads(Sema &S, const UnresolvedSetImpl &Overloads,
if (FD->isMultiVersion() && FD->hasAttr<TargetAttr>() &&
!FD->getAttr<TargetAttr>()->isDefaultVersion())
continue;
+ if (FD->isMultiVersion() && FD->hasAttr<TargetVersionAttr>() &&
+ !FD->getAttr<TargetVersionAttr>()->isDefaultVersion())
+ continue;
}
S.Diag(Fn->getLocation(), diag::note_possible_target_of_call);
++ShownOverloads;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 0d2cd02d10293..ae0b4c62b1998 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -10143,6 +10143,13 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
// Handle attributes.
ProcessDeclAttributes(S, NewFD, D);
+ const auto *NewTVA = NewFD->getAttr<TargetVersionAttr>();
+ if (NewTVA && !NewTVA->isDefaultVersion() &&
+ !Context.getTargetInfo().hasFeature("fmv")) {
+ // Don't add to scope fmv functions declarations if fmv disabled
+ AddToScope = false;
+ return NewFD;
+ }
if (getLangOpts().OpenCL) {
// OpenCL v1.1 s6.5: Using an address space qualifier in a function return
@@ -10351,7 +10358,8 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
D.setRedeclaration(true);
}
- assert((NewFD->isInvalidDecl() || !D.isRedeclaration() ||
+ assert((NewFD->isInvalidDecl() || NewFD->isMultiVersion() ||
+ !D.isRedeclaration() ||
Previous.getResultKind() != LookupResult::FoundOverloaded) &&
"previous declaration set still overloaded");
@@ -10823,37 +10831,53 @@ bool Sema::shouldLinkDependentDeclWithPrevious(Decl *D, Decl *PrevDecl) {
PrevVD->getType());
}
-/// Check the target attribute of the function for MultiVersion
-/// validity.
+/// Check the target or target_version attribute of the function for
+/// MultiVersion validity.
///
/// Returns true if there was an error, false otherwise.
static bool CheckMultiVersionValue(Sema &S, const FunctionDecl *FD) {
const auto *TA = FD->getAttr<TargetAttr>();
- assert(TA && "MultiVersion Candidate requires a target attribute");
- ParsedTargetAttr ParseInfo =
- S.getASTContext().getTargetInfo().parseTargetAttr(TA->getFeaturesStr());
+ const auto *TVA = FD->getAttr<TargetVersionAttr>();
+ assert(
+ (TA || TVA) &&
+ "MultiVersion candidate requires a target or target_version attribute");
const TargetInfo &TargetInfo = S.Context.getTargetInfo();
enum ErrType { Feature = 0, Architecture = 1 };
- if (!ParseInfo.CPU.empty() && !TargetInfo.validateCpuIs(ParseInfo.CPU)) {
- S.Diag(FD->getLocation(), diag::err_bad_multiversion_option)
- << Architecture << ParseInfo.CPU;
- return true;
- }
-
- for (const auto &Feat : ParseInfo.Features) {
- auto BareFeat = StringRef{Feat}.substr(1);
- if (Feat[0] == '-') {
+ if (TA) {
+ ParsedTargetAttr ParseInfo =
+ S.getASTContext().getTargetInfo().parseTargetAttr(TA->getFeaturesStr());
+ if (!ParseInfo.CPU.empty() && !TargetInfo.validateCpuIs(ParseInfo.CPU)) {
S.Diag(FD->getLocation(), diag::err_bad_multiversion_option)
- << Feature << ("no-" + BareFeat).str();
+ << Architecture << ParseInfo.CPU;
return true;
}
+ for (const auto &Feat : ParseInfo.Features) {
+ auto BareFeat = StringRef{Feat}.substr(1);
+ if (Feat[0] == '-') {
+ S.Diag(FD->getLocation(), diag::err_bad_multiversion_option)
+ << Feature << ("no-" + BareFeat).str();
+ return true;
+ }
+
+ if (!TargetInfo.validateCpuSupports(BareFeat) ||
+ !TargetInfo.isValidFeatureName(BareFeat)) {
+ S.Diag(FD->getLocation(), diag::err_bad_multiversion_option)
+ << Feature << BareFeat;
+ return true;
+ }
+ }
+ }
- if (!TargetInfo.validateCpuSupports(BareFeat) ||
- !TargetInfo.isValidFeatureName(BareFeat)) {
- S.Diag(FD->getLocation(), diag::err_bad_multiversion_option)
- << Feature << BareFeat;
- return true;
+ if (TVA) {
+ llvm::SmallVector<StringRef, 8> Feats;
+ TVA->getFeatures(Feats);
+ for (const auto &Feat : Feats) {
+ if (!TargetInfo.validateCpuSupports(Feat)) {
+ S.Diag(FD->getLocation(), diag::err_bad_multiversion_option)
+ << Feature << Feat;
+ return true;
+ }
}
}
return false;
@@ -10900,6 +10924,10 @@ static bool checkNonMultiVersionCompatAttributes(Sema &S,
if (MVKind != MultiVersionKind::Target)
return Diagnose(S, A);
break;
+ case attr::TargetVersion:
+ if (MVKind != MultiVersionKind::TargetVersion)
+ return Diagnose(S, A);
+ break;
case attr::TargetClones:
if (MVKind != MultiVersionKind::TargetClones)
return Diagnose(S, A);
@@ -11072,18 +11100,18 @@ static bool CheckMultiVersionAdditionalRules(Sema &S, const FunctionDecl *OldFD,
/// This sets NewFD->isInvalidDecl() to true if there was an error.
///
/// Returns true if there was an error, false otherwise.
-static bool CheckMultiVersionFirstFunction(Sema &S, FunctionDecl *FD,
- MultiVersionKind MVKind,
- const TargetAttr *TA) {
+static bool CheckMultiVersionFirstFunction(Sema &S, FunctionDecl *FD) {
+ MultiVersionKind MVKind = FD->getMultiVersionKind();
assert(MVKind != MultiVersionKind::None &&
"Function lacks multiversion attribute");
-
- // Target only causes MV if it is default, otherwise this is a normal
- // function.
- if (MVKind == MultiVersionKind::Target && !TA->isDefaultVersion())
+ const auto *TA = FD->getAttr<TargetAttr>();
+ const auto *TVA = FD->getAttr<TargetVersionAttr>();
+ // Target and target_version only causes MV if it is default, otherwise this
+ // is a normal function.
+ if ((TA && !TA->isDefaultVersion()) || (TVA && !TVA->isDefaultVersion()))
return false;
- if (MVKind == MultiVersionKind::Target && CheckMultiVersionValue(S, FD)) {
+ if ((TA || TVA) && CheckMultiVersionValue(S, FD)) {
FD->setInvalidDecl();
return true;
}
@@ -11106,25 +11134,27 @@ static bool PreviousDeclsHaveMultiVersionAttribute(const FunctionDecl *FD) {
return false;
}
-static bool CheckTargetCausesMultiVersioning(
- Sema &S, FunctionDecl *OldFD, FunctionDecl *NewFD, const TargetAttr *NewTA,
- bool &Redeclaration, NamedDecl *&OldDecl, LookupResult &Previous) {
+static bool CheckTargetCausesMultiVersioning(Sema &S, FunctionDecl *OldFD,
+ FunctionDecl *NewFD,
+ bool &Redeclaration,
+ NamedDecl *&OldDecl,
+ LookupResult &Previous) {
+ const auto *NewTA = NewFD->getAttr<TargetAttr>();
+ const auto *NewTVA = NewFD->getAttr<TargetVersionAttr>();
const auto *OldTA = OldFD->getAttr<TargetAttr>();
- ParsedTargetAttr NewParsed =
- S.getASTContext().getTargetInfo().parseTargetAttr(
- NewTA->getFeaturesStr());
- // Sort order doesn't matter, it just needs to be consistent.
- llvm::sort(NewParsed.Features);
-
+ const auto *OldTVA = OldFD->getAttr<TargetVersionAttr>();
// If the old decl is NOT MultiVersioned yet, and we don't cause that
// to change, this is a simple redeclaration.
- if (!NewTA->isDefaultVersion() &&
- (!OldTA || OldTA->getFeaturesStr() == NewTA->getFeaturesStr()))
+ if ((NewTA && !NewTA->isDefaultVersion() &&
+ (!OldTA || OldTA->getFeaturesStr() == NewTA->getFeaturesStr())) ||
+ (NewTVA && !NewTVA->isDefaultVersion() &&
+ (!OldTVA || OldTVA->getName() == NewTVA->getName())))
return false;
// Otherwise, this decl causes MultiVersioning.
if (CheckMultiVersionAdditionalRules(S, OldFD, NewFD, true,
- MultiVersionKind::Target)) {
+ NewTVA ? MultiVersionKind::TargetVersion
+ : MultiVersionKind::Target)) {
NewFD->setInvalidDecl();
return true;
}
@@ -11135,7 +11165,9 @@ static bool CheckTargetCausesMultiVersioning(
}
// If this is 'default', permit the forward declaration.
- if (!OldFD->isMultiVersion() && !OldTA && NewTA->isDefaultVersion()) {
+ if (!OldFD->isMultiVersion() &&
+ ((NewTA && NewTA->isDefaultVersion() && !OldTA) ||
+ (NewTVA && NewTVA->isDefaultVersion() && !OldTVA))) {
Redeclaration = true;
OldDecl = OldFD;
OldFD->setIsMultiVersion();
@@ -11149,26 +11181,50 @@ static bool CheckTargetCausesMultiVersioning(
return true;
}
- ParsedTargetAttr OldParsed =
- S.getASTContext().getTargetInfo().parseTargetAttr(
- OldTA->getFeaturesStr());
- llvm::sort(OldParsed.Features);
+ if (NewTA) {
+ ParsedTargetAttr OldParsed =
+ S.getASTContext().getTargetInfo().parseTargetAttr(
+ OldTA->getFeaturesStr());
+ llvm::sort(OldParsed.Features);
+ ParsedTargetAttr NewParsed =
+ S.getASTContext().getTargetInfo().parseTargetAttr(
+ NewTA->getFeaturesStr());
+ // Sort order doesn't matter, it just needs to be consistent.
+ llvm::sort(NewParsed.Features);
+ if (OldParsed == NewParsed) {
+ S.Diag(NewFD->getLocation(), diag::err_multiversion_duplicate);
+ S.Diag(OldFD->getLocation(), diag::note_previous_declaration);
+ NewFD->setInvalidDecl();
+ return true;
+ }
+ }
+
+ if (NewTVA) {
+ llvm::SmallVector<StringRef, 8> Feats;
+ OldTVA->getFeatures(Feats);
+ llvm::sort(Feats);
+ llvm::SmallVector<StringRef, 8> NewFeats;
+ NewTVA->getFeatures(NewFeats);
+ llvm::sort(NewFeats);
- if (OldParsed == NewParsed) {
- S.Diag(NewFD->getLocation(), diag::err_multiversion_duplicate);
- S.Diag(OldFD->getLocation(), diag::note_previous_declaration);
- NewFD->setInvalidDecl();
- return true;
+ if (Feats == NewFeats) {
+ S.Diag(NewFD->getLocation(), diag::err_multiversion_duplicate);
+ S.Diag(OldFD->getLocation(), diag::note_previous_declaration);
+ NewFD->setInvalidDecl();
+ return true;
+ }
}
for (const auto *FD : OldFD->redecls()) {
const auto *CurTA = FD->getAttr<TargetAttr>();
+ const auto *CurTVA = FD->getAttr<TargetVersionAttr>();
// We allow forward declarations before ANY multiversioning attributes, but
// nothing after the fact.
if (PreviousDeclsHaveMultiVersionAttribute(FD) &&
- (!CurTA || CurTA->isInherited())) {
+ ((NewTA && (!CurTA || CurTA->isInherited())) ||
+ (NewTVA && (!CurTVA || CurTVA->isInherited())))) {
S.Diag(FD->getLocation(), diag::err_multiversion_required_in_redecl)
- << 0;
+ << (NewTA ? 0 : 2);
S.Diag(NewFD->getLocation(), diag::note_multiversioning_caused_here);
NewFD->setInvalidDecl();
return true;
@@ -11199,11 +11255,11 @@ static bool MultiVersionTypesCompatible(MultiVersionKind Old,
/// multiversioned declaration collection.
static bool CheckMultiVersionAdditionalDecl(
Sema &S, FunctionDecl *OldFD, FunctionDecl *NewFD,
- MultiVersionKind NewMVKind, const TargetAttr *NewTA,
- const CPUDispatchAttr *NewCPUDisp, const CPUSpecificAttr *NewCPUSpec,
- const TargetClonesAttr *NewClones, bool &Redeclaration, NamedDecl *&OldDecl,
- LookupResult &Previous) {
-
+ MultiVersionKind NewMVKind, const CPUDispatchAttr *NewCPUDisp,
+ const CPUSpecificAttr *NewCPUSpec, const TargetClonesAttr *NewClones,
+ bool &Redeclaration, NamedDecl *&OldDecl, LookupResult &Previous) {
+ const auto *NewTA = NewFD->getAttr<TargetAttr>();
+ const auto *NewTVA = NewFD->getAttr<TargetVersionAttr>();
MultiVersionKind OldMVKind = OldFD->getMultiVersionKind();
// Disallow mixing of multiversioning types.
if (!MultiVersionTypesCompatible(OldMVKind, NewMVKind)) {
@@ -11219,6 +11275,11 @@ static bool CheckMultiVersionAdditionalDecl(
NewTA->getFeaturesStr());
llvm::sort(NewParsed.Features);
}
+ llvm::SmallVector<StringRef, 8> NewFeats;
+ if (NewTVA) {
+ NewTVA->getFeatures(NewFeats);
+ llvm::sort(NewFeats);
+ }
bool UseMemberUsingDeclRules =
S.CurContext->isRecord() && !NewFD->getFriendObjectKind();
@@ -11236,6 +11297,20 @@ static bool CheckMultiVersionAdditionalDecl(
S.IsOverload(NewFD, CurFD, UseMemberUsingDeclRules))
continue;
+ if (NewMVKind == MultiVersionKind::None &&
+ OldMVKind == MultiVersionKind::TargetVersion) {
+ NewFD->addAttr(TargetVersionAttr::CreateImplicit(
+ S.Context, "default", NewFD->getSourceRange(),
+ AttributeCommonInfo::AS_GNU));
+ NewFD->setIsMultiVersion();
+ NewMVKind = MultiVersionKind::TargetVersion;
+ if (!NewTVA) {
+ NewTVA = NewFD->getAttr<TargetVersionAttr>();
+ NewTVA->getFeatures(NewFeats);
+ llvm::sort(NewFeats);
+ }
+ }
+
switch (NewMVKind) {
case MultiVersionKind::None:
assert(OldMVKind == MultiVersionKind::TargetClones &&
@@ -11262,6 +11337,27 @@ static bool CheckMultiVersionAdditionalDecl(
}
break;
}
+ case MultiVersionKind::TargetVersion: {
+ const auto *CurTVA = CurFD->getAttr<TargetVersionAttr>();
+ if (CurTVA->getName() == NewTVA->getName()) {
+ NewFD->setIsMultiVersion();
+ Redeclaration = true;
+ OldDecl = ND;
+ return false;
+ }
+ llvm::SmallVector<StringRef, 8> CurFeats;
+ if (CurTVA) {
+ CurTVA->getFeatures(CurFeats);
+ llvm::sort(CurFeats);
+ }
+ if (CurFeats == NewFeats) {
+ S.Diag(NewFD->getLocation(), diag::err_multiversion_duplicate);
+ S.Diag(CurFD->getLocation(), diag::note_previous_declaration);
+ NewFD->setInvalidDecl();
+ return true;
+ }
+ break;
+ }
case MultiVersionKind::TargetClones: {
const auto *CurClones = CurFD->getAttr<TargetClonesAttr>();
Redeclaration = true;
@@ -11344,7 +11440,8 @@ static bool CheckMultiVersionAdditionalDecl(
// Else, this is simply a non-redecl case. Checking the 'value' is only
// necessary in the Target case, since The CPUSpecific/Dispatch cases are
// handled in the attribute adding step.
- if (NewMVKind == MultiVersionKind::Target &&
+ if ((NewMVKind == MultiVersionKind::TargetVersion ||
+ NewMVKind == MultiVersionKind::Target) &&
CheckMultiVersionValue(S, NewFD)) {
NewFD->setInvalidDecl();
return true;
@@ -11382,16 +11479,20 @@ static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD,
bool &Redeclaration, NamedDecl *&OldDecl,
LookupResult &Previous) {
const auto *NewTA = NewFD->getAttr<TargetAttr>();
+ const auto *NewTVA = NewFD->getAttr<TargetVersionAttr>();
const auto *NewCPUDisp = NewFD->getAttr<CPUDispatchAttr>();
const auto *NewCPUSpec = NewFD->getAttr<CPUSpecificAttr>();
const auto *NewClones = NewFD->getAttr<TargetClonesAttr>();
MultiVersionKind MVKind = NewFD->getMultiVersionKind();
// Main isn't allowed to become a multiversion function, however it IS
- // permitted to have 'main' be marked with the 'target' optimization hint.
+ // permitted to have 'main' be marked with the 'target' optimization hint,
+ // for 'target_version' only default is allowed.
if (NewFD->isMain()) {
if (MVKind != MultiVersionKind::None &&
- !(MVKind == MultiVersionKind::Target && !NewTA->isDefaultVersion())) {
+ !(MVKind == MultiVersionKind::Target && !NewTA->isDefaultVersion()) &&
+ !(MVKind == MultiVersionKind::TargetVersion &&
+ NewTVA->isDefaultVersion())) {
S.Diag(NewFD->getLocation(), diag::err_multiversion_not_allowed_on_main);
NewFD->setInvalidDecl();
return true;
@@ -11406,18 +11507,34 @@ static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD,
// multiversioning, this isn't an error condition.
if (MVKind == MultiVersionKind::None)
return false;
- return CheckMultiVersionFirstFunction(S, NewFD, MVKind, NewTA);
+ return CheckMultiVersionFirstFunction(S, NewFD);
}
FunctionDecl *OldFD = OldDecl->getAsFunction();
- if (!OldFD->isMultiVersion() && MVKind == MultiVersionKind::None)
+ if (!OldFD->isMultiVersion() && MVKind == MultiVersionKind::None) {
+ // No target_version attributes mean default
+ if (!NewTVA) {
+ const auto *OldTVA = OldFD->getAttr<TargetVersionAttr>();
+ if (OldTVA) {
+ NewFD->addAttr(TargetVersionAttr::CreateImplicit(
+ S.Context, "default", NewFD->getSourceRange(),
+ AttributeCommonInfo::AS_GNU));
+ NewFD->setIsMultiVersion();
+ OldFD->setIsMultiVersion();
+ OldDecl = OldFD;
+ Redeclaration = true;
+ return true;
+ }
+ }
return false;
+ }
// Multiversioned redeclarations aren't allowed to omit the attribute, except
- // for target_clones.
+ // for target_clones and target_version.
if (OldFD->isMultiVersion() && MVKind == MultiVersionKind::None &&
- OldFD->getMultiVersionKind() != MultiVersionKind::TargetClones) {
+ OldFD->getMultiVersionKind() != MultiVersionKind::TargetClones &&
+ OldFD->getMultiVersionKind() != MultiVersionKind::TargetVersion) {
S.Diag(NewFD->getLocation(), diag::err_multiversion_required_in_redecl)
<< (OldFD->getMultiVersionKind() != MultiVersionKind::Target);
NewFD->setInvalidDecl();
@@ -11427,8 +11544,9 @@ static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD,
if (!OldFD->isMultiVersion()) {
switch (MVKind) {
case MultiVersionKind::Target:
- return CheckTargetCausesMultiVersioning(S, OldFD, NewFD, NewTA,
- Redeclaration, OldDecl, Previous);
+ case MultiVersionKind::TargetVersion:
+ return CheckTargetCausesMultiVersioning(S, OldFD, NewFD, Redeclaration,
+ OldDecl, Previous);
case MultiVersionKind::TargetClones:
if (OldFD->isUsed(false)) {
NewFD->setInvalidDecl();
@@ -11436,6 +11554,7 @@ static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD,
}
OldFD->setIsMultiVersion();
break;
+
case MultiVersionKind::CPUDispatch:
case MultiVersionKind::CPUSpecific:
case MultiVersionKind::None:
@@ -11446,9 +11565,9 @@ static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD,
// At this point, we have a multiversion function decl (in OldFD) AND an
// appropriate attribute in the current function decl. Resolve that these are
// still compatible with previous declarations.
- return CheckMultiVersionAdditionalDecl(S, OldFD, NewFD, MVKind, NewTA,
- NewCPUDisp, NewCPUSpec, NewClones,
- Redeclaration, OldDecl, Previous);
+ return CheckMultiVersionAdditionalDecl(S, OldFD, NewFD, MVKind, NewCPUDisp,
+ NewCPUSpec, NewClones, Redeclaration,
+ OldDecl, Previous);
}
/// Perform semantic checking of a new function declaration.
@@ -14993,6 +15112,16 @@ Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D,
FD->dropAttr<IFuncAttr>();
FD->setInvalidDecl();
}
+ if (const auto *Attr = FD->getAttr<TargetVersionAttr>()) {
+ if (!Context.getTargetInfo().hasFeature("fmv") &&
+ !Attr->isDefaultVersion()) {
+ // If function multi versioning disabled skip parsing function body
+ // defined with non-default target_version attribute
+ if (SkipBody)
+ SkipBody->ShouldSkip = true;
+ return nullptr;
+ }
+ }
if (auto *Ctor = dyn_cast<CXXConstructorDecl>(FD)) {
if (Ctor->getTemplateSpecializationKind() == TSK_ExplicitSpecialization &&
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index b3bde19b2cb7a..9b2f9e5300780 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -3450,6 +3450,42 @@ bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) {
return false;
}
+// Check Target Version attrs
+bool Sema::checkTargetVersionAttr(SourceLocation LiteralLoc, StringRef &AttrStr,
+ bool &isDefault) {
+ enum FirstParam { Unsupported };
+ enum SecondParam { None };
+ enum ThirdParam { Target, TargetClones, TargetVersion };
+ if (AttrStr.trim() == "default")
+ isDefault = true;
+ llvm::SmallVector<StringRef, 8> Features;
+ AttrStr.split(Features, "+");
+ for (auto &CurFeature : Features) {
+ CurFeature = CurFeature.trim();
+ if (CurFeature == "default")
+ continue;
+ if (!Context.getTargetInfo().validateCpuSupports(CurFeature))
+ return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << CurFeature << TargetVersion;
+ }
+ return false;
+}
+
+static void handleTargetVersionAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ StringRef Str;
+ SourceLocation LiteralLoc;
+ bool isDefault = false;
+ if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &LiteralLoc) ||
+ S.checkTargetVersionAttr(LiteralLoc, Str, isDefault))
+ return;
+ // Do not create default only target_version attribute
+ if (!isDefault) {
+ TargetVersionAttr *NewAttr =
+ ::new (S.Context) TargetVersionAttr(S.Context, AL, Str);
+ D->addAttr(NewAttr);
+ }
+}
+
static void handleTargetAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
StringRef Str;
SourceLocation LiteralLoc;
@@ -3461,10 +3497,11 @@ static void handleTargetAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
D->addAttr(NewAttr);
}
-bool Sema::checkTargetClonesAttrString(SourceLocation LiteralLoc, StringRef Str,
- const StringLiteral *Literal,
- bool &HasDefault, bool &HasCommas,
- SmallVectorImpl<StringRef> &Strings) {
+bool Sema::checkTargetClonesAttrString(
+ SourceLocation LiteralLoc, StringRef Str, const StringLiteral *Literal,
+ bool &HasDefault, bool &HasCommas, bool &HasNotDefault,
+ SmallVectorImpl<StringRef> &Strings,
+ SmallVectorImpl<SmallString<64>> &StringsBuffer) {
enum FirstParam { Unsupported, Duplicate, Unknown };
enum SecondParam { None, CPU, Tune };
enum ThirdParam { Target, TargetClones };
@@ -3483,29 +3520,76 @@ bool Sema::checkTargetClonesAttrString(SourceLocation LiteralLoc, StringRef Str,
getLangOpts(), Context.getTargetInfo());
bool DefaultIsDupe = false;
+ bool HasCodeGenImpact = false;
if (Cur.empty())
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << "" << TargetClones;
- if (Cur.startswith("arch=")) {
- if (!Context.getTargetInfo().isValidCPUName(
- Cur.drop_front(sizeof("arch=") - 1)))
+ if (Context.getTargetInfo().getTriple().isAArch64()) {
+ // AArch64 target clones specific
+ if (Cur == "default") {
+ DefaultIsDupe = HasDefault;
+ HasDefault = true;
+ if (llvm::is_contained(Strings, Cur) || DefaultIsDupe)
+ Diag(CurLoc, diag::warn_target_clone_duplicate_options);
+ else
+ Strings.push_back(Cur);
+ } else {
+ std::pair<StringRef, StringRef> CurParts = {{}, Cur};
+ llvm::SmallVector<StringRef, 8> CurFeatures;
+ while (!CurParts.second.empty()) {
+ CurParts = CurParts.second.split('+');
+ StringRef CurFeature = CurParts.first.trim();
+ if (!Context.getTargetInfo().validateCpuSupports(CurFeature)) {
+ Diag(CurLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << CurFeature << TargetClones;
+ continue;
+ }
+ std::string Options;
+ if (Context.getTargetInfo().getFeatureDepOptions(CurFeature, Options))
+ HasCodeGenImpact = true;
+ CurFeatures.push_back(CurFeature);
+ }
+ // Canonize TargetClones Attributes
+ llvm::sort(CurFeatures);
+ SmallString<64> Res;
+ for (auto &CurFeat : CurFeatures) {
+ if (!Res.equals(""))
+ Res.append("+");
+ Res.append(CurFeat);
+ }
+ if (llvm::is_contained(Strings, Res) || DefaultIsDupe)
+ Diag(CurLoc, diag::warn_target_clone_duplicate_options);
+ else if (!HasCodeGenImpact)
+ // Ignore features in target_clone attribute that don't impact
+ // code generation
+ Diag(CurLoc, diag::warn_target_clone_no_impact_options);
+ else if (!Res.empty()) {
+ StringsBuffer.push_back(Res);
+ Strings.push_back(StringsBuffer.back().str());
+ HasNotDefault = true;
+ }
+ }
+ } else {
+ // Other targets ( currently X86 )
+ if (Cur.startswith("arch=")) {
+ if (!Context.getTargetInfo().isValidCPUName(
+ Cur.drop_front(sizeof("arch=") - 1)))
+ return Diag(CurLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << CPU << Cur.drop_front(sizeof("arch=") - 1)
+ << TargetClones;
+ } else if (Cur == "default") {
+ DefaultIsDupe = HasDefault;
+ HasDefault = true;
+ } else if (!Context.getTargetInfo().isValidFeatureName(Cur))
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << CPU << Cur.drop_front(sizeof("arch=") - 1)
- << TargetClones;
- } else if (Cur == "default") {
- DefaultIsDupe = HasDefault;
- HasDefault = true;
- } else if (!Context.getTargetInfo().isValidFeatureName(Cur))
- return Diag(CurLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << Cur << TargetClones;
-
- if (llvm::is_contained(Strings, Cur) || DefaultIsDupe)
- Diag(CurLoc, diag::warn_target_clone_duplicate_options);
- // Note: Add even if there are duplicates, since it changes name mangling.
- Strings.push_back(Cur);
+ << Unsupported << None << Cur << TargetClones;
+ if (llvm::is_contained(Strings, Cur) || DefaultIsDupe)
+ Diag(CurLoc, diag::warn_target_clone_duplicate_options);
+ // Note: Add even if there are duplicates, since it changes name mangling.
+ Strings.push_back(Cur);
+ }
}
-
if (Str.rtrim().endswith(","))
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << "" << TargetClones;
@@ -3513,6 +3597,10 @@ bool Sema::checkTargetClonesAttrString(SourceLocation LiteralLoc, StringRef Str,
}
static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ if (S.Context.getTargetInfo().getTriple().isAArch64() &&
+ !S.Context.getTargetInfo().hasFeature("fmv"))
+ return;
+
// Ensure we don't combine these with themselves, since that causes some
// confusing behavior.
if (const auto *Other = D->getAttr<TargetClonesAttr>()) {
@@ -3524,7 +3612,8 @@ static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
return;
SmallVector<StringRef, 2> Strings;
- bool HasCommas = false, HasDefault = false;
+ SmallVector<SmallString<64>, 2> StringsBuffer;
+ bool HasCommas = false, HasDefault = false, HasNotDefault = false;
for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) {
StringRef CurStr;
@@ -3533,13 +3622,19 @@ static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
S.checkTargetClonesAttrString(
LiteralLoc, CurStr,
cast<StringLiteral>(AL.getArgAsExpr(I)->IgnoreParenCasts()),
- HasDefault, HasCommas, Strings))
+ HasDefault, HasCommas, HasNotDefault, Strings, StringsBuffer))
return;
}
if (HasCommas && AL.getNumArgs() > 1)
S.Diag(AL.getLoc(), diag::warn_target_clone_mixed_values);
+ if (S.Context.getTargetInfo().getTriple().isAArch64() && !HasDefault) {
+ // Add default attribute if there is no one
+ HasDefault = true;
+ Strings.push_back("default");
+ }
+
if (!HasDefault) {
S.Diag(AL.getLoc(), diag::err_target_clone_must_have_default);
return;
@@ -3556,6 +3651,10 @@ static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
}
}
+ // No multiversion if we have default version only.
+ if (S.Context.getTargetInfo().getTriple().isAArch64() && !HasNotDefault)
+ return;
+
cast<FunctionDecl>(D)->setIsMultiVersion();
TargetClonesAttr *NewAttr = ::new (S.Context)
TargetClonesAttr(S.Context, AL, Strings.data(), Strings.size());
@@ -8907,6 +9006,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_Target:
handleTargetAttr(S, D, AL);
break;
+ case ParsedAttr::AT_TargetVersion:
+ handleTargetVersionAttr(S, D, AL);
+ break;
case ParsedAttr::AT_TargetClones:
handleTargetClonesAttr(S, D, AL);
break;
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 7f3d400fc67ee..7e551823a81bf 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -6521,8 +6521,11 @@ void Sema::AddOverloadCandidate(
}
}
- if (Function->isMultiVersion() && Function->hasAttr<TargetAttr>() &&
- !Function->getAttr<TargetAttr>()->isDefaultVersion()) {
+ if (Function->isMultiVersion() &&
+ ((Function->hasAttr<TargetAttr>() &&
+ !Function->getAttr<TargetAttr>()->isDefaultVersion()) ||
+ (Function->hasAttr<TargetVersionAttr>() &&
+ !Function->getAttr<TargetVersionAttr>()->isDefaultVersion()))) {
Candidate.Viable = false;
Candidate.FailureKind = ovl_non_default_multiversion_function;
return;
@@ -7182,8 +7185,11 @@ Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl,
return;
}
- if (Method->isMultiVersion() && Method->hasAttr<TargetAttr>() &&
- !Method->getAttr<TargetAttr>()->isDefaultVersion()) {
+ if (Method->isMultiVersion() &&
+ ((Method->hasAttr<TargetAttr>() &&
+ !Method->getAttr<TargetAttr>()->isDefaultVersion()) ||
+ (Method->hasAttr<TargetVersionAttr>() &&
+ !Method->getAttr<TargetVersionAttr>()->isDefaultVersion()))) {
Candidate.Viable = false;
Candidate.FailureKind = ovl_non_default_multiversion_function;
}
@@ -7636,8 +7642,11 @@ void Sema::AddConversionCandidate(
return;
}
- if (Conversion->isMultiVersion() && Conversion->hasAttr<TargetAttr>() &&
- !Conversion->getAttr<TargetAttr>()->isDefaultVersion()) {
+ if (Conversion->isMultiVersion() &&
+ ((Conversion->hasAttr<TargetAttr>() &&
+ !Conversion->getAttr<TargetAttr>()->isDefaultVersion()) ||
+ (Conversion->hasAttr<TargetVersionAttr>() &&
+ !Conversion->getAttr<TargetVersionAttr>()->isDefaultVersion()))) {
Candidate.Viable = false;
Candidate.FailureKind = ovl_non_default_multiversion_function;
}
@@ -10563,6 +10572,9 @@ void Sema::NoteOverloadCandidate(NamedDecl *Found, FunctionDecl *Fn,
if (Fn->isMultiVersion() && Fn->hasAttr<TargetAttr>() &&
!Fn->getAttr<TargetAttr>()->isDefaultVersion())
return;
+ if (Fn->isMultiVersion() && Fn->hasAttr<TargetVersionAttr>() &&
+ !Fn->getAttr<TargetVersionAttr>()->isDefaultVersion())
+ return;
if (shouldSkipNotingLambdaConversionDecl(Fn))
return;
@@ -12372,6 +12384,9 @@ class AddressOfFunctionResolver {
const auto *TA = FunDecl->getAttr<TargetAttr>();
if (TA && !TA->isDefaultVersion())
return false;
+ const auto *TVA = FunDecl->getAttr<TargetVersionAttr>();
+ if (TVA && !TVA->isDefaultVersion())
+ return false;
}
// If any candidate has a placeholder return type, trigger its deduction
diff --git a/clang/test/AST/attr-target-version.c b/clang/test/AST/attr-target-version.c
new file mode 100644
index 0000000000000..52ac0e61b5a59
--- /dev/null
+++ b/clang/test/AST/attr-target-version.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -ast-dump %s | FileCheck %s
+
+int __attribute__((target_version("sve2-bitperm + sha2"))) foov(void) { return 1; }
+int __attribute__((target_clones(" lse + fp + sha3 "))) fooc(void) { return 2; }
+// CHECK: TargetVersionAttr
+// CHECK: sve2-bitperm + sha2
+// CHECK: TargetClonesAttr
+// CHECK: fp+lse+sha3 default
diff --git a/clang/test/CodeGen/aarch64-targetattr.c b/clang/test/CodeGen/aarch64-targetattr.c
index bac132f7ef342..88af5cc5bc8da 100644
--- a/clang/test/CodeGen/aarch64-targetattr.c
+++ b/clang/test/CodeGen/aarch64-targetattr.c
@@ -10,100 +10,100 @@ void v82sve() {}
// CHECK-LABEL: @v82sve2() #2
__attribute__((target("arch=armv8.2-a+sve2")))
void v82sve2() {}
-// CHECK-LABEL: @v82svesve2() #3
+// CHECK-LABEL: @v82svesve2() #2
__attribute__((target("arch=armv8.2-a+sve+sve2")))
void v82svesve2() {}
-// CHECK-LABEL: @v86sve2() #4
+// CHECK-LABEL: @v86sve2() #3
__attribute__((target("arch=armv8.6-a+sve2")))
void v86sve2() {}
-// CHECK-LABEL: @a710() #5
+// CHECK-LABEL: @a710() #4
__attribute__((target("cpu=cortex-a710")))
void a710() {}
-// CHECK-LABEL: @tunea710() #6
+// CHECK-LABEL: @tunea710() #5
__attribute__((target("tune=cortex-a710")))
void tunea710() {}
-// CHECK-LABEL: @generic() #7
+// CHECK-LABEL: @generic() #6
__attribute__((target("cpu=generic")))
void generic() {}
-// CHECK-LABEL: @tune() #8
+// CHECK-LABEL: @tune() #7
__attribute__((target("tune=generic")))
void tune() {}
-// CHECK-LABEL: @n1tunea710() #9
+// CHECK-LABEL: @n1tunea710() #8
__attribute__((target("cpu=neoverse-n1,tune=cortex-a710")))
void n1tunea710() {}
-// CHECK-LABEL: @svetunea710() #10
+// CHECK-LABEL: @svetunea710() #9
__attribute__((target("sve,tune=cortex-a710")))
void svetunea710() {}
-// CHECK-LABEL: @plussvetunea710() #10
+// CHECK-LABEL: @plussvetunea710() #9
__attribute__((target("+sve,tune=cortex-a710")))
void plussvetunea710() {}
-// CHECK-LABEL: @v1plussve2() #11
+// CHECK-LABEL: @v1plussve2() #10
__attribute__((target("cpu=neoverse-v1,+sve2")))
void v1plussve2() {}
-// CHECK-LABEL: @v1sve2() #11
+// CHECK-LABEL: @v1sve2() #10
__attribute__((target("cpu=neoverse-v1+sve2")))
void v1sve2() {}
-// CHECK-LABEL: @v1minussve() #12
+// CHECK-LABEL: @v1minussve() #11
__attribute__((target("cpu=neoverse-v1,+nosve")))
void v1minussve() {}
-// CHECK-LABEL: @v1nosve() #12
+// CHECK-LABEL: @v1nosve() #11
__attribute__((target("cpu=neoverse-v1,no-sve")))
void v1nosve() {}
-// CHECK-LABEL: @v1msve() #12
+// CHECK-LABEL: @v1msve() #11
__attribute__((target("cpu=neoverse-v1+nosve")))
void v1msve() {}
-// CHECK-LABEL: @plussve() #13
+// CHECK-LABEL: @plussve() #12
__attribute__((target("+sve")))
void plussve() {}
-// CHECK-LABEL: @plussveplussve2() #14
+// CHECK-LABEL: @plussveplussve2() #13
__attribute__((target("+sve+nosve2")))
void plussveplussve2() {}
-// CHECK-LABEL: @plussveminusnosve2() #14
+// CHECK-LABEL: @plussveminusnosve2() #13
__attribute__((target("sve,no-sve2")))
void plussveminusnosve2() {}
-// CHECK-LABEL: @plusfp16() #15
+// CHECK-LABEL: @plusfp16() #14
__attribute__((target("+fp16")))
void plusfp16() {}
-// CHECK-LABEL: @all() #16
+// CHECK-LABEL: @all() #15
__attribute__((target("cpu=neoverse-n1,tune=cortex-a710,arch=armv8.6-a+sve2")))
void all() {}
-// CHECK-LABEL: @allplusbranchprotection() #17
+// CHECK-LABEL: @allplusbranchprotection() #16
__attribute__((target("cpu=neoverse-n1,tune=cortex-a710,arch=armv8.6-a+sve2,branch-protection=standard")))
void allplusbranchprotection() {}
// These tests check that the user facing and internal llvm name are both accepted.
-// CHECK-LABEL: @plusnoneon() #18
+// CHECK-LABEL: @plusnoneon() #17
__attribute__((target("+noneon")))
void plusnoneon() {}
-// CHECK-LABEL: @plusnosimd() #18
+// CHECK-LABEL: @plusnosimd() #17
__attribute__((target("+nosimd")))
void plusnosimd() {}
-// CHECK-LABEL: @noneon() #18
+// CHECK-LABEL: @noneon() #17
__attribute__((target("no-neon")))
void noneon() {}
-// CHECK-LABEL: @nosimd() #18
+// CHECK-LABEL: @nosimd() #17
__attribute__((target("no-simd")))
void nosimd() {}
// CHECK: attributes #0 = { {{.*}} "target-features"="+v8.1a,+v8.2a,+v8a" }
-// CHECK: attributes #1 = { {{.*}} "target-features"="+sve,+v8.1a,+v8.2a,+v8a" }
-// CHECK: attributes #2 = { {{.*}} "target-features"="+sve2,+v8.1a,+v8.2a,+v8a" }
-// CHECK: attributes #4 = { {{.*}} "target-features"="+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" }
-// CHECK: attributes #5 = { {{.*}} "target-cpu"="cortex-a710" "target-features"="+bf16,+crc,+dotprod,+flagm,+fp-armv8,+fp16fml,+i8mm,+lse,+mte,+neon,+pauth,+ras,+rcpc,+rdm,+sb,+sve,+sve2,+sve2-bitperm" }
-// CHECK: attributes #6 = { {{.*}} "tune-cpu"="cortex-a710" }
-// CHECK: attributes #7 = { {{.*}} "target-cpu"="generic" }
-// CHECK: attributes #8 = { {{.*}} "tune-cpu"="generic" }
-// CHECK: attributes #9 = { {{.*}} "target-cpu"="neoverse-n1" "target-features"="+crc,+crypto,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+spe,+ssbs" "tune-cpu"="cortex-a710" }
-// CHECK: attributes #10 = { {{.*}} "target-features"="+sve" "tune-cpu"="cortex-a710" }
-// CHECK: attributes #11 = { {{.*}} "target-cpu"="neoverse-v1" "target-features"="+bf16,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+lse,+neon,+rand,+ras,+rcpc,+rdm,+spe,+ssbs,+sve,+sve2" }
-// CHECK: attributes #12 = { {{.*}} "target-cpu"="neoverse-v1" "target-features"="+bf16,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+lse,+neon,+rand,+ras,+rcpc,+rdm,+spe,+ssbs,-sve" }
-// CHECK: attributes #13 = { {{.*}} "target-features"="+sve" }
-// CHECK: attributes #14 = { {{.*}} "target-features"="+sve,-sve2" }
-// CHECK: attributes #15 = { {{.*}} "target-features"="+fullfp16" }
-// CHECK: attributes #16 = { {{.*}} "target-cpu"="neoverse-n1" "target-features"="+crc,+crypto,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+spe,+ssbs,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
-// CHECK: attributes #17 = { {{.*}} "branch-target-enforcement"="true" {{.*}} "target-cpu"="neoverse-n1" "target-features"="+crc,+crypto,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+spe,+ssbs,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
-// CHECK: attributes #18 = { {{.*}} "target-features"="-neon" }
+// CHECK: attributes #1 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+v8.1a,+v8.2a,+v8a" }
+// CHECK: attributes #2 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2,+v8.1a,+v8.2a,+v8a" }
+// CHECK: attributes #3 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" }
+// CHECK: attributes #4 = { {{.*}} "target-cpu"="cortex-a710" "target-features"="+bf16,+crc,+dotprod,+flagm,+fp-armv8,+fp16fml,+i8mm,+lse,+mte,+neon,+pauth,+ras,+rcpc,+rdm,+sb,+sve,+sve2,+sve2-bitperm" }
+// CHECK: attributes #5 = { {{.*}} "tune-cpu"="cortex-a710" }
+// CHECK: attributes #6 = { {{.*}} "target-cpu"="generic" }
+// CHECK: attributes #7 = { {{.*}} "tune-cpu"="generic" }
+// CHECK: attributes #8 = { {{.*}} "target-cpu"="neoverse-n1" "target-features"="+crc,+crypto,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+spe,+ssbs" "tune-cpu"="cortex-a710" }
+// CHECK: attributes #9 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve" "tune-cpu"="cortex-a710" }
+// CHECK: attributes #10 = { {{.*}} "target-cpu"="neoverse-v1" "target-features"="+bf16,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+lse,+neon,+rand,+ras,+rcpc,+rdm,+spe,+ssbs,+sve,+sve2" }
+// CHECK: attributes #11 = { {{.*}} "target-cpu"="neoverse-v1" "target-features"="+bf16,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+lse,+neon,+rand,+ras,+rcpc,+rdm,+spe,+ssbs,-sve" }
+// CHECK: attributes #12 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve" }
+// CHECK: attributes #13 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve,-sve2" }
+// CHECK: attributes #14 = { {{.*}} "target-features"="+fullfp16" }
+// CHECK: attributes #15 = { {{.*}} "target-cpu"="neoverse-n1" "target-features"="+crc,+crypto,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
+// CHECK: attributes #16 = { {{.*}} "branch-target-enforcement"="true" {{.*}} "target-cpu"="neoverse-n1" "target-features"="+crc,+crypto,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
+// CHECK: attributes #17 = { {{.*}} "target-features"="-neon" }
diff --git a/clang/test/CodeGen/attr-target-clones-aarch64.c b/clang/test/CodeGen/attr-target-clones-aarch64.c
new file mode 100644
index 0000000000000..390998a5a0b61
--- /dev/null
+++ b/clang/test/CodeGen/attr-target-clones-aarch64.c
@@ -0,0 +1,347 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --include-generated-funcs
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fmv -S -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-NOFMV
+
+int __attribute__((target_clones("lse+aes", "sve2"))) ftc(void) { return 0; }
+int __attribute__((target_clones("sha2", "sha2+memtag2", " default "))) ftc_def(void) { return 1; }
+int __attribute__((target_clones("sha2", "default"))) ftc_dup1(void) { return 2; }
+int __attribute__((target_clones("fp", "crc+dotprod"))) ftc_dup2(void) { return 3; }
+int foo() {
+ return ftc() + ftc_def() + ftc_dup1() + ftc_dup2();
+}
+
+inline int __attribute__((target_clones("rng+simd", "rcpc+predres", "sve2-aes+wfxt"))) ftc_inline1(void) { return 1; }
+inline int __attribute__((target_clones("fp16", "fcma+sve2-bitperm", "default"))) ftc_inline2(void);
+inline int __attribute__((target_clones("bti", "sve+sb"))) ftc_inline3(void) { return 3; }
+
+int __attribute__((target_clones("default"))) ftc_direct(void) { return 4; }
+
+int __attribute__((target_clones("default"))) main() {
+ return ftc_inline1() + ftc_inline2() + ftc_inline3() + ftc_direct();
+}
+inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))) ftc_inline2(void) { return 2; };
+
+
+// CHECK: @__aarch64_cpu_features = external dso_local global { i64 }
+// CHECK: @ftc.ifunc = weak_odr ifunc i32 (), ptr @ftc.resolver
+// CHECK: @ftc_def.ifunc = weak_odr ifunc i32 (), ptr @ftc_def.resolver
+// CHECK: @ftc_dup1.ifunc = weak_odr ifunc i32 (), ptr @ftc_dup1.resolver
+// CHECK: @ftc_dup2.ifunc = weak_odr ifunc i32 (), ptr @ftc_dup2.resolver
+// CHECK: @ftc_inline1.ifunc = weak_odr ifunc i32 (), ptr @ftc_inline1.resolver
+// CHECK: @ftc_inline2.ifunc = weak_odr ifunc i32 (), ptr @ftc_inline2.resolver
+// CHECK: @ftc_inline3.ifunc = weak_odr ifunc i32 (), ptr @ftc_inline3.resolver
+
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc._MaesMlse(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 0
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc._Msve2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 0
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 0
+// CHECK-LABEL: @ftc.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 16512
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 16512
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @ftc._MaesMlse
+// CHECK: resolver_else:
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 68719476736
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 68719476736
+// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
+// CHECK: resolver_return1:
+// CHECK-NEXT: ret ptr @ftc._Msve2
+// CHECK: resolver_else2:
+// CHECK-NEXT: ret ptr @ftc
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_def._Msha2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 1
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_def._Mmemtag2Msha2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 1
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_def(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 1
+// CHECK-LABEL: @ftc_def.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186048512
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186048512
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @ftc_def._Mmemtag2Msha2
+// CHECK: resolver_else:
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4096
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 4096
+// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
+// CHECK: resolver_return1:
+// CHECK-NEXT: ret ptr @ftc_def._Msha2
+// CHECK: resolver_else2:
+// CHECK-NEXT: ret ptr @ftc_def
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_dup1._Msha2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 2
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_dup1(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 2
+// CHECK-LABEL: @ftc_dup1.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4096
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4096
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @ftc_dup1._Msha2
+// CHECK: resolver_else:
+// CHECK-NEXT: ret ptr @ftc_dup1
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_dup2._Mfp(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 3
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_dup2._McrcMdotprod(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 3
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_dup2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 3
+// CHECK-LABEL: @ftc_dup2.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1040
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1040
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @ftc_dup2._McrcMdotprod
+// CHECK: resolver_else:
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 256
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 256
+// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
+// CHECK: resolver_return1:
+// CHECK-NEXT: ret ptr @ftc_dup2._Mfp
+// CHECK: resolver_else2:
+// CHECK-NEXT: ret ptr @ftc_dup2
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @foo(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CALL:%.*]] = call i32 @ftc.ifunc()
+// CHECK-NEXT: [[CALL1:%.*]] = call i32 @ftc_def.ifunc()
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]]
+// CHECK-NEXT: [[CALL2:%.*]] = call i32 @ftc_dup1.ifunc()
+// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]]
+// CHECK-NEXT: [[CALL4:%.*]] = call i32 @ftc_dup2.ifunc()
+// CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]]
+// CHECK-NEXT: ret i32 [[ADD5]]
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_direct(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 4
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @main(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// CHECK-NEXT: [[CALL:%.*]] = call i32 @ftc_inline1.ifunc()
+// CHECK-NEXT: [[CALL1:%.*]] = call i32 @ftc_inline2.ifunc()
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]]
+// CHECK-NEXT: [[CALL2:%.*]] = call i32 @ftc_inline3.ifunc()
+// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]]
+// CHECK-NEXT: [[CALL4:%.*]] = call i32 @ftc_direct()
+// CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]]
+// CHECK-NEXT: ret i32 [[ADD5]]
+// CHECK-LABEL: @ftc_inline1.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 18014535948435456
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 18014535948435456
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @ftc_inline1._Msve2-aesMwfxt
+// CHECK: resolver_else:
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 140737492549632
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 140737492549632
+// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
+// CHECK: resolver_return1:
+// CHECK-NEXT: ret ptr @ftc_inline1._MpredresMrcpc
+// CHECK: resolver_else2:
+// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 513
+// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 513
+// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]]
+// CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]]
+// CHECK: resolver_return3:
+// CHECK-NEXT: ret ptr @ftc_inline1._MrngMsimd
+// CHECK: resolver_else4:
+// CHECK-NEXT: ret ptr @ftc_inline1
+// CHECK-LABEL: @ftc_inline2.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 549757911040
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 549757911040
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @ftc_inline2._MfcmaMsve2-bitperm
+// CHECK: resolver_else:
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65536
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 65536
+// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
+// CHECK: resolver_return1:
+// CHECK-NEXT: ret ptr @ftc_inline2._Mfp16
+// CHECK: resolver_else2:
+// CHECK-NEXT: ret ptr @ftc_inline2
+// CHECK-LABEL: @ftc_inline3.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70369817919488
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 70369817919488
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @ftc_inline3._MsbMsve
+// CHECK: resolver_else:
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1125899906842624
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 1125899906842624
+// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
+// CHECK: resolver_return1:
+// CHECK-NEXT: ret ptr @ftc_inline3._Mbti
+// CHECK: resolver_else2:
+// CHECK-NEXT: ret ptr @ftc_inline3
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_inline1._MrngMsimd(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 1
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_inline1._MpredresMrcpc(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 1
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_inline1._Msve2-aesMwfxt(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 1
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_inline1(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 1
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_inline2._Mfp16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 2
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_inline2._MfcmaMsve2-bitperm(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 2
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_inline2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 2
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_inline3._Mbti(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 3
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_inline3._MsbMsve(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 3
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: @ftc_inline3(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 3
+// CHECK-NOFMV: Function Attrs: noinline nounwind optnone
+// CHECK-NOFMV-LABEL: @ftc(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: ret i32 0
+// CHECK-NOFMV: Function Attrs: noinline nounwind optnone
+// CHECK-NOFMV-LABEL: @ftc_def(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: ret i32 1
+// CHECK-NOFMV: Function Attrs: noinline nounwind optnone
+// CHECK-NOFMV-LABEL: @ftc_dup1(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: ret i32 2
+// CHECK-NOFMV: Function Attrs: noinline nounwind optnone
+// CHECK-NOFMV-LABEL: @ftc_dup2(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: ret i32 3
+// CHECK-NOFMV: Function Attrs: noinline nounwind optnone
+// CHECK-NOFMV-LABEL: @foo(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: [[CALL:%.*]] = call i32 @ftc()
+// CHECK-NOFMV-NEXT: [[CALL1:%.*]] = call i32 @ftc_def()
+// CHECK-NOFMV-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]]
+// CHECK-NOFMV-NEXT: [[CALL2:%.*]] = call i32 @ftc_dup1()
+// CHECK-NOFMV-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]]
+// CHECK-NOFMV-NEXT: [[CALL4:%.*]] = call i32 @ftc_dup2()
+// CHECK-NOFMV-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]]
+// CHECK-NOFMV-NEXT: ret i32 [[ADD5]]
+// CHECK-NOFMV: Function Attrs: noinline nounwind optnone
+// CHECK-NOFMV-LABEL: @ftc_direct(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: ret i32 4
+// CHECK-NOFMV: Function Attrs: noinline nounwind optnone
+// CHECK-NOFMV-LABEL: @main(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-NOFMV-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// CHECK-NOFMV-NEXT: [[CALL:%.*]] = call i32 @ftc_inline1()
+// CHECK-NOFMV-NEXT: [[CALL1:%.*]] = call i32 @ftc_inline2()
+// CHECK-NOFMV-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]]
+// CHECK-NOFMV-NEXT: [[CALL2:%.*]] = call i32 @ftc_inline3()
+// CHECK-NOFMV-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]]
+// CHECK-NOFMV-NEXT: [[CALL4:%.*]] = call i32 @ftc_direct()
+// CHECK-NOFMV-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]]
+// CHECK-NOFMV-NEXT: ret i32 [[ADD5]]
+
+// CHECK: attributes #0 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+lse,+neon" }
+// CHECK: attributes #1 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2" }
+// CHECK: attributes #2 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #3 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+sha2" }
+// CHECK: attributes #4 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+mte,+neon,+sha2" }
+// CHECK: attributes #5 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon" }
+// CHECK: attributes #6 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+dotprod,+fp-armv8,+neon" }
+// CHECK: attributes #7 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+rand" }
+// CHECK: attributes #8 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+predres,+rcpc" }
+// CHECK: attributes #9 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-aes,+wfxt" }
+// CHECK: attributes #10 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon" }
+// CHECK: attributes #11 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+complxnum,+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-bitperm" }
+// CHECK: attributes #12 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti" }
+// CHECK: attributes #13 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sb,+sve" }
+
+// CHECK-NOFMV: attributes #0 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" }
+// CHECK-NOFMV: attributes #1 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" }
diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c
new file mode 100644
index 0000000000000..1e129e771da04
--- /dev/null
+++ b/clang/test/CodeGen/attr-target-version.c
@@ -0,0 +1,541 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +ls64 -target-feature +fullfp16 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fmv -S -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-NOFMV
+
+int __attribute__((target_version("rng+flagm+fp16fml"))) fmv(void) { return 1; }
+int __attribute__((target_version("flagm2+sme-i16i64"))) fmv(void) { return 2; }
+int __attribute__((target_version("lse+sha2"))) fmv(void) { return 3; }
+int __attribute__((target_version("dotprod+ls64_accdata"))) fmv(void) { return 4; }
+int __attribute__((target_version("fp16fml+memtag"))) fmv(void) { return 5; }
+int __attribute__((target_version("fp+aes"))) fmv(void) { return 6; }
+int __attribute__((target_version("crc+ls64_v"))) fmv(void) { return 7; }
+int __attribute__((target_version("bti"))) fmv(void) { return 8; }
+int __attribute__((target_version("sme2"))) fmv(void) { return 9; }
+int __attribute__((target_version("default"))) fmv(void) { return 0; }
+int __attribute__((target_version("ls64+simd"))) fmv_one(void) { return 1; }
+int __attribute__((target_version("dpb"))) fmv_one(void) { return 2; }
+int __attribute__((target_version("default"))) fmv_one(void) { return 0; }
+int __attribute__((target_version("fp"))) fmv_two(void) { return 1; }
+int __attribute__((target_version("simd"))) fmv_two(void) { return 2; }
+int __attribute__((target_version("dgh"))) fmv_two(void) { return 3; }
+int __attribute__((target_version("fp16+simd"))) fmv_two(void) { return 4; }
+int __attribute__((target_version("default"))) fmv_two(void) { return 0; }
+int foo() {
+ return fmv()+fmv_one()+fmv_two();
+}
+
+inline int __attribute__((target_version("sha1+pmull+f64mm"))) fmv_inline(void) { return 1; }
+inline int __attribute__((target_version("fp16+fcma+sme+ fp16 "))) fmv_inline(void) { return 2; }
+inline int __attribute__((target_version("sha3+i8mm+f32mm"))) fmv_inline(void) { return 12; }
+inline int __attribute__((target_version("dit+sve-ebf16"))) fmv_inline(void) { return 8; }
+inline int __attribute__((target_version("dpb+rcpc2 "))) fmv_inline(void) { return 6; }
+inline int __attribute__((target_version(" dpb2 + jscvt"))) fmv_inline(void) { return 7; }
+inline int __attribute__((target_version("rcpc+frintts"))) fmv_inline(void) { return 3; }
+inline int __attribute__((target_version("sve+sve-bf16"))) fmv_inline(void) { return 4; }
+inline int __attribute__((target_version("sve2-aes+sve2-sha3"))) fmv_inline(void) { return 5; }
+inline int __attribute__((target_version("sve2+sve2-pmull128+sve2-bitperm"))) fmv_inline(void) { return 9; }
+inline int __attribute__((target_version("sve2-sm4+memtag2"))) fmv_inline(void) { return 10; }
+inline int __attribute__((target_version("memtag3"))) fmv_inline(void) { return 11; }
+inline int __attribute__((target_version("default"))) fmv_inline(void) { return 3; }
+
+__attribute__((target_version("ls64"))) int fmv_e(void);
+int fmv_e(void) { return 20; }
+
+static __attribute__((target_version("sb"))) inline int fmv_d(void);
+static __attribute__((target_version("default"))) inline int fmv_d(void);
+
+int __attribute__((target_version("default"))) fmv_default(void) { return 111; }
+int fmv_default(void);
+
+void fmv_c(void);
+void __attribute__((target_version("ssbs"))) fmv_c(void){};
+void __attribute__((target_version("default"))) fmv_c(void){};
+
+int goo() {
+ fmv_inline();
+ fmv_e();
+ fmv_d();
+ fmv_c();
+ return fmv_default();
+}
+static inline int __attribute__((target_version("sb"))) fmv_d(void) { return 0; }
+static inline int __attribute__((target_version(" default "))) fmv_d(void) { return 1; }
+
+static void func(void) {}
+inline __attribute__((target_version("default"))) void recb(void) { func(); }
+inline __attribute__((target_version("default"))) void reca(void) { recb(); }
+void recur(void) { reca(); }
+
+int __attribute__((target_version("default"))) main(void) {
+ recur();
+ return goo();
+}
+
+typedef int (*Fptr)();
+void f(Fptr);
+int hoo(void) {
+ f(fmv);
+ Fptr fp1 = &fmv;
+ Fptr fp2 = fmv;
+ return fp1() + fp2();
+}
+
+// CHECK: @__aarch64_cpu_features = external dso_local global { i64 }
+// CHECK: @fmv.ifunc = weak_odr ifunc i32 (), ptr @fmv.resolver
+// CHECK: @fmv_one.ifunc = weak_odr ifunc i32 (), ptr @fmv_one.resolver
+// CHECK: @fmv_two.ifunc = weak_odr ifunc i32 (), ptr @fmv_two.resolver
+// CHECK: @fmv_inline.ifunc = weak_odr ifunc i32 (), ptr @fmv_inline.resolver
+// CHECK: @fmv_e.ifunc = weak_odr ifunc i32 (), ptr @fmv_e.resolver
+// CHECK: @fmv_d.ifunc = internal ifunc i32 (), ptr @fmv_d.resolver
+// CHECK: @fmv_c.ifunc = weak_odr ifunc void (), ptr @fmv_c.resolver
+
+// CHECK-LABEL: @fmv._MrngMflagmMfp16fml(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 1
+// CHECK-LABEL: @fmv._Mflagm2Msme-i16i64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 2
+// CHECK-LABEL: @fmv._MlseMsha2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 3
+// CHECK-LABEL: @fmv._MdotprodMls64_accdata(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 4
+// CHECK-LABEL: @fmv._Mfp16fmlMmemtag(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 5
+// CHECK-LABEL: @fmv._MfpMaes(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 6
+// CHECK-LABEL: @fmv._McrcMls64_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 7
+// CHECK-LABEL: @fmv._Mbti(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 8
+// CHECK-LABEL: @fmv._Msme2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 9
+// CHECK-LABEL: @fmv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 0
+// CHECK-LABEL: @fmv_one._Mls64Msimd(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 1
+// CHECK-LABEL: @fmv_one._Mdpb(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 2
+// CHECK-LABEL: @fmv_one(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 0
+// CHECK-LABEL: @fmv_two._Mfp(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 1
+// CHECK-LABEL: @fmv_two._Msimd(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 2
+// CHECK-LABEL: @fmv_two._Mdgh(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 3
+// CHECK-LABEL: @fmv_two._Mfp16Msimd(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 4
+// CHECK-LABEL: @fmv_two(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 0
+// CHECK-LABEL: @foo(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CALL:%.*]] = call i32 @fmv.ifunc()
+// CHECK-NEXT: [[CALL1:%.*]] = call i32 @fmv_one.ifunc()
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]]
+// CHECK-NEXT: [[CALL2:%.*]] = call i32 @fmv_two.ifunc()
+// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]]
+// CHECK-NEXT: ret i32 [[ADD3]]
+// CHECK-LABEL: @fmv.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 11
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 11
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @fmv._MrngMflagmMfp16fml
+// CHECK: resolver_else:
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927940
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 72057594037927940
+// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
+// CHECK: resolver_return1:
+// CHECK-NEXT: ret ptr @fmv._Mflagm2Msme-i16i64
+// CHECK: resolver_else2:
+// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 16
+// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 16
+// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]]
+// CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]]
+// CHECK: resolver_return3:
+// CHECK-NEXT: ret ptr @fmv._MdotprodMls64_accdata
+// CHECK: resolver_else4:
+// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 1024
+// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 1024
+// CHECK-NEXT: [[TMP15:%.*]] = and i1 true, [[TMP14]]
+// CHECK-NEXT: br i1 [[TMP15]], label [[RESOLVER_RETURN5:%.*]], label [[RESOLVER_ELSE6:%.*]]
+// CHECK: resolver_return5:
+// CHECK-NEXT: ret ptr @fmv._McrcMls64_v
+// CHECK: resolver_else6:
+// CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 8796093022216
+// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 8796093022216
+// CHECK-NEXT: [[TMP19:%.*]] = and i1 true, [[TMP18]]
+// CHECK-NEXT: br i1 [[TMP19]], label [[RESOLVER_RETURN7:%.*]], label [[RESOLVER_ELSE8:%.*]]
+// CHECK: resolver_return7:
+// CHECK-NEXT: ret ptr @fmv._Mfp16fmlMmemtag
+// CHECK: resolver_else8:
+// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 16384
+// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[TMP21]], 16384
+// CHECK-NEXT: [[TMP23:%.*]] = and i1 true, [[TMP22]]
+// CHECK-NEXT: br i1 [[TMP23]], label [[RESOLVER_RETURN9:%.*]], label [[RESOLVER_ELSE10:%.*]]
+// CHECK: resolver_return9:
+// CHECK-NEXT: ret ptr @fmv._MfpMaes
+// CHECK: resolver_else10:
+// CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 4224
+// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 4224
+// CHECK-NEXT: [[TMP27:%.*]] = and i1 true, [[TMP26]]
+// CHECK-NEXT: br i1 [[TMP27]], label [[RESOLVER_RETURN11:%.*]], label [[RESOLVER_ELSE12:%.*]]
+// CHECK: resolver_return11:
+// CHECK-NEXT: ret ptr @fmv._MlseMsha2
+// CHECK: resolver_else12:
+// CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 144115188075855872
+// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 144115188075855872
+// CHECK-NEXT: [[TMP31:%.*]] = and i1 true, [[TMP30]]
+// CHECK-NEXT: br i1 [[TMP31]], label [[RESOLVER_RETURN13:%.*]], label [[RESOLVER_ELSE14:%.*]]
+// CHECK: resolver_return13:
+// CHECK-NEXT: ret ptr @fmv._Msme2
+// CHECK: resolver_else14:
+// CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 1125899906842624
+// CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 1125899906842624
+// CHECK-NEXT: [[TMP35:%.*]] = and i1 true, [[TMP34]]
+// CHECK-NEXT: br i1 [[TMP35]], label [[RESOLVER_RETURN15:%.*]], label [[RESOLVER_ELSE16:%.*]]
+// CHECK: resolver_return15:
+// CHECK-NEXT: ret ptr @fmv._Mbti
+// CHECK: resolver_else16:
+// CHECK-NEXT: ret ptr @fmv
+// CHECK-LABEL: @fmv_one.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: ret ptr @fmv_one._Mls64Msimd
+// CHECK-LABEL: @fmv_two.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: ret ptr @fmv_two._Mfp16Msimd
+// CHECK-LABEL: @fmv_e(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 20
+// CHECK-LABEL: @fmv_default(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 111
+// CHECK-LABEL: @fmv_c._Mssbs(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret void
+// CHECK-LABEL: @fmv_c(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret void
+// CHECK-LABEL: @goo(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CALL:%.*]] = call i32 @fmv_inline.ifunc()
+// CHECK-NEXT: [[CALL1:%.*]] = call i32 @fmv_e.ifunc()
+// CHECK-NEXT: [[CALL2:%.*]] = call i32 @fmv_d.ifunc()
+// CHECK-NEXT: call void @fmv_c.ifunc()
+// CHECK-NEXT: [[CALL3:%.*]] = call i32 @fmv_default()
+// CHECK-NEXT: ret i32 [[CALL3]]
+// CHECK-LABEL: @fmv_inline.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4398048608256
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4398048608256
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @fmv_inline._Mfp16MfcmaMsmeMfp16
+// CHECK: resolver_else:
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 893353197568
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 893353197568
+// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
+// CHECK: resolver_return1:
+// CHECK-NEXT: ret ptr @fmv_inline._Msve2Msve2-pmull128Msve2-bitperm
+// CHECK: resolver_else2:
+// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 34359773184
+// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 34359773184
+// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]]
+// CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]]
+// CHECK: resolver_return3:
+// CHECK-NEXT: ret ptr @fmv_inline._Msha1MpmullMf64mm
+// CHECK: resolver_else4:
+// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 17246986240
+// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 17246986240
+// CHECK-NEXT: [[TMP15:%.*]] = and i1 true, [[TMP14]]
+// CHECK-NEXT: br i1 [[TMP15]], label [[RESOLVER_RETURN5:%.*]], label [[RESOLVER_ELSE6:%.*]]
+// CHECK: resolver_return5:
+// CHECK-NEXT: ret ptr @fmv_inline._Msha3Mi8mmMf32mm
+// CHECK: resolver_else6:
+// CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 19791209299968
+// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 19791209299968
+// CHECK-NEXT: [[TMP19:%.*]] = and i1 true, [[TMP18]]
+// CHECK-NEXT: br i1 [[TMP19]], label [[RESOLVER_RETURN7:%.*]], label [[RESOLVER_ELSE8:%.*]]
+// CHECK: resolver_return7:
+// CHECK-NEXT: ret ptr @fmv_inline._Msve2-sm4Mmemtag2
+// CHECK: resolver_else8:
+// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1236950581248
+// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[TMP21]], 1236950581248
+// CHECK-NEXT: [[TMP23:%.*]] = and i1 true, [[TMP22]]
+// CHECK-NEXT: br i1 [[TMP23]], label [[RESOLVER_RETURN9:%.*]], label [[RESOLVER_ELSE10:%.*]]
+// CHECK: resolver_return9:
+// CHECK-NEXT: ret ptr @fmv_inline._Msve2-aesMsve2-sha3
+// CHECK: resolver_else10:
+// CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 4295098368
+// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 4295098368
+// CHECK-NEXT: [[TMP27:%.*]] = and i1 true, [[TMP26]]
+// CHECK-NEXT: br i1 [[TMP27]], label [[RESOLVER_RETURN11:%.*]], label [[RESOLVER_ELSE12:%.*]]
+// CHECK: resolver_return11:
+// CHECK-NEXT: ret ptr @fmv_inline._MditMsve-ebf16
+// CHECK: resolver_else12:
+// CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 3221225472
+// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 3221225472
+// CHECK-NEXT: [[TMP31:%.*]] = and i1 true, [[TMP30]]
+// CHECK-NEXT: br i1 [[TMP31]], label [[RESOLVER_RETURN13:%.*]], label [[RESOLVER_ELSE14:%.*]]
+// CHECK: resolver_return13:
+// CHECK-NEXT: ret ptr @fmv_inline._MsveMsve-bf16
+// CHECK: resolver_else14:
+// CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 20971520
+// CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 20971520
+// CHECK-NEXT: [[TMP35:%.*]] = and i1 true, [[TMP34]]
+// CHECK-NEXT: br i1 [[TMP35]], label [[RESOLVER_RETURN15:%.*]], label [[RESOLVER_ELSE16:%.*]]
+// CHECK: resolver_return15:
+// CHECK-NEXT: ret ptr @fmv_inline._MrcpcMfrintts
+// CHECK: resolver_else16:
+// CHECK-NEXT: [[TMP36:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP37:%.*]] = and i64 [[TMP36]], 8650752
+// CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[TMP37]], 8650752
+// CHECK-NEXT: [[TMP39:%.*]] = and i1 true, [[TMP38]]
+// CHECK-NEXT: br i1 [[TMP39]], label [[RESOLVER_RETURN17:%.*]], label [[RESOLVER_ELSE18:%.*]]
+// CHECK: resolver_return17:
+// CHECK-NEXT: ret ptr @fmv_inline._MdpbMrcpc2
+// CHECK: resolver_else18:
+// CHECK-NEXT: [[TMP40:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP41:%.*]] = and i64 [[TMP40]], 1572864
+// CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP41]], 1572864
+// CHECK-NEXT: [[TMP43:%.*]] = and i1 true, [[TMP42]]
+// CHECK-NEXT: br i1 [[TMP43]], label [[RESOLVER_RETURN19:%.*]], label [[RESOLVER_ELSE20:%.*]]
+// CHECK: resolver_return19:
+// CHECK-NEXT: ret ptr @fmv_inline._Mdpb2Mjscvt
+// CHECK: resolver_else20:
+// CHECK-NEXT: [[TMP44:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP45:%.*]] = and i64 [[TMP44]], 35184372088832
+// CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[TMP45]], 35184372088832
+// CHECK-NEXT: [[TMP47:%.*]] = and i1 true, [[TMP46]]
+// CHECK-NEXT: br i1 [[TMP47]], label [[RESOLVER_RETURN21:%.*]], label [[RESOLVER_ELSE22:%.*]]
+// CHECK: resolver_return21:
+// CHECK-NEXT: ret ptr @fmv_inline._Mmemtag3
+// CHECK: resolver_else22:
+// CHECK-NEXT: ret ptr @fmv_inline
+// CHECK-LABEL: @fmv_e.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: ret ptr @fmv_e._Mls64
+// CHECK-LABEL: @fmv_d.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70368744177664
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 70368744177664
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @fmv_d._Msb
+// CHECK: resolver_else:
+// CHECK-NEXT: ret ptr @fmv_d
+// CHECK-LABEL: @fmv_c.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 281474976710656
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 281474976710656
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @fmv_c._Mssbs
+// CHECK: resolver_else:
+// CHECK-NEXT: ret ptr @fmv_c
+// CHECK-LABEL: @recur(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @reca()
+// CHECK-NEXT: ret void
+// CHECK-LABEL: @main(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// CHECK-NEXT: call void @recur()
+// CHECK-NEXT: [[CALL:%.*]] = call i32 @goo()
+// CHECK-NEXT: ret i32 [[CALL]]
+// CHECK-LABEL: @hoo(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[FP1:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[FP2:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: call void @f(ptr noundef @fmv.ifunc)
+// CHECK-NEXT: store ptr @fmv.ifunc, ptr [[FP1]], align 8
+// CHECK-NEXT: store ptr @fmv.ifunc, ptr [[FP2]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FP1]], align 8
+// CHECK-NEXT: [[CALL:%.*]] = call i32 [[TMP0]]()
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[FP2]], align 8
+// CHECK-NEXT: [[CALL1:%.*]] = call i32 [[TMP1]]()
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]]
+// CHECK-NEXT: ret i32 [[ADD]]
+// CHECK-LABEL: @fmv_inline._Msha1MpmullMf64mm(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 1
+// CHECK-LABEL: @fmv_inline._Mfp16MfcmaMsmeMfp16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 2
+// CHECK-LABEL: @fmv_inline._Msha3Mi8mmMf32mm(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 12
+// CHECK-LABEL: @fmv_inline._MditMsve-ebf16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 8
+// CHECK-LABEL: @fmv_inline._MdpbMrcpc2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 6
+// CHECK-LABEL: @fmv_inline._Mdpb2Mjscvt(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 7
+// CHECK-LABEL: @fmv_inline._MrcpcMfrintts(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 3
+// CHECK-LABEL: @fmv_inline._MsveMsve-bf16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 4
+// CHECK-LABEL: @fmv_inline._Msve2-aesMsve2-sha3(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 5
+// CHECK-LABEL: @fmv_inline._Msve2Msve2-pmull128Msve2-bitperm(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 9
+// CHECK-LABEL: @fmv_inline._Msve2-sm4Mmemtag2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 10
+// CHECK-LABEL: @fmv_inline._Mmemtag3(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 11
+// CHECK-LABEL: @fmv_inline(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 3
+// CHECK-LABEL: @fmv_d._Msb(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 0
+// CHECK-LABEL: define internal i32 @fmv_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 1
+// CHECK-NOFMV-LABEL: @fmv(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: ret i32 0
+// CHECK-NOFMV-LABEL: @fmv_one(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: ret i32 0
+// CHECK-NOFMV-LABEL: @fmv_two(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: ret i32 0
+// CHECK-NOFMV-LABEL: @foo(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: [[CALL:%.*]] = call i32 @fmv()
+// CHECK-NOFMV-NEXT: [[CALL1:%.*]] = call i32 @fmv_one()
+// CHECK-NOFMV-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]]
+// CHECK-NOFMV-NEXT: [[CALL2:%.*]] = call i32 @fmv_two()
+// CHECK-NOFMV-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]]
+// CHECK-NOFMV-NEXT: ret i32 [[ADD3]]
+// CHECK-NOFMV-LABEL: @fmv_e(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: ret i32 20
+// CHECK-NOFMV-LABEL: @fmv_default(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: ret i32 111
+// CHECK-NOFMV-LABEL: @fmv_c(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: ret void
+// CHECK-NOFMV-LABEL: @goo(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: [[CALL:%.*]] = call i32 @fmv_inline()
+// CHECK-NOFMV-NEXT: [[CALL1:%.*]] = call i32 @fmv_e()
+// CHECK-NOFMV-NEXT: [[CALL2:%.*]] = call i32 @fmv_d()
+// CHECK-NOFMV-NEXT: call void @fmv_c()
+// CHECK-NOFMV-NEXT: [[CALL3:%.*]] = call i32 @fmv_default()
+// CHECK-NOFMV-NEXT: ret i32 [[CALL3]]
+// CHECK-NOFMV-LABEL: define internal i32 @fmv_d(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: ret i32 1
+// CHECK-NOFMV-LABEL: @recur(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: call void @reca()
+// CHECK-NOFMV-NEXT: ret void
+// CHECK-NOFMV-LABEL: @main(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-NOFMV-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// CHECK-NOFMV-NEXT: call void @recur()
+// CHECK-NOFMV-NEXT: [[CALL:%.*]] = call i32 @goo()
+// CHECK-NOFMV-NEXT: ret i32 [[CALL]]
+// CHECK-NOFMV-LABEL: @hoo(
+// CHECK-NOFMV-NEXT: entry:
+// CHECK-NOFMV-NEXT: [[FP1:%.*]] = alloca ptr, align 8
+// CHECK-NOFMV-NEXT: [[FP2:%.*]] = alloca ptr, align 8
+// CHECK-NOFMV-NEXT: call void @f(ptr noundef @fmv)
+// CHECK-NOFMV-NEXT: store ptr @fmv, ptr [[FP1]], align 8
+// CHECK-NOFMV-NEXT: store ptr @fmv, ptr [[FP2]], align 8
+// CHECK-NOFMV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FP1]], align 8
+// CHECK-NOFMV-NEXT: [[CALL:%.*]] = call i32 [[TMP0]]()
+// CHECK-NOFMV-NEXT: [[TMP1:%.*]] = load ptr, ptr [[FP2]], align 8
+// CHECK-NOFMV-NEXT: [[CALL1:%.*]] = call i32 [[TMP1]]()
+// CHECK-NOFMV-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]]
+// CHECK-NOFMV-NEXT: ret i32 [[ADD]]
+
+// CHECK: attributes #0 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+flagm,+fp-armv8,+fp16fml,+fullfp16,+ls64,+neon,+rand" }
+// CHECK: attributes #1 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+altnzcv,+bf16,+flagm,+fullfp16,+ls64,+sme,+sme-i16i64" }
+// CHECK: attributes #2 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+lse,+neon,+sha2" }
+// CHECK: attributes #3 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+dotprod,+fp-armv8,+fullfp16,+ls64,+neon" }
+// CHECK: attributes #4 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fp16fml,+fullfp16,+ls64,+neon" }
+// CHECK: attributes #5 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon" }
+// CHECK: attributes #6 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fullfp16,+ls64" }
+// CHECK: attributes #7 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,+fullfp16,+ls64" }
+// CHECK: attributes #8 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fullfp16,+ls64,+sme,+sme2" }
+// CHECK: attributes #9 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64" }
+// CHECK: attributes #10 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ccpp,+fullfp16,+ls64" }
+// CHECK: attributes #11 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64" }
+// CHECK: attributes #12 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+aes,+f64mm,+fp-armv8,+fullfp16,+ls64,+neon,+sve" }
+// CHECK: attributes #13 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+complxnum,+fp-armv8,+fullfp16,+ls64,+neon,+sme" }
+// CHECK: attributes #14 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+f32mm,+fp-armv8,+fullfp16,+i8mm,+ls64,+neon,+sha2,+sha3,+sve" }
+// CHECK: attributes #15 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+dit,+fp-armv8,+fullfp16,+ls64,+neon,+sve" }
+// CHECK: attributes #16 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ccpp,+fullfp16,+ls64,+rcpc" }
+// CHECK: attributes #17 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ccdp,+ccpp,+fp-armv8,+fullfp16,+jsconv,+ls64,+neon" }
+// CHECK: attributes #18 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fptoint,+fullfp16,+ls64,+rcpc" }
+// CHECK: attributes #19 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+ls64,+neon,+sve" }
+// CHECK: attributes #20 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon,+sve,+sve2,+sve2-aes,+sve2-sha3" }
+// CHECK: attributes #21 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon,+sve,+sve2,+sve2-aes,+sve2-bitperm" }
+// CHECK: attributes #22 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+mte,+neon,+sve,+sve2,+sve2-sm4" }
+// CHECK: attributes #23 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+mte" }
+// CHECK: attributes #24 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+sb" }
+
+// CHECK-NOFMV: attributes #0 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" }
+// CHECK-NOFMV: attributes #1 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" }
diff --git a/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp b/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp
new file mode 100644
index 0000000000000..ea008562787fe
--- /dev/null
+++ b/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp
@@ -0,0 +1,195 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs
+// RUN: %clang_cc1 -std=c++11 -triple aarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+
+int __attribute__((target_clones("ls64_v+fp16", "default"))) foo_ovl(int) { return 1; }
+int __attribute__((target_clones("ls64_accdata+ls64"))) foo_ovl(void) { return 2; }
+
+int bar() {
+ return foo_ovl(1) + foo_ovl();
+}
+
+template <typename T1, typename T2> struct MyClass {
+ int __attribute__((target_clones("frintts", "ssbs+sme-f64f64"))) foo_tml() { return 1; }
+};
+
+template <typename T> struct MyClass<int, T> {
+ int __attribute__((target_clones("frintts", "ssbs+sme-f64f64"))) foo_tml() { return 2; }
+};
+
+template <typename T> struct MyClass<float, T> {
+ int foo_tml() { return 3; }
+};
+
+template <> struct MyClass<double, float> {
+ int __attribute__((target_clones("default"))) foo_tml() { return 4; }
+};
+
+void run_foo_tml() {
+ MyClass<short, short> Mc1;
+ Mc1.foo_tml();
+ MyClass<int, short> Mc2;
+ Mc2.foo_tml();
+ MyClass<float, short> Mc3;
+ Mc3.foo_tml();
+ MyClass<double, float> Mc4;
+ Mc4.foo_tml();
+}
+
+
+// CHECK: @__aarch64_cpu_features = external dso_local global { i64 }
+// CHECK: @_Z7foo_ovli.ifunc = weak_odr ifunc i32 (i32), ptr @_Z7foo_ovli.resolver
+// CHECK: @_Z7foo_ovlv.ifunc = weak_odr ifunc i32 (), ptr @_Z7foo_ovlv.resolver
+// CHECK: @_ZN7MyClassIssE7foo_tmlEv.ifunc = weak_odr ifunc i32 (ptr), ptr @_ZN7MyClassIssE7foo_tmlEv.resolver
+// CHECK: @_ZN7MyClassIisE7foo_tmlEv.ifunc = weak_odr ifunc i32 (ptr), ptr @_ZN7MyClassIisE7foo_tmlEv.resolver
+
+// CHECK-LABEL: @_Z7foo_ovli._Mfp16Mls64_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 [[TMP0:%.*]], ptr [[DOTADDR]], align 4
+// CHECK-NEXT: ret i32 1
+// CHECK-LABEL: @_Z7foo_ovli(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 [[TMP0:%.*]], ptr [[DOTADDR]], align 4
+// CHECK-NEXT: ret i32 1
+// CHECK-LABEL: @_Z7foo_ovli.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4503599627436032
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4503599627436032
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @_Z7foo_ovli._Mfp16Mls64_v
+// CHECK: resolver_else:
+// CHECK-NEXT: ret ptr @_Z7foo_ovli
+// CHECK-LABEL: @_Z7foo_ovlv._Mls64Mls64_accdata(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 2
+// CHECK-LABEL: @_Z7foo_ovlv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 2
+// CHECK-LABEL: @_Z7foo_ovlv.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 11258999068426240
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 11258999068426240
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @_Z7foo_ovlv._Mls64Mls64_accdata
+// CHECK: resolver_else:
+// CHECK-NEXT: ret ptr @_Z7foo_ovlv
+// CHECK-LABEL: @_Z3barv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z7foo_ovli.ifunc(i32 noundef 1)
+// CHECK-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z7foo_ovlv.ifunc()
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]]
+// CHECK-NEXT: ret i32 [[ADD]]
+// CHECK-LABEL: @_Z11run_foo_tmlv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[MC1:%.*]] = alloca [[STRUCT_MYCLASS:%.*]], align 1
+// CHECK-NEXT: [[MC2:%.*]] = alloca [[STRUCT_MYCLASS_0:%.*]], align 1
+// CHECK-NEXT: [[MC3:%.*]] = alloca [[STRUCT_MYCLASS_1:%.*]], align 1
+// CHECK-NEXT: [[MC4:%.*]] = alloca [[STRUCT_MYCLASS_2:%.*]], align 1
+// CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN7MyClassIssE7foo_tmlEv.ifunc(ptr noundef nonnull align 1 dereferenceable(1) [[MC1]])
+// CHECK-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN7MyClassIisE7foo_tmlEv.ifunc(ptr noundef nonnull align 1 dereferenceable(1) [[MC2]])
+// CHECK-NEXT: [[CALL2:%.*]] = call noundef i32 @_ZN7MyClassIfsE7foo_tmlEv(ptr noundef nonnull align 1 dereferenceable(1) [[MC3]])
+// CHECK-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN7MyClassIdfE7foo_tmlEv(ptr noundef nonnull align 1 dereferenceable(1) [[MC4]])
+// CHECK-NEXT: ret void
+// CHECK-LABEL: @_ZN7MyClassIssE7foo_tmlEv.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36310271995674624
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36310271995674624
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @_ZN7MyClassIssE7foo_tmlEv._Msme-f64f64Mssbs
+// CHECK: resolver_else:
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16777216
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16777216
+// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
+// CHECK: resolver_return1:
+// CHECK-NEXT: ret ptr @_ZN7MyClassIssE7foo_tmlEv._Mfrintts
+// CHECK: resolver_else2:
+// CHECK-NEXT: ret ptr @_ZN7MyClassIssE7foo_tmlEv
+// CHECK-LABEL: @_ZN7MyClassIisE7foo_tmlEv.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36310271995674624
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36310271995674624
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @_ZN7MyClassIisE7foo_tmlEv._Msme-f64f64Mssbs
+// CHECK: resolver_else:
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16777216
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16777216
+// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
+// CHECK: resolver_return1:
+// CHECK-NEXT: ret ptr @_ZN7MyClassIisE7foo_tmlEv._Mfrintts
+// CHECK: resolver_else2:
+// CHECK-NEXT: ret ptr @_ZN7MyClassIisE7foo_tmlEv
+// CHECK-LABEL: @_ZN7MyClassIfsE7foo_tmlEv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: ret i32 3
+// CHECK-LABEL: @_ZN7MyClassIdfE7foo_tmlEv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: ret i32 4
+// CHECK-LABEL: @_ZN7MyClassIssE7foo_tmlEv._Mfrintts(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: ret i32 1
+// CHECK-LABEL: @_ZN7MyClassIssE7foo_tmlEv._Msme-f64f64Mssbs(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: ret i32 1
+// CHECK-LABEL: @_ZN7MyClassIssE7foo_tmlEv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: ret i32 1
+// CHECK-LABEL: @_ZN7MyClassIisE7foo_tmlEv._Mfrintts(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: ret i32 2
+// CHECK-LABEL: @_ZN7MyClassIisE7foo_tmlEv._Msme-f64f64Mssbs(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: ret i32 2
+// CHECK-LABEL: @_ZN7MyClassIisE7foo_tmlEv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: ret i32 2
+
+// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon" }
+// CHECK: attributes #1 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #2 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ls64" }
+// CHECK: attributes #3 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fptoint" }
+// CHECK: attributes #4 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme,+sme-f64f64" }
diff --git a/clang/test/CodeGenCXX/attr-target-version.cpp b/clang/test/CodeGenCXX/attr-target-version.cpp
new file mode 100644
index 0000000000000..71637cb1320ee
--- /dev/null
+++ b/clang/test/CodeGenCXX/attr-target-version.cpp
@@ -0,0 +1,129 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs
+// RUN: %clang_cc1 -std=c++11 -triple aarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+
+int __attribute__((target_version("sme-f64f64+bf16"))) foo(int) { return 1; }
+int __attribute__((target_version("default"))) foo(int) { return 2; }
+int __attribute__((target_version("sm4+ebf16"))) foo(void) { return 3; }
+int __attribute__((target_version("default"))) foo(void) { return 4; }
+
+struct MyClass {
+ int __attribute__((target_version("dotprod"))) goo(int);
+ int __attribute__((target_version("crc"))) goo(int);
+ int __attribute__((target_version("default"))) goo(int);
+};
+
+int __attribute__((target_version("default"))) MyClass::goo(int) { return 1; }
+int __attribute__((target_version("crc"))) MyClass::goo(int) { return 2; }
+int __attribute__((target_version("dotprod"))) MyClass::goo(int) { return 3; }
+
+int bar() {
+ MyClass m;
+ return m.goo(1) + foo(1) + foo();
+}
+
+
+// CHECK: @__aarch64_cpu_features = external dso_local global { i64 }
+// CHECK: @_ZN7MyClass3gooEi.ifunc = weak_odr ifunc i32 (ptr, i32), ptr @_ZN7MyClass3gooEi.resolver
+// CHECK: @_Z3fooi.ifunc = weak_odr ifunc i32 (i32), ptr @_Z3fooi.resolver
+// CHECK: @_Z3foov.ifunc = weak_odr ifunc i32 (), ptr @_Z3foov.resolver
+
+// CHECK-LABEL: @_Z3fooi._Msme-f64f64Mbf16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 [[TMP0:%.*]], ptr [[DOTADDR]], align 4
+// CHECK-NEXT: ret i32 1
+// CHECK-LABEL: @_Z3fooi(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 [[TMP0:%.*]], ptr [[DOTADDR]], align 4
+// CHECK-NEXT: ret i32 2
+// CHECK-LABEL: @_Z3foov._Msm4Mebf16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 3
+// CHECK-LABEL: @_Z3foov(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 4
+// CHECK-LABEL: @_ZN7MyClass3gooEi(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: store i32 [[TMP0:%.*]], ptr [[DOTADDR]], align 4
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: ret i32 1
+// CHECK-LABEL: @_ZN7MyClass3gooEi._Mcrc(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: store i32 [[TMP0:%.*]], ptr [[DOTADDR]], align 4
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: ret i32 2
+// CHECK-LABEL: @_ZN7MyClass3gooEi._Mdotprod(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: store i32 [[TMP0:%.*]], ptr [[DOTADDR]], align 4
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: ret i32 3
+// CHECK-LABEL: @_Z3barv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[M:%.*]] = alloca [[STRUCT_MYCLASS:%.*]], align 1
+// CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN7MyClass3gooEi.ifunc(ptr noundef nonnull align 1 dereferenceable(1) [[M]], i32 noundef 1)
+// CHECK-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z3fooi.ifunc(i32 noundef 1)
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]]
+// CHECK-NEXT: [[CALL2:%.*]] = call noundef i32 @_Z3foov.ifunc()
+// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]]
+// CHECK-NEXT: ret i32 [[ADD3]]
+// CHECK-LABEL: @_ZN7MyClass3gooEi.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1024
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1024
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @_ZN7MyClass3gooEi._Mcrc
+// CHECK: resolver_else:
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16
+// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
+// CHECK: resolver_return1:
+// CHECK-NEXT: ret ptr @_ZN7MyClass3gooEi._Mdotprod
+// CHECK: resolver_else2:
+// CHECK-NEXT: ret ptr @_ZN7MyClass3gooEi
+// CHECK-LABEL: @_Z3fooi.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36028797153181696
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36028797153181696
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @_Z3fooi._Msme-f64f64Mbf16
+// CHECK: resolver_else:
+// CHECK-NEXT: ret ptr @_Z3fooi
+// CHECK-LABEL: @_Z3foov.resolver(
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 268435488
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 268435488
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @_Z3foov._Msm4Mebf16
+// CHECK: resolver_else:
+// CHECK-NEXT: ret ptr @_Z3foov
+
+// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme,+sme-f64f64" }
+// CHECK: attributes #1 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #2 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+neon,+sm4" }
+// CHECK: attributes #3 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc" }
+// CHECK: attributes #4 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+dotprod,+fp-armv8,+neon" }
+
diff --git a/clang/test/Driver/aarch64-features.c b/clang/test/Driver/aarch64-features.c
index 15809e1737985..5c079b62768ae 100644
--- a/clang/test/Driver/aarch64-features.c
+++ b/clang/test/Driver/aarch64-features.c
@@ -6,6 +6,22 @@
// The AArch64 PCS states that chars should be unsigned.
// CHECK: fno-signed-char
+// Check Function Multi Versioning option and rtlib dependency.
+// RUN: %clang -target aarch64-linux-android -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV %s
+
+// RUN: %clang -target aarch64-linux-android -rtlib=compiler-rt -mno-fmv \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV-OFF %s
+
+// RUN: %clang -target aarch64-linux-gnu \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV-OFF %s
+
+// RUN: %clang -target arm64-unknown-linux -rtlib=libgcc \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV-OFF %s
+
+// CHECK-FMV-OFF: "-target-feature" "-fmv"
+// CHECK-FMV-NOT: "-target-feature" "-fmv"
+
// Check for AArch64 out-of-line atomics default settings.
// RUN: %clang -target aarch64-linux-android -rtlib=compiler-rt \
// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index 1326cec0a4310..5d4a9fc01ef7f 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -183,6 +183,7 @@
// CHECK-NEXT: TLSModel (SubjectMatchRule_variable_is_thread_local)
// CHECK-NEXT: Target (SubjectMatchRule_function)
// CHECK-NEXT: TargetClones (SubjectMatchRule_function)
+// CHECK-NEXT: TargetVersion (SubjectMatchRule_function)
// CHECK-NEXT: TestTypestate (SubjectMatchRule_function_is_member)
// CHECK-NEXT: TrivialABI (SubjectMatchRule_record)
// CHECK-NEXT: Uninitialized (SubjectMatchRule_variable_is_local)
diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c
index 53b6644ef12e6..3ec31e1024a6c 100644
--- a/clang/test/Preprocessor/aarch64-target-features.c
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -150,8 +150,13 @@
// RUN: %clang -target aarch64 -mtune=cyclone -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MTUNE-CYCLONE %s
// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+sve -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE %s
+// CHECK-SVE: __ARM_FEATURE_FP16_SCALAR_ARITHMETIC 1
+// CHECK-SVE: __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 1
// CHECK-SVE: __ARM_FEATURE_SVE 1
// CHECK-SVE: __ARM_FEATURE_SVE_VECTOR_OPERATORS 2
+// CHECK-SVE: __ARM_NEON 1
+// CHECK-SVE: __ARM_NEON_FP 0xE
+// CHECK-SVE: __ARM_NEON_SVE_BRIDGE 1
// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+sve+bf16 -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE-BF16 %s
// CHECK-SVE-BF16: __ARM_FEATURE_BF16_SCALAR_ARITHMETIC 1
@@ -190,7 +195,18 @@
// RUN: %clang -target aarch64-none-linux-gnu -march=armv9-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE2 %s
// RUN: %clang -target aarch64-none-linux-gnu -march=armv9-a+sve2 -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE2 %s
+// CHECK-SVE2: __ARM_FEATURE_FP16_SCALAR_ARITHMETIC 1
+// CHECK-SVE2: __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 1
+// CHECK-SVE2: __ARM_FEATURE_SVE 1
// CHECK-SVE2: __ARM_FEATURE_SVE2 1
+// CHECK-SVE2: __ARM_NEON 1
+// CHECK-SVE2: __ARM_NEON_FP 0xE
+// CHECK-SVE2: __ARM_NEON_SVE_BRIDGE 1
+
+// RUN: %clang -target aarch64-none-linux-gnu -march=armv9-a+nosimd -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-NONEON %s
+// CHECK-NONEON-NOT: __ARM_FEATURE_SVE 1
+// CHECK-NONEON-NOT: __ARM_NEON 1
+
// RUN: %clang -target aarch64-none-linux-gnu -march=armv9-a+sve2-aes -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE2AES %s
// CHECK-SVE2AES: __ARM_FEATURE_SVE2_AES 1
// RUN: %clang -target aarch64-none-linux-gnu -march=armv9-a+sve2-sha3 -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE2SHA3 %s
@@ -202,6 +218,8 @@
// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.2a+dotprod -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-DOTPROD %s
// CHECK-DOTPROD: __ARM_FEATURE_DOTPROD 1
+// CHECK-DOTPROD: __ARM_NEON 1
+// CHECK-DOTPROD: __ARM_NEON_FP 0xE
// On ARMv8.2-A and above, +fp16fml implies +fp16.
// On ARMv8.4-A and above, +fp16 implies +fp16fml.
diff --git a/clang/test/Preprocessor/init-aarch64.c b/clang/test/Preprocessor/init-aarch64.c
index 3c36793d824a5..869d7ca8a0636 100644
--- a/clang/test/Preprocessor/init-aarch64.c
+++ b/clang/test/Preprocessor/init-aarch64.c
@@ -112,6 +112,7 @@
// AARCH64-NEXT: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1
// AARCH64_CXX-NEXT: #define __GLIBCXX_BITSIZE_INT_N_0 128
// AARCH64_CXX-NEXT: #define __GLIBCXX_TYPE_INT_N_0 __int128
+// AARCH64-NEXT: #define __HAVE_FUNCTION_MULTI_VERSIONING 1
// AARCH64-NEXT: #define __INT16_C_SUFFIX__
// AARCH64-NEXT: #define __INT16_FMTd__ "hd"
// AARCH64-NEXT: #define __INT16_FMTi__ "hi"
diff --git a/clang/test/Sema/attr-target-clones-aarch64.c b/clang/test/Sema/attr-target-clones-aarch64.c
new file mode 100644
index 0000000000000..dd8e72c421b13
--- /dev/null
+++ b/clang/test/Sema/attr-target-clones-aarch64.c
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -fsyntax-only -verify %s
+
+void __attribute__((target_clones("fp16+sve2-aes", "sb+sve2-sha3"))) no_def(void);
+
+// expected-warning@+1 {{unsupported 'default' in the 'target_clones' attribute string; 'target_clones' attribute ignored}}
+void __attribute__((target_clones("default+sha3"))) warn1(void);
+// expected-warning@+1 {{version list contains entries that don't impact code generation}}
+void __attribute__((target_clones("ssbs+ls64"))) warn2(void);
+
+// expected-error@+2 {{'target_clones' and 'target_version' attributes are not compatible}}
+// expected-note@+1 {{conflicting attribute is here}}
+void __attribute__((target_version("sve-bf16"), target_clones("sme+memtag"))) not_compat(void);
+
+int redecl(void);
+int __attribute__((target_clones("frintts", "simd+fp", "default"))) redecl(void) { return 1; }
+
+int __attribute__((target_clones("jscvt+fcma", "rcpc", "default"))) redecl2(void);
+int __attribute__((target_clones("jscvt+fcma", "rcpc"))) redecl2(void) { return 1; }
+
+int __attribute__((target_clones("sve+dotprod"))) redecl3(void);
+int redecl3(void);
+
+int __attribute__((target_clones("rng", "fp16fml+fp", "default"))) redecl4(void);
+// expected-error@+3 {{'target_clones' attribute does not match previous declaration}}
+// expected-note@-2 {{previous declaration is here}}
+// expected-warning@+1 {{version list contains entries that don't impact code generation}}
+int __attribute__((target_clones("dgh+memtag+rpres+ls64_v", "ebf16+dpb+sha1", "default"))) redecl4(void) { return 1; }
+
+int __attribute__((target_version("flagm2"))) redef2(void) { return 1; }
+// expected-error@+2 {{multiversioning attributes cannot be combined}}
+// expected-note@-2 {{previous declaration is here}}
+int __attribute__((target_clones("flagm2", "default"))) redef2(void) { return 1; }
+
+int __attribute__((target_clones("f32mm", "f64mm", "sha1+fp"))) redef3(void) { return 1; }
+// expected-error@+2 {{'target_clones' attribute does not match previous declaration}}
+// expected-note@-2 {{previous declaration is here}}
+int __attribute__((target_clones("f32mm", "sha1+fp", "f64mm"))) redef3(void) { return 1; }
+
+int __attribute__((target_clones("rdm+lse+rdm", "lse+rdm"))) dup1(void) { return 1; }
+// expected-warning@+1 {{version list contains duplicate entries}}
+int __attribute__((target_clones("rdm+lse+rdm", "rdm+lse+rdm"))) dup2(void) { return 2; }
+// expected-warning@+1 {{version list contains duplicate entries}}
+int __attribute__((target_clones("rcpc2+sve2-pmull128", "rcpc2+sve2-pmull128"))) dup3(void) { return 3; }
+// expected-warning@+1 {{version list contains duplicate entries}}
+void __attribute__((target_clones("sha3", "default", "default"))) dup4(void);
+// expected-warning@+2 {{version list contains duplicate entries}}
+// expected-warning@+1 {{version list contains duplicate entries}}
+int __attribute__((target_clones("fp", "fp", "crc+dotprod", "dotprod+crc"))) dup5(void) { return 5; }
+
+// expected-warning@+1 {{version list contains duplicate entries}}
+int __attribute__((target_clones("fp16+memtag", "memtag+fp16"))) dup6(void) { return 6; }
+int __attribute__((target_clones("simd+ssbs2", "simd+dpb2"))) dup7(void) { return 7; }
+
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones(""))) empty_target_1(void);
+// expected-warning@+3 {{unsupported 'default' in the 'target_clones' attribute string;}}
+// expected-warning@+2 {{unsupported 'default' in the 'target_clones' attribute string;}}
+// expected-warning@+1 {{version list contains entries that don't impact code generation}}
+void __attribute__((target_clones("default+default"))) empty_target_2(void);
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("+sve2")))
+empty_target_3(void);
+// expected-warning@+1 {{unsupported 'bs' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("sb+bs")))
+empty_target_4(void);
+
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("default", "")))
+empty_target_5(void);
+
+// expected-warning@+1 {{version list contains duplicate entries}}
+void __attribute__((target_clones("sve2-bitperm", "sve2-bitperm")))
+dupe_normal(void);
+
+void __attribute__((target_clones("default"), target_clones("memtag3+bti"))) dupe_normal2(void);
+
+int mv_after_use(void);
+int useage(void) {
+ return mv_after_use();
+}
+// expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}}
+int __attribute__((target_clones("sve2-sha3+ssbs2", "sm4"))) mv_after_use(void) { return 1; }
+// expected-error@+1 {{'main' cannot be a multiversioned function}}
+int __attribute__((target_clones("sve-i8mm"))) main() { return 1; }
diff --git a/clang/test/Sema/attr-target-version.c b/clang/test/Sema/attr-target-version.c
new file mode 100644
index 0000000000000..c054b037ee84d
--- /dev/null
+++ b/clang/test/Sema/attr-target-version.c
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -fsyntax-only -verify %s
+
+int __attribute__((target_version("crc"))) dup(void) { return 3; }
+int __attribute__((target_version("default"))) dup(void) { return 1; }
+//expected-error@+2 {{redefinition of 'dup'}}
+//expected-note@-2 {{previous definition is here}}
+int __attribute__((target_version("default"))) dup(void) { return 2; }
+
+int __attribute__((target_version("default"))) dup1(void) { return 1; }
+//expected-error@+2 {{redefinition of 'dup1'}}
+//expected-note@-2 {{previous definition is here}}
+int dup1(void) { return 2; }
+
+int __attribute__((target_version("aes"))) foo(void) { return 1; }
+//expected-note@+1 {{previous definition is here}}
+int __attribute__((target_version("default"))) foo(void) { return 2; }
+
+//expected-note@+1 {{previous declaration is here}}
+int __attribute__((target_version("sha3 + pmull "))) foo(void) { return 1; }
+
+//expected-error@+1 {{multiversioning attributes cannot be combined}}
+int __attribute__((target("dotprod"))) foo(void) { return -1; }
+
+//expected-error@+1 {{redefinition of 'foo'}}
+int foo(void) { return 2; }
+
+//expected-note@+1 {{previous declaration is here}}
+void __attribute__((target_version("bti+flagm2"))) one(void) {}
+//expected-error@+1 {{multiversioned function redeclarations require identical target attributes}}
+void __attribute__((target_version("flagm2+bti"))) one(void) {}
+
+void __attribute__((target_version("ssbs+sha1"))) two(void) {}
+void __attribute__((target_version("ssbs+fp16fml"))) two(void) {}
+
+//expected-error@+1 {{'main' cannot be a multiversioned function}}
+int __attribute__((target_version("lse"))) main(void) { return 1; }
+
+//expected-note@+1 {{previous definition is here}}
+int hoo(void) { return 1; }
+//expected-note@-1 {{previous definition is here}}
+//expected-warning@+2 {{attribute declaration must precede definition}}
+//expected-error@+1 {{redefinition of 'hoo'}}
+int __attribute__((target_version("dit"))) hoo(void) { return 2; }
+
+//expected-warning@+1 {{unsupported '' in the 'target_version' attribute string; 'target_version' attribute ignored}}
+int __attribute__((target_version(""))) unsup1(void) { return 1; }
+//expected-warning@+1 {{unsupported 'crc32' in the 'target_version' attribute string; 'target_version' attribute ignored}}
+void __attribute__((target_version("crc32"))) unsup2(void) {}
+
+void __attribute__((target_version("default+fp16"))) koo(void) {}
+void __attribute__((target_version("default+default+default"))) loo(void) {}
+void __attribute__((target_version("rdm+rng+crc"))) redef(void) {}
+//expected-error@+2 {{redefinition of 'redef'}}
+//expected-note@-2 {{previous definition is here}}
+void __attribute__((target_version("rdm+rng+crc"))) redef(void) {}
+
+int __attribute__((target_version("sm4"))) def(void);
+void __attribute__((target_version("dit"))) nodef(void);
+void __attribute__((target_version("ls64"))) nodef(void);
+void __attribute__((target_version("aes"))) ovl(void);
+void __attribute__((target_version("default"))) ovl(void);
+int bar() {
+ // expected-error@+2 {{reference to overloaded function could not be resolved; did you mean to call it?}}
+ // expected-note@-3 {{possible target for call}}
+ ovl++;
+ // expected-error@+1 {{no matching function for call to 'nodef'}}
+ nodef();
+ return def();
+}
+// expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}}
+int __attribute__((target_version("sha1"))) def(void) { return 1; }
+
+int __attribute__((target_version("sve"))) prot();
+// expected-error@-1 {{multiversioned function must have a prototype}}
+// expected-note@+1 {{function multiversioning caused by this declaration}}
+int __attribute__((target_version("fcma"))) prot();
+
+int __attribute__((target_version("pmull"))) rtype(int);
+// expected-error@+1 {{multiversioned function declaration has a different return type}}
+float __attribute__((target_version("rdm"))) rtype(int);
+
+int __attribute__((target_version("sha2"))) combine(void) { return 1; }
+// expected-error@+1 {{multiversioned function declaration has a different calling convention}}
+int __attribute__((aarch64_vector_pcs, target_version("sha3"))) combine(void) { return 2; }
diff --git a/clang/test/SemaCXX/attr-target-clones-aarch64.cpp b/clang/test/SemaCXX/attr-target-clones-aarch64.cpp
new file mode 100644
index 0000000000000..8eddfcea390c2
--- /dev/null
+++ b/clang/test/SemaCXX/attr-target-clones-aarch64.cpp
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -fsyntax-only -verify -fexceptions -fcxx-exceptions %s -std=c++14
+
+void lambda() {
+ // expected-error@+1 {{attribute 'target_clones' multiversioned functions do not yet support lambdas}}
+ auto x = []() __attribute__((target_clones("default"))){};
+ x();
+ // expected-error@+1 {{attribute 'target_clones' multiversioned functions do not yet support lambdas}}
+ auto y = []() __attribute__((target_clones("fp16+lse", "rdm"))){};
+ y();
+}
diff --git a/clang/test/SemaCXX/attr-target-version.cpp b/clang/test/SemaCXX/attr-target-version.cpp
new file mode 100644
index 0000000000000..da24503f775f5
--- /dev/null
+++ b/clang/test/SemaCXX/attr-target-version.cpp
@@ -0,0 +1,101 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -fsyntax-only -verify -fexceptions -fcxx-exceptions %s -std=c++14
+void __attribute__((target_version("default"))) wrong_tv(void);
+//expected-warning@+1 {{unsupported 'vmull' in the 'target_version' attribute string; 'target_version' attribute ignored}}
+void __attribute__((target_version("vmull"))) wrong_tv(void);
+
+void __attribute__((target_version("dotprod"))) no_def(void);
+void __attribute__((target_version("rdm+fp"))) no_def(void);
+
+// expected-error@+1 {{no matching function for call to 'no_def'}}
+void foo(void) { no_def(); }
+
+constexpr int __attribute__((target_version("sve2"))) diff_const(void) { return 1; }
+//expected-error@+1 {{multiversioned function declaration has a different constexpr specification}}
+int __attribute__((target_version("sve2-bitperm"))) diff_const(void);
+
+int __attribute__((target_version("fp"))) diff_const1(void) { return 1; }
+//expected-error@+1 {{multiversioned function declaration has a different constexpr specification}}
+constexpr int __attribute__((target_version("sve2-aes"))) diff_const1(void);
+
+static int __attribute__((target_version("sve2-sha3"))) diff_link(void) { return 1; }
+//expected-error@+1 {{multiversioned function declaration has a different linkage}}
+int __attribute__((target_version("dpb"))) diff_link(void);
+
+int __attribute__((target_version("memtag"))) diff_link1(void) { return 1; }
+//expected-error@+1 {{multiversioned function declaration has a different linkage}}
+static int __attribute__((target_version("bti"))) diff_link1(void);
+
+int __attribute__((target_version("flagm2"))) diff_link2(void) { return 1; }
+extern int __attribute__((target_version("flagm"))) diff_link2(void);
+
+namespace {
+static int __attribute__((target_version("memtag3"))) diff_link2(void) { return 2; }
+int __attribute__((target_version("sve2-bitperm"))) diff_link2(void) { return 1; }
+} // namespace
+
+inline int __attribute__((target_version("sme"))) diff_inline(void) { return 1; }
+//expected-error@+1 {{multiversioned function declaration has a different inline specification}}
+int __attribute__((target_version("fp16"))) diff_inline(void) { return 2; }
+
+inline int __attribute__((target_version("sme"))) diff_inline1(void) { return 1; }
+int __attribute__((target_version("default"))) diff_inline1(void) { return 2; }
+
+int __attribute__((target_version("fcma"))) diff_type1(void) { return 1; }
+//expected-error@+1 {{multiversioned function declaration has a different return type}}
+double __attribute__((target_version("rcpc"))) diff_type1(void);
+
+auto __attribute__((target_version("rcpc2"))) diff_type2(void) -> int { return 1; }
+//expected-error@+1 {{multiversioned function declaration has a different return type}}
+auto __attribute__((target_version("sve-bf16"))) diff_type2(void) -> long { return (long)1; }
+
+int __attribute__((target_version("fp16fml"))) diff_type3(void) noexcept(false) { return 1; }
+//expected-error@+2 {{exception specification in declaration does not match previous declaration}}
+//expected-note@-2 {{previous declaration is here}}
+int __attribute__((target_version("sve2-sha3"))) diff_type3(void) noexcept(true) { return 2; }
+
+template <typename T> int __attribute__((target_version("default"))) temp(T) { return 1; }
+
+template <typename T> int __attribute__((target_version("simd"))) temp1(T) { return 1; }
+// expected-error@+1 {{attribute 'target_version' multiversioned functions do not yet support function templates}}
+template <typename T> int __attribute__((target_version("sha3"))) temp1(T) { return 2; }
+
+extern "C" {
+int __attribute__((target_version("aes"))) extc(void) { return 1; }
+}
+//expected-error@+1 {{multiversioned function declaration has a different language linkage}}
+int __attribute__((target_version("lse"))) extc(void) { return 1; }
+
+auto __attribute__((target_version("default"))) ret1(void) { return 1; }
+auto __attribute__((target_version("dpb"))) ret2(void) { return 1; }
+auto __attribute__((target_version("dpb2"))) ret3(void) -> int { return 1; }
+
+class Cls {
+ __attribute__((target_version("rng"))) Cls();
+ __attribute__((target_version("sve-i8mm"))) ~Cls();
+
+ Cls &__attribute__((target_version("f32mm"))) operator=(const Cls &) = default;
+ Cls &__attribute__((target_version("ssbs"))) operator=(Cls &&) = delete;
+
+ virtual void __attribute__((target_version("default"))) vfunc();
+ virtual void __attribute__((target_version("sm4"))) vfunc1();
+};
+
+__attribute__((target_version("sha3"))) void Decl();
+namespace Nms {
+using ::Decl;
+// expected-error@+3 {{declaration conflicts with target of using declaration already in scope}}
+// expected-note@-4 {{target of using declaration}}
+// expected-note@-3 {{using declaration}}
+__attribute__((target_version("jscvt"))) void Decl();
+} // namespace Nms
+
+class Out {
+ int __attribute__((target_version("bti"))) func(void);
+ int __attribute__((target_version("ssbs2"))) func(void);
+};
+int __attribute__((target_version("bti"))) Out::func(void) { return 1; }
+int __attribute__((target_version("ssbs2"))) Out::func(void) { return 2; }
+// expected-error@+3 {{out-of-line definition of 'func' does not match any declaration in 'Out'}}
+// expected-note@-3 {{member declaration nearly matches}}
+// expected-note@-3 {{member declaration nearly matches}}
+int __attribute__((target_version("rng"))) Out::func(void) { return 3; }
diff --git a/compiler-rt/lib/builtins/cpu_model.c b/compiler-rt/lib/builtins/cpu_model.c
index 59bdff699644c..e377d6458a0f7 100644
--- a/compiler-rt/lib/builtins/cpu_model.c
+++ b/compiler-rt/lib/builtins/cpu_model.c
@@ -9,7 +9,7 @@
// This file is based on LLVM's lib/Support/Host.cpp.
// It implements the operating system Host concept and builtin
// __cpu_model for the compiler_rt library for x86 and
-// __aarch64_have_lse_atomics for AArch64.
+// __aarch64_have_lse_atomics, __aarch64_cpu_features for AArch64.
//
//===----------------------------------------------------------------------===//
@@ -837,6 +837,76 @@ int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
return 0;
}
#elif defined(__aarch64__)
+// CPUFeatures must correspond to the same AArch64 features in
+// AArch64TargetParser.h
+enum CPUFeatures {
+ FEAT_RNG,
+ FEAT_FLAGM,
+ FEAT_FLAGM2,
+ FEAT_FP16FML,
+ FEAT_DOTPROD,
+ FEAT_SM4,
+ FEAT_RDM,
+ FEAT_LSE,
+ FEAT_FP,
+ FEAT_SIMD,
+ FEAT_CRC,
+ FEAT_SHA1,
+ FEAT_SHA2,
+ FEAT_SHA3,
+ FEAT_AES,
+ FEAT_PMULL,
+ FEAT_FP16,
+ FEAT_DIT,
+ FEAT_DPB,
+ FEAT_DPB2,
+ FEAT_JSCVT,
+ FEAT_FCMA,
+ FEAT_RCPC,
+ FEAT_RCPC2,
+ FEAT_FRINTTS,
+ FEAT_DGH,
+ FEAT_I8MM,
+ FEAT_BF16,
+ FEAT_EBF16,
+ FEAT_RPRES,
+ FEAT_SVE,
+ FEAT_SVE_BF16,
+ FEAT_SVE_EBF16,
+ FEAT_SVE_I8MM,
+ FEAT_SVE_F32MM,
+ FEAT_SVE_F64MM,
+ FEAT_SVE2,
+ FEAT_SVE_AES,
+ FEAT_SVE_PMULL128,
+ FEAT_SVE_BITPERM,
+ FEAT_SVE_SHA3,
+ FEAT_SVE_SM4,
+ FEAT_SME,
+ FEAT_MEMTAG,
+ FEAT_MEMTAG2,
+ FEAT_MEMTAG3,
+ FEAT_SB,
+ FEAT_PREDRES,
+ FEAT_SSBS,
+ FEAT_SSBS2,
+ FEAT_BTI,
+ FEAT_LS64,
+ FEAT_LS64_V,
+ FEAT_LS64_ACCDATA,
+ FEAT_WFXT,
+ FEAT_SME_F64,
+ FEAT_SME_I64,
+ FEAT_SME2,
+ FEAT_MAX
+};
+// Architecture features used
+// in Function Multi Versioning
+struct {
+ unsigned long long features;
+ // As features grows new fields could be added
+} __aarch64_cpu_features __attribute__((visibility("hidden"), nocommon));
+
// LSE support detection for out-of-line atomics
// using HWCAP and Auxiliary vector
_Bool __aarch64_have_lse_atomics
@@ -844,12 +914,174 @@ _Bool __aarch64_have_lse_atomics
#if defined(__has_include)
#if __has_include(<sys/auxv.h>)
#include <sys/auxv.h>
+#if __has_include(<asm/hwcap.h>)
+#include <asm/hwcap.h>
+
#ifndef AT_HWCAP
#define AT_HWCAP 16
#endif
+
+#ifndef HWCAP_FP
+#define HWCAP_FP (1 << 0)
+#endif
+#ifndef HWCAP_ASIMD
+#define HWCAP_ASIMD (1 << 1)
+#endif
+#ifndef HWCAP_AES
+#define HWCAP_AES (1 << 3)
+#endif
+#ifndef HWCAP_PMULL
+#define HWCAP_PMULL (1 << 4)
+#endif
+#ifndef HWCAP_SHA1
+#define HWCAP_SHA1 (1 << 5)
+#endif
+#ifndef HWCAP_SHA2
+#define HWCAP_SHA2 (1 << 6)
+#endif
#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS (1 << 8)
#endif
+#ifndef HWCAP_FPHP
+#define HWCAP_FPHP (1 << 9)
+#endif
+#ifndef HWCAP_ASIMDHP
+#define HWCAP_ASIMDHP (1 << 10)
+#endif
+#ifndef HWCAP_ASIMDRDM
+#define HWCAP_ASIMDRDM (1 << 12)
+#endif
+#ifndef HWCAP_JSCVT
+#define HWCAP_JSCVT (1 << 13)
+#endif
+#ifndef HWCAP_FCMA
+#define HWCAP_FCMA (1 << 14)
+#endif
+#ifndef HWCAP_LRCPC
+#define HWCAP_LRCPC (1 << 15)
+#endif
+#ifndef HWCAP_DCPOP
+#define HWCAP_DCPOP (1 << 16)
+#endif
+#ifndef HWCAP_SHA3
+#define HWCAP_SHA3 (1 << 17)
+#endif
+#ifndef HWCAP_SM3
+#define HWCAP_SM3 (1 << 18)
+#endif
+#ifndef HWCAP_SM4
+#define HWCAP_SM4 (1 << 19)
+#endif
+#ifndef HWCAP_ASIMDDP
+#define HWCAP_ASIMDDP (1 << 20)
+#endif
+#ifndef HWCAP_SHA512
+#define HWCAP_SHA512 (1 << 21)
+#endif
+#ifndef HWCAP_SVE
+#define HWCAP_SVE (1 << 22)
+#endif
+#ifndef HWCAP_ASIMDFHM
+#define HWCAP_ASIMDFHM (1 << 23)
+#endif
+#ifndef HWCAP_DIT
+#define HWCAP_DIT (1 << 24)
+#endif
+#ifndef HWCAP_ILRCPC
+#define HWCAP_ILRCPC (1 << 26)
+#endif
+#ifndef HWCAP_FLAGM
+#define HWCAP_FLAGM (1 << 27)
+#endif
+#ifndef HWCAP_SSBS
+#define HWCAP_SSBS (1 << 28)
+#endif
+#ifndef HWCAP_SB
+#define HWCAP_SB (1 << 29)
+#endif
+
+#ifndef HWCAP2_DCPODP
+#define HWCAP2_DCPODP (1 << 0)
+#endif
+#ifndef HWCAP2_SVE2
+#define HWCAP2_SVE2 (1 << 1)
+#endif
+#ifndef HWCAP2_SVEAES
+#define HWCAP2_SVEAES (1 << 2)
+#endif
+#ifndef HWCAP2_SVEPMULL
+#define HWCAP2_SVEPMULL (1 << 3)
+#endif
+#ifndef HWCAP2_SVEBITPERM
+#define HWCAP2_SVEBITPERM (1 << 4)
+#endif
+#ifndef HWCAP2_SVESHA3
+#define HWCAP2_SVESHA3 (1 << 5)
+#endif
+#ifndef HWCAP2_SVESM4
+#define HWCAP2_SVESM4 (1 << 6)
+#endif
+#ifndef HWCAP2_FLAGM2
+#define HWCAP2_FLAGM2 (1 << 7)
+#endif
+#ifndef HWCAP2_FRINT
+#define HWCAP2_FRINT (1 << 8)
+#endif
+#ifndef HWCAP2_SVEI8MM
+#define HWCAP2_SVEI8MM (1 << 9)
+#endif
+#ifndef HWCAP2_SVEF32MM
+#define HWCAP2_SVEF32MM (1 << 10)
+#endif
+#ifndef HWCAP2_SVEF64MM
+#define HWCAP2_SVEF64MM (1 << 11)
+#endif
+#ifndef HWCAP2_SVEBF16
+#define HWCAP2_SVEBF16 (1 << 12)
+#endif
+#ifndef HWCAP2_I8MM
+#define HWCAP2_I8MM (1 << 13)
+#endif
+#ifndef HWCAP2_BF16
+#define HWCAP2_BF16 (1 << 14)
+#endif
+#ifndef HWCAP2_DGH
+#define HWCAP2_DGH (1 << 15)
+#endif
+#ifndef HWCAP2_RNG
+#define HWCAP2_RNG (1 << 16)
+#endif
+#ifndef HWCAP2_BTI
+#define HWCAP2_BTI (1 << 17)
+#endif
+#ifndef HWCAP2_MTE
+#define HWCAP2_MTE (1 << 18)
+#endif
+#ifndef HWCAP2_RPRES
+#define HWCAP2_RPRES (1 << 21)
+#endif
+#ifndef HWCAP2_MTE3
+#define HWCAP2_MTE3 (1 << 22)
+#endif
+#ifndef HWCAP2_SME
+#define HWCAP2_SME (1 << 23)
+#endif
+#ifndef HWCAP2_SME_I16I64
+#define HWCAP2_SME_I16I64 (1 << 24)
+#endif
+#ifndef HWCAP2_SME_F64F64
+#define HWCAP2_SME_F64F64 (1 << 25)
+#endif
+#ifndef HWCAP2_WFXT
+#define HWCAP2_WFXT (1UL << 31)
+#endif
+#ifndef HWCAP2_EBF16
+#define HWCAP2_EBF16 (1UL << 32)
+#endif
+#ifndef HWCAP2_SVE_EBF16
+#define HWCAP2_SVE_EBF16 (1UL << 33)
+#endif
+
#if defined(__ANDROID__)
#include <string.h>
#include <sys/system_properties.h>
@@ -857,6 +1089,13 @@ _Bool __aarch64_have_lse_atomics
#include <zircon/features.h>
#include <zircon/syscalls.h>
#endif
+
+// Detect Exynos 9810 CPU
+#define IF_EXYNOS9810 \
+ char arch[PROP_VALUE_MAX]; \
+ if (__system_property_get("ro.arch", arch) > 0 && \
+ strncmp(arch, "exynos9810", sizeof("exynos9810") - 1) == 0)
+
static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
#if defined(__FreeBSD__)
unsigned long hwcap;
@@ -875,25 +1114,233 @@ static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
_Bool result = (hwcap & HWCAP_ATOMICS) != 0;
#if defined(__ANDROID__)
if (result) {
- char arch[PROP_VALUE_MAX];
- if (__system_property_get("ro.arch", arch) > 0 &&
- strncmp(arch, "exynos9810", sizeof("exynos9810") - 1) == 0) {
- // Some cores in the Exynos 9810 CPU are ARMv8.2 and others are ARMv8.0;
- // only the former support LSE atomics. However, the kernel in the
- // initial Android 8.0 release of Galaxy S9/S9+ devices incorrectly
- // reported the feature as being supported.
- //
- // The kernel appears to have been corrected to mark it unsupported as of
- // the Android 9.0 release on those devices, and this issue has not been
- // observed anywhere else. Thus, this workaround may be removed if
- // compiler-rt ever drops support for Android 8.0.
- result = false;
- }
+ // Some cores in the Exynos 9810 CPU are ARMv8.2 and others are ARMv8.0;
+ // only the former support LSE atomics. However, the kernel in the
+ // initial Android 8.0 release of Galaxy S9/S9+ devices incorrectly
+ // reported the feature as being supported.
+ //
+ // The kernel appears to have been corrected to mark it unsupported as of
+ // the Android 9.0 release on those devices, and this issue has not been
+ // observed anywhere else. Thus, this workaround may be removed if
+ // compiler-rt ever drops support for Android 8.0.
+ IF_EXYNOS9810 result = false;
}
#endif // defined(__ANDROID__)
__aarch64_have_lse_atomics = result;
#endif // defined(__FreeBSD__)
}
+
+void init_cpu_features_resolver(unsigned long hwcap, unsigned long hwcap2) {
+#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
+#define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
+#define extractBits(val, start, number) \
+ (val & ((1ULL << number) - 1ULL) << start) >> start
+ if (hwcap & HWCAP_CRC32)
+ setCPUFeature(FEAT_CRC);
+ if (hwcap & HWCAP_PMULL)
+ setCPUFeature(FEAT_PMULL);
+ if (hwcap & HWCAP_FLAGM)
+ setCPUFeature(FEAT_FLAGM);
+ if (hwcap2 & HWCAP2_FLAGM2) {
+ setCPUFeature(FEAT_FLAGM);
+ setCPUFeature(FEAT_FLAGM2);
+ }
+ if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4)
+ setCPUFeature(FEAT_SM4);
+ if (hwcap & HWCAP_ASIMDDP)
+ setCPUFeature(FEAT_DOTPROD);
+ if (hwcap & HWCAP_ASIMDFHM)
+ setCPUFeature(FEAT_FP16FML);
+ if (hwcap & HWCAP_FPHP) {
+ setCPUFeature(FEAT_FP16);
+ setCPUFeature(FEAT_FP);
+ }
+ if (hwcap & HWCAP_DIT)
+ setCPUFeature(FEAT_DIT);
+ if (hwcap & HWCAP_ASIMDRDM)
+ setCPUFeature(FEAT_RDM);
+ if (hwcap & HWCAP_ILRCPC)
+ setCPUFeature(FEAT_RCPC2);
+ if (hwcap & HWCAP_AES)
+ setCPUFeature(FEAT_AES);
+ if (hwcap & HWCAP_SHA1)
+ setCPUFeature(FEAT_SHA1);
+ if (hwcap & HWCAP_SHA2)
+ setCPUFeature(FEAT_SHA2);
+ if (hwcap & HWCAP_JSCVT)
+ setCPUFeature(FEAT_JSCVT);
+ if (hwcap & HWCAP_FCMA)
+ setCPUFeature(FEAT_FCMA);
+ if (hwcap & HWCAP_SB)
+ setCPUFeature(FEAT_SB);
+ if (hwcap & HWCAP_SSBS)
+ setCPUFeature(FEAT_SSBS2);
+ if (hwcap2 & HWCAP2_MTE) {
+ setCPUFeature(FEAT_MEMTAG);
+ setCPUFeature(FEAT_MEMTAG2);
+ }
+ if (hwcap2 & HWCAP2_MTE3) {
+ setCPUFeature(FEAT_MEMTAG);
+ setCPUFeature(FEAT_MEMTAG2);
+ setCPUFeature(FEAT_MEMTAG3);
+ }
+ if (hwcap2 & HWCAP2_SVEAES)
+ setCPUFeature(FEAT_SVE_AES);
+ if (hwcap2 & HWCAP2_SVEPMULL) {
+ setCPUFeature(FEAT_SVE_AES);
+ setCPUFeature(FEAT_SVE_PMULL128);
+ }
+ if (hwcap2 & HWCAP2_SVEBITPERM)
+ setCPUFeature(FEAT_SVE_BITPERM);
+ if (hwcap2 & HWCAP2_SVESHA3)
+ setCPUFeature(FEAT_SVE_SHA3);
+ if (hwcap2 & HWCAP2_SVESM4)
+ setCPUFeature(FEAT_SVE_SM4);
+ if (hwcap2 & HWCAP2_DCPODP)
+ setCPUFeature(FEAT_DPB2);
+ if (hwcap & HWCAP_ATOMICS)
+ setCPUFeature(FEAT_LSE);
+ if (hwcap2 & HWCAP2_RNG)
+ setCPUFeature(FEAT_RNG);
+ if (hwcap2 & HWCAP2_I8MM)
+ setCPUFeature(FEAT_I8MM);
+ if (hwcap2 & HWCAP2_EBF16)
+ setCPUFeature(FEAT_EBF16);
+ if (hwcap2 & HWCAP2_SVE_EBF16)
+ setCPUFeature(FEAT_SVE_EBF16);
+ if (hwcap2 & HWCAP2_DGH)
+ setCPUFeature(FEAT_DGH);
+ if (hwcap2 & HWCAP2_FRINT)
+ setCPUFeature(FEAT_FRINTTS);
+ if (hwcap2 & HWCAP2_SVEI8MM)
+ setCPUFeature(FEAT_SVE_I8MM);
+ if (hwcap2 & HWCAP2_SVEF32MM)
+ setCPUFeature(FEAT_SVE_F32MM);
+ if (hwcap2 & HWCAP2_SVEF64MM)
+ setCPUFeature(FEAT_SVE_F64MM);
+ if (hwcap2 & HWCAP2_BTI)
+ setCPUFeature(FEAT_BTI);
+ if (hwcap2 & HWCAP2_RPRES)
+ setCPUFeature(FEAT_RPRES);
+ if (hwcap2 & HWCAP2_WFXT)
+ setCPUFeature(FEAT_WFXT);
+ if (hwcap2 & HWCAP2_SME)
+ setCPUFeature(FEAT_SME);
+ if (hwcap2 & HWCAP2_SME_I16I64)
+ setCPUFeature(FEAT_SME_I64);
+ if (hwcap2 & HWCAP2_SME_F64F64)
+ setCPUFeature(FEAT_SME_F64);
+ if (hwcap & HWCAP_CPUID) {
+ unsigned long ftr;
+ getCPUFeature(ID_AA64PFR1_EL1, ftr);
+ // ID_AA64PFR1_EL1.MTE >= 0b0001
+ if (extractBits(ftr, 8, 4) >= 0x1)
+ setCPUFeature(FEAT_MEMTAG);
+ // ID_AA64PFR1_EL1.SSBS == 0b0001
+ if (extractBits(ftr, 4, 4) == 0x1)
+ setCPUFeature(FEAT_SSBS);
+ // ID_AA64PFR1_EL1.SME == 0b0010
+ if (extractBits(ftr, 24, 4) == 0x2)
+ setCPUFeature(FEAT_SME2);
+ getCPUFeature(ID_AA64PFR0_EL1, ftr);
+ // ID_AA64PFR0_EL1.FP != 0b1111
+ if (extractBits(ftr, 16, 4) != 0xF) {
+ setCPUFeature(FEAT_FP);
+ // ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP
+ setCPUFeature(FEAT_SIMD);
+ }
+ // ID_AA64PFR0_EL1.SVE != 0b0000
+ if (extractBits(ftr, 32, 4) != 0x0) {
+ // get ID_AA64ZFR0_EL1, that name supported
+ // if sve enabled only
+ getCPUFeature(S3_0_C0_C4_4, ftr);
+ // ID_AA64ZFR0_EL1.SVEver == 0b0000
+ if (extractBits(ftr, 0, 4) == 0x0)
+ setCPUFeature(FEAT_SVE);
+ // ID_AA64ZFR0_EL1.SVEver == 0b0001
+ if (extractBits(ftr, 0, 4) == 0x1)
+ setCPUFeature(FEAT_SVE2);
+ // ID_AA64ZFR0_EL1.BF16 != 0b0000
+ if (extractBits(ftr, 20, 4) != 0x0)
+ setCPUFeature(FEAT_SVE_BF16);
+ }
+ getCPUFeature(ID_AA64ISAR0_EL1, ftr);
+ // ID_AA64ISAR0_EL1.SHA3 != 0b0000
+ if (extractBits(ftr, 32, 4) != 0x0)
+ setCPUFeature(FEAT_SHA3);
+ getCPUFeature(ID_AA64ISAR1_EL1, ftr);
+ // ID_AA64ISAR1_EL1.DPB >= 0b0001
+ if (extractBits(ftr, 0, 4) >= 0x1)
+ setCPUFeature(FEAT_DPB);
+ // ID_AA64ISAR1_EL1.LRCPC != 0b0000
+ if (extractBits(ftr, 20, 4) != 0x0)
+ setCPUFeature(FEAT_RCPC);
+ // ID_AA64ISAR1_EL1.SPECRES == 0b0001
+ if (extractBits(ftr, 40, 4) == 0x2)
+ setCPUFeature(FEAT_PREDRES);
+ // ID_AA64ISAR1_EL1.BF16 != 0b0000
+ if (extractBits(ftr, 44, 4) != 0x0)
+ setCPUFeature(FEAT_BF16);
+ // ID_AA64ISAR1_EL1.LS64 >= 0b0001
+ if (extractBits(ftr, 60, 4) >= 0x1)
+ setCPUFeature(FEAT_LS64);
+ // ID_AA64ISAR1_EL1.LS64 >= 0b0010
+ if (extractBits(ftr, 60, 4) >= 0x2)
+ setCPUFeature(FEAT_LS64_V);
+ // ID_AA64ISAR1_EL1.LS64 >= 0b0011
+ if (extractBits(ftr, 60, 4) >= 0x3)
+ setCPUFeature(FEAT_LS64_ACCDATA);
+ } else {
+ // Set some features in case of no CPUID support
+ if (hwcap & (HWCAP_FP | HWCAP_FPHP)) {
+ setCPUFeature(FEAT_FP);
+ // FP and AdvSIMD fields have the same value
+ setCPUFeature(FEAT_SIMD);
+ }
+ if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP)
+ setCPUFeature(FEAT_DPB);
+ if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC)
+ setCPUFeature(FEAT_RCPC);
+ if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16)
+ setCPUFeature(FEAT_BF16);
+ if (hwcap2 & HWCAP2_SVEBF16)
+ setCPUFeature(FEAT_SVE_BF16);
+ if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE)
+ setCPUFeature(FEAT_SVE2);
+ if (hwcap & HWCAP_SHA3)
+ setCPUFeature(FEAT_SHA3);
+ }
+}
+
+void CONSTRUCTOR_ATTRIBUTE init_cpu_features(void) {
+ unsigned long hwcap;
+ unsigned long hwcap2;
+ // CPU features already initialized.
+ if (__aarch64_cpu_features.features)
+ return;
+ setCPUFeature(FEAT_MAX);
+#if defined(__FreeBSD__)
+ int res = 0;
+ res = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
+ res |= elf_aux_info(AT_HWCAP2, &hwcap2, sizeof hwcap2);
+ if (res)
+ return;
+#else
+#if defined(__ANDROID__)
+ // Don't set any CPU features,
+ // detection could be wrong on Exynos 9810.
+ IF_EXYNOS9810 return;
+#endif // defined(__ANDROID__)
+ hwcap = getauxval(AT_HWCAP);
+ hwcap2 = getauxval(AT_HWCAP2);
+#endif // defined(__FreeBSD__)
+ init_cpu_features_resolver(hwcap, hwcap2);
+#undef extractBits
+#undef getCPUFeature
+#undef setCPUFeature
+#undef IF_EXYNOS9810
+}
#endif // defined(__has_include)
#endif // __has_include(<sys/auxv.h>)
+#endif // __has_include(<asm/hwcap.h>)
#endif // defined(__aarch64__)
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.def b/llvm/include/llvm/TargetParser/AArch64TargetParser.def
index 4ac5d2b99c31c..e7fadc98a9c0d 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.def
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.def
@@ -101,65 +101,152 @@ AARCH64_ARCH("armv8-r", ARMV8R, "+v8r",
#undef AARCH64_ARCH
#ifndef AARCH64_ARCH_EXT_NAME
-#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE)
+#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE,\
+ FMV_ID, DEP_FEATURES, FMV_PRIORITY)
#endif
// FIXME: This would be nicer were it tablegen
-AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, {}, {})
-AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, {}, {})
-AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
-AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
-AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm")
-AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto", "-crypto")
-AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4")
-AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3")
-AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2")
-AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes")
-AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod", "-dotprod")
-AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
-AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
-AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
-AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
-AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
-AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
-AARCH64_ARCH_EXT_NAME("rasv2", AArch64::AEK_RASv2, "+rasv2", "-rasv2")
-AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
-AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2")
-AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes")
-AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4")
-AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3")
-AARCH64_ARCH_EXT_NAME("sve2-bitperm", AArch64::AEK_SVE2BITPERM, "+sve2-bitperm", "-sve2-bitperm")
-AARCH64_ARCH_EXT_NAME("sve2p1", AArch64::AEK_SVE2p1, "+sve2p1", "-sve2p1")
-AARCH64_ARCH_EXT_NAME("b16b16", AArch64::AEK_B16B16, "+b16b16", "-b16b16")
-AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
-AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
-AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
-AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
-AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
-AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
-AARCH64_ARCH_EXT_NAME("bf16", AArch64::AEK_BF16, "+bf16", "-bf16")
-AARCH64_ARCH_EXT_NAME("i8mm", AArch64::AEK_I8MM, "+i8mm", "-i8mm")
-AARCH64_ARCH_EXT_NAME("f32mm", AArch64::AEK_F32MM, "+f32mm", "-f32mm")
-AARCH64_ARCH_EXT_NAME("f64mm", AArch64::AEK_F64MM, "+f64mm", "-f64mm")
-AARCH64_ARCH_EXT_NAME("tme", AArch64::AEK_TME, "+tme", "-tme")
-AARCH64_ARCH_EXT_NAME("ls64", AArch64::AEK_LS64, "+ls64", "-ls64")
-AARCH64_ARCH_EXT_NAME("brbe", AArch64::AEK_BRBE, "+brbe", "-brbe")
-AARCH64_ARCH_EXT_NAME("pauth", AArch64::AEK_PAUTH, "+pauth", "-pauth")
-AARCH64_ARCH_EXT_NAME("flagm", AArch64::AEK_FLAGM, "+flagm", "-flagm")
-AARCH64_ARCH_EXT_NAME("sme", AArch64::AEK_SME, "+sme", "-sme")
-AARCH64_ARCH_EXT_NAME("sme-f64f64", AArch64::AEK_SMEF64F64, "+sme-f64f64", "-sme-f64f64")
-AARCH64_ARCH_EXT_NAME("sme-i16i64", AArch64::AEK_SMEI16I64, "+sme-i16i64", "-sme-i16i64")
-AARCH64_ARCH_EXT_NAME("sme-f16f16", AArch64::AEK_SMEF16F16, "+sme-f16f16", "-sme-f16f16")
-AARCH64_ARCH_EXT_NAME("sme2", AArch64::AEK_SME2, "+sme2", "-sme2")
-AARCH64_ARCH_EXT_NAME("sme2p1", AArch64::AEK_SME2p1, "+sme2p1", "-sme2p1")
-AARCH64_ARCH_EXT_NAME("hbc", AArch64::AEK_HBC, "+hbc", "-hbc")
-AARCH64_ARCH_EXT_NAME("mops", AArch64::AEK_MOPS, "+mops", "-mops")
-AARCH64_ARCH_EXT_NAME("pmuv3", AArch64::AEK_PERFMON, "+perfmon", "-perfmon")
-AARCH64_ARCH_EXT_NAME("predres2", AArch64::AEK_SPECRES2, "+specres2", "-specres2")
-AARCH64_ARCH_EXT_NAME("cssc", AArch64::AEK_CSSC, "+cssc", "-cssc")
-AARCH64_ARCH_EXT_NAME("rcpc3", AArch64::AEK_RCPC3, "+rcpc3", "-rcpc3")
-AARCH64_ARCH_EXT_NAME("the", AArch64::AEK_THE, "+the", "-the")
-AARCH64_ARCH_EXT_NAME("d128", AArch64::AEK_D128, "+d128", "-d128")
-AARCH64_ARCH_EXT_NAME("lse128", AArch64::AEK_LSE128, "+lse128", "-lse128")
+AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, {}, {}, MAX, "", 0)
+// "none" feature has the maximum allowed function multi versioning priority
+AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, {}, {}, MAX, "", 1000)
+AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc", CRC, "+crc", 110)
+AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse", LSE, "+lse", 80)
+AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm", RDM,
+ "+rdm,+fp-armv8,+neon", 70)
+AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto", "-crypto", MAX,
+ "", 0)
+AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4", SM4,
+ "+sm4,+fp-armv8,+neon", 60)
+AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3", SHA3,
+ "+sha3,+sha2,+fp-armv8,+neon", 140)
+AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2", SHA2,
+ "+sha2,+fp-armv8,+neon", 130)
+AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes", AES,
+ "+fp-armv8,+neon", 150)
+AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod", "-dotprod",
+ DOTPROD, "+dotprod,+fp-armv8,+neon", 50)
+AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8", FP,
+ "+fp-armv8,+neon", 90)
+AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon", SIMD,
+ "+fp-armv8,+neon", 100)
+AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16", FP16,
+ "+fullfp16,+fp-armv8,+neon", 170)
+AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml",
+ FP16FML, "+fp16fml,+fullfp16,+fp-armv8,+neon", 40)
+AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe", MAX, "",
+ 0)
+AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras", MAX, "", 0)
+AARCH64_ARCH_EXT_NAME("rasv2", AArch64::AEK_RASv2, "+rasv2", "-rasv2", MAX, "",
+ 0)
+AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve", SVE,
+ "+sve,+fullfp16,+fp-armv8,+neon", 310)
+AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2", SVE2,
+ "+sve2,+sve,+fullfp16,+fp-armv8,+neon", 370)
+AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes",
+ "-sve2-aes", SVE_AES,
+ "+sve2,+sve,+sve2-aes,+fullfp16,+fp-armv8,+neon", 380)
+AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4",
+ "-sve2-sm4", SVE_SM4,
+ "+sve2,+sve,+sve2-sm4,+fullfp16,+fp-armv8,+neon", 420)
+AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3",
+ "-sve2-sha3", SVE_SHA3,
+ "+sve2,+sve,+sve2-sha3,+fullfp16,+fp-armv8,+neon", 410)
+AARCH64_ARCH_EXT_NAME("sve2-bitperm", AArch64::AEK_SVE2BITPERM, "+sve2-bitperm",
+ "-sve2-bitperm", SVE_BITPERM,
+ "+sve2,+sve,+sve2-bitperm,+fullfp16,+fp-armv8,+neon", 400)
+AARCH64_ARCH_EXT_NAME("sve2p1", AArch64::AEK_SVE2p1, "+sve2p1", "-sve2p1", MAX,
+ "", 0)
+AARCH64_ARCH_EXT_NAME("b16b16", AArch64::AEK_B16B16, "+b16b16", "-b16b16", MAX,
+ "", 0)
+AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc", RCPC,
+ "+rcpc", 230)
+AARCH64_ARCH_EXT_NAME("rcpc2", AArch64::AEK_NONE, {}, {}, RCPC2, "+rcpc", 240)
+AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand", RNG, "+rand",
+ 10)
+AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte", MEMTAG, "",
+ 440)
+AARCH64_ARCH_EXT_NAME("memtag2", AArch64::AEK_NONE, {}, {}, MEMTAG2, "+mte",
+ 450)
+AARCH64_ARCH_EXT_NAME("memtag3", AArch64::AEK_NONE, {}, {}, MEMTAG3, "+mte",
+ 460)
+AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs", SSBS, "",
+ 490)
+AARCH64_ARCH_EXT_NAME("ssbs2", AArch64::AEK_NONE, {}, {}, SSBS2, "+ssbs", 500)
+AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb", SB, "+sb", 470)
+AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres",
+ PREDRES, "+predres", 480)
+AARCH64_ARCH_EXT_NAME("bf16", AArch64::AEK_BF16, "+bf16", "-bf16", BF16,
+ "+bf16", 280)
+AARCH64_ARCH_EXT_NAME("i8mm", AArch64::AEK_I8MM, "+i8mm", "-i8mm", I8MM,
+ "+i8mm", 270)
+AARCH64_ARCH_EXT_NAME("f32mm", AArch64::AEK_F32MM, "+f32mm", "-f32mm",
+ SVE_F32MM, "+sve,+f32mm,+fullfp16,+fp-armv8,+neon", 350)
+AARCH64_ARCH_EXT_NAME("f64mm", AArch64::AEK_F64MM, "+f64mm", "-f64mm",
+ SVE_F64MM, "+sve,+f64mm,+fullfp16,+fp-armv8,+neon", 360)
+AARCH64_ARCH_EXT_NAME("tme", AArch64::AEK_TME, "+tme", "-tme", MAX, "", 0)
+AARCH64_ARCH_EXT_NAME("ls64", AArch64::AEK_LS64, "+ls64", "-ls64", LS64, "",
+ 520)
+AARCH64_ARCH_EXT_NAME("brbe", AArch64::AEK_BRBE, "+brbe", "-brbe", MAX, "", 0)
+AARCH64_ARCH_EXT_NAME("pauth", AArch64::AEK_PAUTH, "+pauth", "-pauth", MAX, "",
+ 0)
+AARCH64_ARCH_EXT_NAME("flagm", AArch64::AEK_FLAGM, "+flagm", "-flagm", FLAGM,
+ "+flagm", 20)
+AARCH64_ARCH_EXT_NAME("flagm2", AArch64::AEK_NONE, {}, {}, FLAGM2,
+ "+flagm,+altnzcv", 30)
+AARCH64_ARCH_EXT_NAME("sme", AArch64::AEK_SME, "+sme", "-sme", SME,
+ "+sme,+bf16", 430)
+AARCH64_ARCH_EXT_NAME("sme-f64f64", AArch64::AEK_SMEF64F64, "+sme-f64f64",
+ "-sme-f64f64", SME_F64, "+sme,+sme-f64f64,+bf16", 560)
+AARCH64_ARCH_EXT_NAME("sme-i16i64", AArch64::AEK_SMEI16I64, "+sme-i16i64",
+ "-sme-i16i64", SME_I64, "+sme,+sme-i16i64,+bf16", 570)
+AARCH64_ARCH_EXT_NAME("sme-f16f16", AArch64::AEK_SMEF16F16, "+sme-f16f16",
+ "-sme-f16f16", MAX, "", 0)
+AARCH64_ARCH_EXT_NAME("sme2", AArch64::AEK_SME2, "+sme2", "-sme2", SME2,
+ "+sme2,+sme,+bf16", 580)
+AARCH64_ARCH_EXT_NAME("sme2p1", AArch64::AEK_SME2p1, "+sme2p1", "-sme2p1", MAX,
+ "", 0)
+AARCH64_ARCH_EXT_NAME("hbc", AArch64::AEK_HBC, "+hbc", "-hbc", MAX, "", 0)
+AARCH64_ARCH_EXT_NAME("mops", AArch64::AEK_MOPS, "+mops", "-mops", MAX, "", 0)
+AARCH64_ARCH_EXT_NAME("pmuv3", AArch64::AEK_PERFMON, "+perfmon", "-perfmon",
+ MAX, "", 0)
+AARCH64_ARCH_EXT_NAME("predres2", AArch64::AEK_SPECRES2, "+specres2",
+ "-specres2", MAX, "", 0)
+AARCH64_ARCH_EXT_NAME("cssc", AArch64::AEK_CSSC, "+cssc", "-cssc", MAX, "", 0)
+AARCH64_ARCH_EXT_NAME("rcpc3", AArch64::AEK_RCPC3, "+rcpc3", "-rcpc3", MAX, "",
+ 0)
+AARCH64_ARCH_EXT_NAME("the", AArch64::AEK_THE, "+the", "-the", MAX, "", 0)
+AARCH64_ARCH_EXT_NAME("d128", AArch64::AEK_D128, "+d128", "-d128", MAX, "", 0)
+AARCH64_ARCH_EXT_NAME("lse128", AArch64::AEK_LSE128, "+lse128", "-lse128", MAX,
+ "", 0)
+AARCH64_ARCH_EXT_NAME("sha1", AArch64::AEK_NONE, {}, {}, SHA1,
+ "+fp-armv8,+neon", 120)
+AARCH64_ARCH_EXT_NAME("pmull", AArch64::AEK_NONE, {}, {}, PMULL,
+ "+aes,+fp-armv8,+neon", 160)
+AARCH64_ARCH_EXT_NAME("dit", AArch64::AEK_NONE, {}, {}, DIT, "+dit", 180)
+AARCH64_ARCH_EXT_NAME("dpb", AArch64::AEK_NONE, {}, {}, DPB, "+ccpp", 190)
+AARCH64_ARCH_EXT_NAME("dpb2", AArch64::AEK_NONE, {}, {}, DPB2, "+ccpp,+ccdp",
+ 200)
+AARCH64_ARCH_EXT_NAME("jscvt", AArch64::AEK_NONE, {}, {}, JSCVT,
+ "+fp-armv8,+neon,+jsconv", 210)
+AARCH64_ARCH_EXT_NAME("fcma", AArch64::AEK_NONE, {}, {}, FCMA,
+ "+fp-armv8,+neon,+complxnum", 220)
+AARCH64_ARCH_EXT_NAME("frintts", AArch64::AEK_NONE, {}, {}, FRINTTS, "+fptoint",
+ 250)
+AARCH64_ARCH_EXT_NAME("dgh", AArch64::AEK_NONE, {}, {}, DGH, "", 260)
+AARCH64_ARCH_EXT_NAME("ebf16", AArch64::AEK_NONE, {}, {}, EBF16, "+bf16", 290)
+AARCH64_ARCH_EXT_NAME("rpres", AArch64::AEK_NONE, {}, {}, RPRES, "", 300)
+AARCH64_ARCH_EXT_NAME("sve-bf16", AArch64::AEK_NONE, {}, {}, SVE_BF16,
+ "+sve,+bf16,+fullfp16,+fp-armv8,+neon", 320)
+AARCH64_ARCH_EXT_NAME("sve-ebf16", AArch64::AEK_NONE, {}, {}, SVE_EBF16,
+ "+sve,+bf16,+fullfp16,+fp-armv8,+neon", 330)
+AARCH64_ARCH_EXT_NAME("sve-i8mm", AArch64::AEK_NONE, {}, {}, SVE_I8MM,
+ "+sve,+i8mm,+fullfp16,+fp-armv8,+neon", 340)
+AARCH64_ARCH_EXT_NAME("sve2-pmull128", AArch64::AEK_NONE, {}, {}, SVE_PMULL128,
+ "+sve2,+sve,+sve2-aes,+fullfp16,+fp-armv8,+neon", 390)
+AARCH64_ARCH_EXT_NAME("bti", AArch64::AEK_NONE, {}, {}, BTI, "+bti", 510)
+AARCH64_ARCH_EXT_NAME("ls64_v", AArch64::AEK_NONE, {}, {}, LS64_V, "", 530)
+AARCH64_ARCH_EXT_NAME("ls64_accdata", AArch64::AEK_NONE, {}, {}, LS64_ACCDATA,
+ "+ls64", 540)
+AARCH64_ARCH_EXT_NAME("wfxt", AArch64::AEK_NONE, {}, {}, WFXT, "+wfxt", 550)
#undef AARCH64_ARCH_EXT_NAME
#ifndef AARCH64_CPU_NAME
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 2214b70a1cd89..309e35568bf5f 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -14,6 +14,7 @@
#ifndef LLVM_TARGETPARSER_AARCH64TARGETPARSER_H
#define LLVM_TARGETPARSER_AARCH64TARGETPARSER_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include <vector>
@@ -23,6 +24,67 @@ namespace llvm {
class Triple;
namespace AArch64 {
+enum CPUFeatures { // Function multi versioning feature indices; getCpuSupportsMask uses each value as a bit position in a 64-bit mask.
+ FEAT_RNG,
+ FEAT_FLAGM,
+ FEAT_FLAGM2,
+ FEAT_FP16FML,
+ FEAT_DOTPROD,
+ FEAT_SM4,
+ FEAT_RDM,
+ FEAT_LSE,
+ FEAT_FP,
+ FEAT_SIMD,
+ FEAT_CRC,
+ FEAT_SHA1,
+ FEAT_SHA2,
+ FEAT_SHA3,
+ FEAT_AES,
+ FEAT_PMULL,
+ FEAT_FP16,
+ FEAT_DIT,
+ FEAT_DPB,
+ FEAT_DPB2,
+ FEAT_JSCVT,
+ FEAT_FCMA,
+ FEAT_RCPC,
+ FEAT_RCPC2,
+ FEAT_FRINTTS,
+ FEAT_DGH,
+ FEAT_I8MM,
+ FEAT_BF16,
+ FEAT_EBF16,
+ FEAT_RPRES,
+ FEAT_SVE,
+ FEAT_SVE_BF16,
+ FEAT_SVE_EBF16,
+ FEAT_SVE_I8MM,
+ FEAT_SVE_F32MM,
+ FEAT_SVE_F64MM,
+ FEAT_SVE2,
+ FEAT_SVE_AES,
+ FEAT_SVE_PMULL128,
+ FEAT_SVE_BITPERM,
+ FEAT_SVE_SHA3,
+ FEAT_SVE_SM4,
+ FEAT_SME,
+ FEAT_MEMTAG,
+ FEAT_MEMTAG2,
+ FEAT_MEMTAG3,
+ FEAT_SB,
+ FEAT_PREDRES,
+ FEAT_SSBS,
+ FEAT_SSBS2,
+ FEAT_BTI,
+ FEAT_LS64,
+ FEAT_LS64_V,
+ FEAT_LS64_ACCDATA,
+ FEAT_WFXT,
+ FEAT_SME_F64,
+ FEAT_SME_I64,
+ FEAT_SME2,
+ FEAT_MAX // Not a real feature: AArch64TargetParser.def passes MAX as the FMV_ID of "invalid", "none" and extensions such as "crypto".
+};
// Arch extension modifiers for CPUs. These are labelled with their Arm ARM
// feature name (though the canonical reference for those is AArch64.td)
@@ -117,7 +179,8 @@ struct ExtName {
};
const ExtName AArch64ARCHExtNames[] = {
-#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \
+#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE, FMV_ID, \
+ DEP_FEATURES, FMV_PRIORITY) /* the three FMV arguments are accepted but not stored in ExtName */ \
{NAME, ID, FEATURE, NEGFEATURE},
#include "AArch64TargetParser.def"
};
@@ -175,6 +238,7 @@ StringRef resolveCPUAlias(StringRef CPU);
// Information by Name
uint64_t getDefaultExtensions(StringRef CPU, ArchKind AK);
+void getFeatureOption(StringRef Name, std::string &Feature);
ArchKind getCPUArchKind(StringRef CPU);
ArchKind getSubArchArchKind(StringRef SubArch);
@@ -186,6 +250,7 @@ ArchKind parseCPUArch(StringRef CPU);
void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values);
bool isX18ReservedByDefault(const Triple &TT);
+uint64_t getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs);
} // namespace AArch64
} // namespace llvm
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 46a604a7401b5..7d959b6b20e94 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -77,6 +77,9 @@ def FeatureLSE2 : SubtargetFeature<"lse2", "HasLSE2", "true",
def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true",
"Enable out of line atomics to support LSE instructions">;
+def FeatureFMV : SubtargetFeature<"fmv", "HasFMV", "true",
+ "Enable Function Multi Versioning support.">;
+
def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true",
"Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions (FEAT_RDM)">;
diff --git a/llvm/lib/TargetParser/AArch64TargetParser.cpp b/llvm/lib/TargetParser/AArch64TargetParser.cpp
index e54cf4ed64a29..baa6cbfcdbee6 100644
--- a/llvm/lib/TargetParser/AArch64TargetParser.cpp
+++ b/llvm/lib/TargetParser/AArch64TargetParser.cpp
@@ -38,6 +38,15 @@ uint64_t AArch64::getDefaultExtensions(StringRef CPU, AArch64::ArchKind AK) {
.Default(AArch64::AEK_INVALID);
}
+void AArch64::getFeatureOption(StringRef Name, std::string &Feature) {
+  // The first character of Name is dropped before the extension-name lookup.
+  // A name that matches no table entry is handed back unchanged, prefix
+  // character included.
+  llvm::StringSwitch<std::string> ExtLookup(Name.substr(1));
+#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE, FMV_ID,          \
+                              DEP_FEATURES, FMV_PRIORITY)                     \
+  ExtLookup.Case(NAME, FEATURE);
+#include "../../include/llvm/TargetParser/AArch64TargetParser.def"
+  Feature = ExtLookup.Default(Name.str());
+}
+
AArch64::ArchKind AArch64::getCPUArchKind(StringRef CPU) {
if (CPU == "generic")
return ArchKind::ARMV8A;
@@ -55,12 +64,27 @@ AArch64::ArchKind AArch64::getSubArchArchKind(StringRef SubArch) {
return ArchKind::INVALID;
}
+uint64_t AArch64::getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
+  // Returns a mask with bit FEAT_<FMV_ID> set for every extension name in
+  // FeatureStrs, using the FMV_ID column of AArch64TargetParser.def.
+  //
+  // Names that are not in the table are skipped. Without a Default() the
+  // StringSwitch conversion asserts when no Case matches, and reads an empty
+  // result in builds without assertions.
+  const unsigned UnknownFeature = ~0U;
+  uint64_t FeaturesMask = 0;
+  for (StringRef FeatureStr : FeatureStrs) {
+    unsigned Feature = StringSwitch<unsigned>(FeatureStr)
+#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE, FMV_ID,          \
+                              DEP_FEATURES, FMV_PRIORITY)                     \
+                           .Case(NAME, llvm::AArch64::FEAT_##FMV_ID)
+#include "../../include/llvm/TargetParser/AArch64TargetParser.def"
+                           .Default(UnknownFeature);
+    // Unknown name: contributes no bit, and the shift below stays in range.
+    if (Feature == UnknownFeature)
+      continue;
+    FeaturesMask |= (1ULL << Feature);
+  }
+  return FeaturesMask;
+}
+
bool AArch64::getExtensionFeatures(uint64_t Extensions,
std::vector<StringRef> &Features) {
if (Extensions == AArch64::AEK_INVALID)
return false;
-#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \
+#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE, FMV_ID, \
+ DEP_FEATURES, FMV_PRIORITY) \
if (Extensions & ID) { \
const char *feature = FEATURE; \
/* INVALID and NONE have no feature name. */ \
More information about the cfe-commits
mailing list