[clang] fc53eb6 - Reapply 'Implement target_clones multiversioning'
Erich Keane via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 29 06:46:48 PST 2021
Author: Erich Keane
Date: 2021-11-29T06:30:01-08:00
New Revision: fc53eb69c26cdd7efa6b629c187d04326f0448ca
URL: https://github.com/llvm/llvm-project/commit/fc53eb69c26cdd7efa6b629c187d04326f0448ca
DIFF: https://github.com/llvm/llvm-project/commit/fc53eb69c26cdd7efa6b629c187d04326f0448ca.diff
LOG: Reapply 'Implement target_clones multiversioning'
See discussion in D51650, this change was a little aggressive in an
error while doing a 'while we were here', so this removes that error
condition, as it is apparently useful.
This reverts commit bb4934601d731465e01e2e22c80ce2dbe687d73f.
Added:
clang/test/CodeGen/attr-target-clones.c
clang/test/CodeGenCXX/attr-target-clones.cpp
clang/test/Sema/attr-target-clones.c
clang/test/SemaCXX/attr-target-clones.cpp
Modified:
clang/include/clang/AST/Decl.h
clang/include/clang/Basic/Attr.td
clang/include/clang/Basic/AttrDocs.td
clang/include/clang/Basic/DiagnosticGroups.td
clang/include/clang/Basic/DiagnosticSemaKinds.td
clang/include/clang/Sema/Sema.h
clang/lib/AST/ASTContext.cpp
clang/lib/AST/Decl.cpp
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/CodeGen/CodeGenModule.h
clang/lib/Sema/SemaDecl.cpp
clang/lib/Sema/SemaDeclAttr.cpp
clang/test/Misc/pragma-attribute-supported-attributes-list.test
clang/test/Sema/attr-cpuspecific.c
Removed:
################################################################################
diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 85a3a8ab69708..2eacf1105c18c 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -1840,7 +1840,8 @@ enum class MultiVersionKind {
None,
Target,
CPUSpecific,
- CPUDispatch
+ CPUDispatch,
+ TargetClones
};
/// Represents a function declaration or definition.
@@ -2459,6 +2460,10 @@ class FunctionDecl : public DeclaratorDecl,
/// the target functionality.
bool isTargetMultiVersion() const;
+ /// True if this function is a multiversioned dispatch function as a part of
+ /// the target-clones functionality.
+ bool isTargetClonesMultiVersion() const;
+
/// \brief Get the associated-constraints of this function declaration.
/// Currently, this will either be a vector of size 1 containing the
/// trailing-requires-clause or an empty vector.
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 39588d94cf09b..fab3f3edfb831 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -2677,6 +2677,40 @@ def Target : InheritableAttr {
}];
}
+def TargetClones : InheritableAttr {
+ let Spellings = [GCC<"target_clones">];
+ let Args = [VariadicStringArgument<"featuresStrs">];
+ let Documentation = [TargetClonesDocs];
+ let Subjects = SubjectList<[Function], ErrorDiag>;
+ let AdditionalMembers = [{
+ StringRef getFeatureStr(unsigned Index) const {
+ return *(featuresStrs_begin() + Index);
+ }
+ // 'default' is always moved to the end, so it isn't considered
+ // when mangling the index.
+ unsigned getMangledIndex(unsigned Index) const {
+ if (getFeatureStr(Index) == "default")
+ return std::count_if(featuresStrs_begin(), featuresStrs_end(),
+ [](StringRef S) { return S != "default"; });
+
+ return std::count_if(featuresStrs_begin(), featuresStrs_begin() + Index,
+ [](StringRef S) { return S != "default"; });
+ }
+
+ // True if this is the first of this version to appear in the config string.
+ // This is used to make sure we don't try to emit this function multiple
+ // times.
+ bool isFirstOfVersion(unsigned Index) const {
+ StringRef FeatureStr(getFeatureStr(Index));
+ return 0 == std::count_if(
+ featuresStrs_begin(), featuresStrs_begin() + Index,
+ [FeatureStr](StringRef S) { return S == FeatureStr; });
+ }
+ }];
+}
+
+def : MutualExclusions<[TargetClones, Target, CPUDispatch, CPUSpecific]>;
+
def MinVectorWidth : InheritableAttr {
let Spellings = [Clang<"min_vector_width">];
let Args = [UnsignedArgument<"VectorWidth">];
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index e7afb3699eb17..10cce4c2d6898 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -2233,6 +2233,40 @@ Additionally, a function may not become multiversioned after its first use.
}];
}
+def TargetClonesDocs : Documentation {
+ let Category = DocCatFunction;
+ let Content = [{
+Clang supports the ``target_clones("OPTIONS")`` attribute. This attribute may be
+attached to a function declaration and causes function multiversioning, where
+multiple versions of the function will be emitted with different code
+generation options. Additionally, these versions will be resolved at runtime
+based on the priority of their attribute options. All ``target_clone`` functions
+are considered multiversioned functions.
+
+All multiversioned functions must contain a ``default`` (fallback)
+implementation, otherwise usages of the function are considered invalid.
+Additionally, a function may not become multiversioned after its first use.
+
+The options to ``target_clones`` can either be a target-specific architecture
+(specified as ``arch=CPU``), or one of a list of subtarget features.
+
+Example "subtarget features" from the x86 backend include: "mmx", "sse", "sse4.2",
+"avx", "xop" and largely correspond to the machine specific options handled by
+the front end.
+
+The versions can either be listed as a comma-separated sequence of string
+literals or as a single string literal containing a comma-separated list of
+versions. For compatibility with GCC, the two formats can be mixed. For
+example, the following will emit 4 versions of the function:
+
+ .. code-block:: c++
+
+ __attribute__((target_clones("arch=atom,avx2","arch=ivybridge","default")))
+ void foo() {}
+
+}];
+}
+
def MinVectorWidthDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 68e0da72550e2..629e553d66e32 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1275,9 +1275,14 @@ def : DiagGroup<"spirv-compat", [SpirCompat]>; // Alias.
// Warning for the GlobalISel options.
def GlobalISel : DiagGroup<"global-isel">;
+// A warning group for the GNU extension to allow mixed specifier types for
+// target-clones multiversioning.
+def TargetClonesMixedSpecifiers : DiagGroup<"target-clones-mixed-specifiers">;
+
// A warning group specifically for warnings related to function
// multiversioning.
-def FunctionMultiVersioning : DiagGroup<"function-multiversion">;
+def FunctionMultiVersioning
+ : DiagGroup<"function-multiversion", [TargetClonesMixedSpecifiers]>;
def NoDeref : DiagGroup<"noderef">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index dc67f86f25cab..d37bc86ce0738 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2981,7 +2981,8 @@ def err_invalid_branch_protection_spec : Error<
"invalid or misplaced branch protection specification '%0'">;
def warn_unsupported_target_attribute
: Warning<"%select{unsupported|duplicate|unknown}0%select{| architecture|"
- " tune CPU}1 '%2' in the 'target' attribute string; 'target' "
+ " tune CPU}1 '%2' in the '%select{target|target_clones}3' "
+ "attribute string; '%select{target|target_clones}3' "
"attribute ignored">,
InGroup<IgnoredAttributes>;
def err_attribute_unsupported
@@ -9864,6 +9865,8 @@ def warn_duplicate_attribute_exact : Warning<
def warn_duplicate_attribute : Warning<
"attribute %0 is already applied with different arguments">,
InGroup<IgnoredAttributes>;
+def err_disallowed_duplicate_attribute : Error<
+ "attribute %0 cannot appear more than once on a declaration">;
def warn_sync_fetch_and_nand_semantics_change : Warning<
"the semantics of this intrinsic changed with GCC "
@@ -11254,9 +11257,11 @@ def err_multiversion_duplicate : Error<
"multiversioned function redeclarations require identical target attributes">;
def err_multiversion_noproto : Error<
"multiversioned function must have a prototype">;
-def err_multiversion_disallowed_other_attr : Error<
- "attribute '%select{target|cpu_specific|cpu_dispatch}0' multiversioning cannot be combined"
- " with attribute %1">;
+def err_multiversion_disallowed_other_attr
+ : Error<"attribute "
+ "'%select{|target|cpu_specific|cpu_dispatch|target_clones}0' "
+ "multiversioning cannot be combined"
+ " with attribute %1">;
def err_multiversion_mismatched_attrs
: Error<"attributes on multiversioned functions must all match, attribute "
"%0 %select{is missing|has different arguments}1">;
@@ -11264,11 +11269,14 @@ def err_multiversion_diff : Error<
"multiversioned function declaration has a different %select{calling convention"
"|return type|constexpr specification|inline specification|linkage|"
"language linkage}0">;
-def err_multiversion_doesnt_support : Error<
- "attribute '%select{target|cpu_specific|cpu_dispatch}0' multiversioned functions do not "
- "yet support %select{function templates|virtual functions|"
- "deduced return types|constructors|destructors|deleted functions|"
- "defaulted functions|constexpr functions|consteval function}1">;
+def err_multiversion_doesnt_support
+ : Error<"attribute "
+ "'%select{|target|cpu_specific|cpu_dispatch|target_clones}0' "
+ "multiversioned functions do not "
+ "yet support %select{function templates|virtual functions|"
+ "deduced return types|constructors|destructors|deleted functions|"
+ "defaulted functions|constexpr functions|consteval "
+ "function|lambdas}1">;
def err_multiversion_not_allowed_on_main : Error<
"'main' cannot be a multiversioned function">;
def err_multiversion_not_supported : Error<
@@ -11285,6 +11293,19 @@ def warn_multiversion_duplicate_entries : Warning<
def warn_dispatch_body_ignored : Warning<
"body of cpu_dispatch function will be ignored">,
InGroup<FunctionMultiVersioning>;
+def err_target_clone_must_have_default
+ : Error<"'target_clones' multiversioning requires a default target">;
+def err_target_clone_doesnt_match
+ : Error<"'target_clones' attribute does not match previous declaration">;
+def warn_target_clone_mixed_values
+ : ExtWarn<
+ "mixing 'target_clones' specifier mechanisms is permitted for GCC "
+ "compatibility; use a comma separated sequence of string literals, "
+ "or a string literal containing a comma-separated list of versions">,
+ InGroup<TargetClonesMixedSpecifiers>;
+def warn_target_clone_duplicate_options
+ : Warning<"version list contains duplicate entries">,
+ InGroup<FunctionMultiVersioning>;
// three-way comparison operator diagnostics
def err_implied_comparison_category_type_not_found : Error<
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 43ce5d983217f..c969d97baccce 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -4351,6 +4351,10 @@ class Sema final {
llvm::Error isValidSectionSpecifier(StringRef Str);
bool checkSectionName(SourceLocation LiteralLoc, StringRef Str);
bool checkTargetAttr(SourceLocation LiteralLoc, StringRef Str);
+ bool checkTargetClonesAttrString(SourceLocation LiteralLoc, StringRef Str,
+ const StringLiteral *Literal,
+ bool &HasDefault, bool &HasCommas,
+ SmallVectorImpl<StringRef> &Strings);
bool checkMSInheritanceAttrOnDefinition(
CXXRecordDecl *RD, SourceRange Range, bool BestCase,
MSInheritanceModel SemanticSpelling);
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 294cc20f76c53..2d85d72e5b8a1 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -11800,6 +11800,15 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
Target->getTargetOpts().FeaturesAsWritten.begin(),
Target->getTargetOpts().FeaturesAsWritten.end());
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
+ } else if (const auto *TC = FD->getAttr<TargetClonesAttr>()) {
+ std::vector<std::string> Features;
+ StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex());
+ if (VersionStr.startswith("arch="))
+ TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1);
+ else if (VersionStr != "default")
+ Features.push_back((StringRef{"+"} + VersionStr).str());
+
+ Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
} else {
FeatureMap = Target->getTargetOpts().FeatureMap;
}
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 5ea091edcf4c9..68dfef248f65a 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -3271,6 +3271,8 @@ MultiVersionKind FunctionDecl::getMultiVersionKind() const {
return MultiVersionKind::CPUDispatch;
if (hasAttr<CPUSpecificAttr>())
return MultiVersionKind::CPUSpecific;
+ if (hasAttr<TargetClonesAttr>())
+ return MultiVersionKind::TargetClones;
return MultiVersionKind::None;
}
@@ -3286,6 +3288,10 @@ bool FunctionDecl::isTargetMultiVersion() const {
return isMultiVersion() && hasAttr<TargetAttr>();
}
+bool FunctionDecl::isTargetClonesMultiVersion() const {
+ return isMultiVersion() && hasAttr<TargetClonesAttr>();
+}
+
void
FunctionDecl::setPreviousDeclaration(FunctionDecl *PrevDecl) {
redeclarable_base::setPreviousDecl(PrevDecl);
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 59f3e02705713..7a7ed22e4381a 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1266,6 +1266,20 @@ static bool isUniqueInternalLinkageDecl(GlobalDecl GD,
(CGM.getFunctionLinkage(GD) == llvm::GlobalValue::InternalLinkage);
}
+static void AppendTargetClonesMangling(const CodeGenModule &CGM,
+ const TargetClonesAttr *Attr,
+ unsigned VersionIndex,
+ raw_ostream &Out) {
+ Out << '.';
+ StringRef FeatureStr = Attr->getFeatureStr(VersionIndex);
+ if (FeatureStr.startswith("arch="))
+ Out << "arch_" << FeatureStr.substr(sizeof("arch=") - 1);
+ else
+ Out << FeatureStr;
+
+ Out << '.' << Attr->getMangledIndex(VersionIndex);
+}
+
static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
const NamedDecl *ND,
bool OmitMultiVersionMangling = false) {
@@ -1319,6 +1333,10 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
case MultiVersionKind::Target:
AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out);
break;
+ case MultiVersionKind::TargetClones:
+ AppendTargetClonesMangling(CGM, FD->getAttr<TargetClonesAttr>(),
+ GD.getMultiVersionIndex(), Out);
+ break;
case MultiVersionKind::None:
llvm_unreachable("None multiversion type isn't valid here");
}
@@ -1983,8 +2001,9 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
FD = FD ? FD->getMostRecentDecl() : FD;
const auto *TD = FD ? FD->getAttr<TargetAttr>() : nullptr;
const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr;
+ const auto *TC = FD ? FD->getAttr<TargetClonesAttr>() : nullptr;
bool AddedAttr = false;
- if (TD || SD) {
+ if (TD || SD || TC) {
llvm::StringMap<bool> FeatureMap;
getContext().getFunctionFeatureMap(FeatureMap, GD);
@@ -3226,6 +3245,12 @@ void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD,
for (unsigned I = 0; I < Spec->cpus_size(); ++I)
EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr);
// Requires multiple emits.
+ } else if (FD->isTargetClonesMultiVersion()) {
+ auto *Clone = FD->getAttr<TargetClonesAttr>();
+ for (unsigned I = 0; I < Clone->featuresStrs_size(); ++I)
+ if (Clone->isFirstOfVersion(I))
+ EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr);
+ EmitTargetClonesResolver(GD);
} else
EmitGlobalFunctionDefinition(GD, GV);
}
@@ -3307,6 +3332,63 @@ llvm::GlobalValue::LinkageTypes getMultiversionLinkage(CodeGenModule &CGM,
return llvm::GlobalValue::WeakODRLinkage;
}
+void CodeGenModule::EmitTargetClonesResolver(GlobalDecl GD) {
+ const auto *FD = cast<FunctionDecl>(GD.getDecl());
+ assert(FD && "Not a FunctionDecl?");
+ const auto *TC = FD->getAttr<TargetClonesAttr>();
+ assert(TC && "Not a target_clones Function?");
+
+ QualType CanonTy = Context.getCanonicalType(FD->getType());
+ llvm::Type *DeclTy = getTypes().ConvertType(CanonTy);
+
+ if (const auto *CXXFD = dyn_cast<CXXMethodDecl>(FD)) {
+ const CGFunctionInfo &FInfo = getTypes().arrangeCXXMethodDeclaration(CXXFD);
+ DeclTy = getTypes().GetFunctionType(FInfo);
+ }
+
+ llvm::Function *ResolverFunc;
+ if (getTarget().supportsIFunc()) {
+ auto *IFunc = cast<llvm::GlobalIFunc>(
+ GetOrCreateMultiVersionResolver(GD, DeclTy, FD));
+ ResolverFunc = cast<llvm::Function>(IFunc->getResolver());
+ } else
+ ResolverFunc =
+ cast<llvm::Function>(GetOrCreateMultiVersionResolver(GD, DeclTy, FD));
+
+ SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options;
+ for (unsigned VersionIndex = 0; VersionIndex < TC->featuresStrs_size();
+ ++VersionIndex) {
+ if (!TC->isFirstOfVersion(VersionIndex))
+ continue;
+ StringRef Version = TC->getFeatureStr(VersionIndex);
+ StringRef MangledName =
+ getMangledName(GD.getWithMultiVersionIndex(VersionIndex));
+ llvm::Constant *Func = GetGlobalValue(MangledName);
+ assert(Func &&
+ "Should have already been created before calling resolver emit");
+
+ StringRef Architecture;
+ llvm::SmallVector<StringRef, 1> Feature;
+
+ if (Version.startswith("arch="))
+ Architecture = Version.drop_front(sizeof("arch=") - 1);
+ else if (Version != "default")
+ Feature.push_back(Version);
+
+ Options.emplace_back(cast<llvm::Function>(Func), Architecture, Feature);
+ }
+
+ const TargetInfo &TI = getTarget();
+ std::stable_sort(
+ Options.begin(), Options.end(),
+ [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS,
+ const CodeGenFunction::MultiVersionResolverOption &RHS) {
+ return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS);
+ });
+ CodeGenFunction CGF(*this);
+ CGF.EmitMultiVersionResolver(ResolverFunc, Options);
+}
+
void CodeGenModule::emitMultiVersionFunctions() {
std::vector<GlobalDecl> MVFuncsToEmit;
MultiVersionFuncs.swap(MVFuncsToEmit);
@@ -3511,8 +3593,25 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(
// Since this is the first time we've created this IFunc, make sure
// that we put this multiversioned function into the list to be
// replaced later if necessary (target multiversioning only).
- if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion())
+ if (FD->isTargetMultiVersion())
MultiVersionFuncs.push_back(GD);
+ else if (FD->isTargetClonesMultiVersion()) {
+ // In target_clones multiversioning, make sure we emit this if used.
+ auto DDI =
+ DeferredDecls.find(getMangledName(GD.getWithMultiVersionIndex(0)));
+ if (DDI != DeferredDecls.end()) {
+ addDeferredDeclToEmit(GD);
+ DeferredDecls.erase(DDI);
+ } else {
+ // Emit the symbol of the 1st variant, so that the deferred decls know we
+ // need it, otherwise the only global value will be the resolver/ifunc,
+ // which end up getting broken if we search for them with GetGlobalValue'.
+ GetOrCreateLLVMFunction(
+ getMangledName(GD.getWithMultiVersionIndex(0)), DeclTy, FD,
+ /*ForVTable=*/false, /*DontDefer=*/true,
+ /*IsThunk=*/false, llvm::AttributeList(), ForDefinition);
+ }
+ }
if (getTarget().supportsIFunc()) {
llvm::Type *ResolverType = llvm::FunctionType::get(
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index fbed22376c827..e1c7f486d334e 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1500,6 +1500,7 @@ class CodeGenModule : public CodeGenTypeCache {
void EmitAliasDefinition(GlobalDecl GD);
void emitIFuncDefinition(GlobalDecl GD);
void emitCPUDispatchDefinition(GlobalDecl GD);
+ void EmitTargetClonesResolver(GlobalDecl GD);
void EmitObjCPropertyImplementations(const ObjCImplementationDecl *D);
void EmitObjCIvarInitializations(ObjCImplementationDecl *D);
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index af174ac1ca1a7..7be71ca49ea23 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -10268,13 +10268,9 @@ static bool checkNonMultiVersionCompatAttributes(Sema &S,
const FunctionDecl *FD,
const FunctionDecl *CausedFD,
MultiVersionKind MVType) {
- bool IsCPUSpecificCPUDispatchMVType =
- MVType == MultiVersionKind::CPUDispatch ||
- MVType == MultiVersionKind::CPUSpecific;
- const auto Diagnose = [FD, CausedFD, IsCPUSpecificCPUDispatchMVType](
- Sema &S, const Attr *A) {
+ const auto Diagnose = [FD, CausedFD, MVType](Sema &S, const Attr *A) {
S.Diag(FD->getLocation(), diag::err_multiversion_disallowed_other_attr)
- << IsCPUSpecificCPUDispatchMVType << A;
+ << static_cast<unsigned>(MVType) << A;
if (CausedFD)
S.Diag(CausedFD->getLocation(), diag::note_multiversioning_caused_here);
return true;
@@ -10292,6 +10288,10 @@ static bool checkNonMultiVersionCompatAttributes(Sema &S,
if (MVType != MultiVersionKind::Target)
return Diagnose(S, A);
break;
+ case attr::TargetClones:
+ if (MVType != MultiVersionKind::TargetClones)
+ return Diagnose(S, A);
+ break;
default:
if (!AttrCompatibleWithMultiVersion(A->getKind(), MVType))
return Diagnose(S, A);
@@ -10318,6 +10318,7 @@ bool Sema::areMultiversionVariantFunctionsCompatible(
DefaultedFuncs = 6,
ConstexprFuncs = 7,
ConstevalFuncs = 8,
+ Lambda = 9,
};
enum Different {
CallingConv = 0,
@@ -10445,7 +10446,7 @@ static bool CheckMultiVersionAdditionalRules(Sema &S, const FunctionDecl *OldFD,
S.PDiag(diag::note_multiversioning_caused_here)),
PartialDiagnosticAt(NewFD->getLocation(),
S.PDiag(diag::err_multiversion_doesnt_support)
- << IsCPUSpecificCPUDispatchMVType),
+ << static_cast<unsigned>(MVType)),
PartialDiagnosticAt(NewFD->getLocation(),
S.PDiag(diag::err_multiversion_diff)),
/*TemplatesSupported=*/false,
@@ -10574,21 +10575,30 @@ static bool CheckTargetCausesMultiVersioning(
return false;
}
+static bool MultiVersionTypesCompatible(MultiVersionKind Old,
+ MultiVersionKind New) {
+ if (Old == New || Old == MultiVersionKind::None ||
+ New == MultiVersionKind::None)
+ return true;
+
+ return (Old == MultiVersionKind::CPUDispatch &&
+ New == MultiVersionKind::CPUSpecific) ||
+ (Old == MultiVersionKind::CPUSpecific &&
+ New == MultiVersionKind::CPUDispatch);
+}
+
/// Check the validity of a new function declaration being added to an existing
/// multiversioned declaration collection.
static bool CheckMultiVersionAdditionalDecl(
Sema &S, FunctionDecl *OldFD, FunctionDecl *NewFD,
MultiVersionKind NewMVType, const TargetAttr *NewTA,
const CPUDispatchAttr *NewCPUDisp, const CPUSpecificAttr *NewCPUSpec,
- bool &Redeclaration, NamedDecl *&OldDecl, bool &MergeTypeWithPrevious,
- LookupResult &Previous) {
+ const TargetClonesAttr *NewClones, bool &Redeclaration, NamedDecl *&OldDecl,
+ bool &MergeTypeWithPrevious, LookupResult &Previous) {
MultiVersionKind OldMVType = OldFD->getMultiVersionKind();
// Disallow mixing of multiversioning types.
- if ((OldMVType == MultiVersionKind::Target &&
- NewMVType != MultiVersionKind::Target) ||
- (NewMVType == MultiVersionKind::Target &&
- OldMVType != MultiVersionKind::Target)) {
+ if (!MultiVersionTypesCompatible(OldMVType, NewMVType)) {
S.Diag(NewFD->getLocation(), diag::err_multiversion_types_mixed);
S.Diag(OldFD->getLocation(), diag::note_previous_declaration);
NewFD->setInvalidDecl();
@@ -10613,7 +10623,12 @@ static bool CheckMultiVersionAdditionalDecl(
if (S.IsOverload(NewFD, CurFD, UseMemberUsingDeclRules))
continue;
- if (NewMVType == MultiVersionKind::Target) {
+ switch (NewMVType) {
+ case MultiVersionKind::None:
+ assert(OldMVType == MultiVersionKind::TargetClones &&
+ "Only target_clones can be omitted in subsequent declarations");
+ break;
+ case MultiVersionKind::Target: {
const auto *CurTA = CurFD->getAttr<TargetAttr>();
if (CurTA->getFeaturesStr() == NewTA->getFeaturesStr()) {
NewFD->setIsMultiVersion();
@@ -10629,7 +10644,30 @@ static bool CheckMultiVersionAdditionalDecl(
NewFD->setInvalidDecl();
return true;
}
- } else {
+ break;
+ }
+ case MultiVersionKind::TargetClones: {
+ const auto *CurClones = CurFD->getAttr<TargetClonesAttr>();
+ Redeclaration = true;
+ OldDecl = CurFD;
+ MergeTypeWithPrevious = true;
+ NewFD->setIsMultiVersion();
+
+ if (CurClones && NewClones &&
+ (CurClones->featuresStrs_size() != NewClones->featuresStrs_size() ||
+ !std::equal(CurClones->featuresStrs_begin(),
+ CurClones->featuresStrs_end(),
+ NewClones->featuresStrs_begin()))) {
+ S.Diag(NewFD->getLocation(), diag::err_target_clone_doesnt_match);
+ S.Diag(CurFD->getLocation(), diag::note_previous_declaration);
+ NewFD->setInvalidDecl();
+ return true;
+ }
+
+ return false;
+ }
+ case MultiVersionKind::CPUSpecific:
+ case MultiVersionKind::CPUDispatch: {
const auto *CurCPUSpec = CurFD->getAttr<CPUSpecificAttr>();
const auto *CurCPUDisp = CurFD->getAttr<CPUDispatchAttr>();
// Handle CPUDispatch/CPUSpecific versions.
@@ -10684,8 +10722,8 @@ static bool CheckMultiVersionAdditionalDecl(
}
}
}
- // If the two decls aren't the same MVType, there is no possible error
- // condition.
+ break;
+ }
}
}
@@ -10721,7 +10759,6 @@ static bool CheckMultiVersionAdditionalDecl(
return false;
}
-
/// Check the validity of a mulitversion function declaration.
/// Also sets the multiversion'ness' of the function itself.
///
@@ -10735,23 +10772,14 @@ static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD,
const auto *NewTA = NewFD->getAttr<TargetAttr>();
const auto *NewCPUDisp = NewFD->getAttr<CPUDispatchAttr>();
const auto *NewCPUSpec = NewFD->getAttr<CPUSpecificAttr>();
-
- // Mixing Multiversioning types is prohibited.
- if ((NewTA && NewCPUDisp) || (NewTA && NewCPUSpec) ||
- (NewCPUDisp && NewCPUSpec)) {
- S.Diag(NewFD->getLocation(), diag::err_multiversion_types_mixed);
- NewFD->setInvalidDecl();
- return true;
- }
-
- MultiVersionKind MVType = NewFD->getMultiVersionKind();
+ const auto *NewClones = NewFD->getAttr<TargetClonesAttr>();
+ MultiVersionKind MVType = NewFD->getMultiVersionKind();
// Main isn't allowed to become a multiversion function, however it IS
// permitted to have 'main' be marked with the 'target' optimization hint.
if (NewFD->isMain()) {
- if ((MVType == MultiVersionKind::Target && NewTA->isDefaultVersion()) ||
- MVType == MultiVersionKind::CPUDispatch ||
- MVType == MultiVersionKind::CPUSpecific) {
+ if (MVType != MultiVersionKind::None &&
+ !(MVType == MultiVersionKind::Target && !NewTA->isDefaultVersion())) {
S.Diag(NewFD->getLocation(), diag::err_multiversion_not_allowed_on_main);
NewFD->setInvalidDecl();
return true;
@@ -10774,13 +10802,35 @@ static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD,
if (!OldFD->isMultiVersion() && MVType == MultiVersionKind::None)
return false;
- if (OldFD->isMultiVersion() && MVType == MultiVersionKind::None) {
+ // Multiversioned redeclarations aren't allowed to omit the attribute, except
+ // for target_clones.
+ if (OldFD->isMultiVersion() && MVType == MultiVersionKind::None &&
+ OldFD->getMultiVersionKind() != MultiVersionKind::TargetClones) {
S.Diag(NewFD->getLocation(), diag::err_multiversion_required_in_redecl)
<< (OldFD->getMultiVersionKind() != MultiVersionKind::Target);
NewFD->setInvalidDecl();
return true;
}
+ if (!OldFD->isMultiVersion()) {
+ switch (MVType) {
+ case MultiVersionKind::Target:
+ return CheckTargetCausesMultiVersioning(S, OldFD, NewFD, NewTA,
+ Redeclaration, OldDecl,
+ MergeTypeWithPrevious, Previous);
+ case MultiVersionKind::TargetClones:
+ if (OldFD->isUsed(false)) {
+ NewFD->setInvalidDecl();
+ return S.Diag(NewFD->getLocation(), diag::err_multiversion_after_used);
+ }
+ OldFD->setIsMultiVersion();
+ break;
+ case MultiVersionKind::CPUDispatch:
+ case MultiVersionKind::CPUSpecific:
+ case MultiVersionKind::None:
+ break;
+ }
+ }
// Handle the target potentially causes multiversioning case.
if (!OldFD->isMultiVersion() && MVType == MultiVersionKind::Target)
return CheckTargetCausesMultiVersioning(S, OldFD, NewFD, NewTA,
@@ -10791,8 +10841,8 @@ static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD,
// appropriate attribute in the current function decl. Resolve that these are
// still compatible with previous declarations.
return CheckMultiVersionAdditionalDecl(
- S, OldFD, NewFD, MVType, NewTA, NewCPUDisp, NewCPUSpec, Redeclaration,
- OldDecl, MergeTypeWithPrevious, Previous);
+ S, OldFD, NewFD, MVType, NewTA, NewCPUDisp, NewCPUSpec, NewClones,
+ Redeclaration, OldDecl, MergeTypeWithPrevious, Previous);
}
/// Perform semantic checking of a new function declaration.
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index ef889a36bd55c..ebc81f357cecb 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -1965,6 +1965,28 @@ static void handleRestrictAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
}
static void handleCPUSpecificAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ // Ensure we don't combine these with themselves, since that causes some
+ // confusing behavior.
+ if (AL.getParsedKind() == ParsedAttr::AT_CPUDispatch) {
+ if (checkAttrMutualExclusion<CPUSpecificAttr>(S, D, AL))
+ return;
+
+ if (const auto *Other = D->getAttr<CPUDispatchAttr>()) {
+ S.Diag(AL.getLoc(), diag::err_disallowed_duplicate_attribute) << AL;
+ S.Diag(Other->getLocation(), diag::note_conflicting_attribute);
+ return;
+ }
+ } else if (AL.getParsedKind() == ParsedAttr::AT_CPUSpecific) {
+ if (checkAttrMutualExclusion<CPUDispatchAttr>(S, D, AL))
+ return;
+
+ if (const auto *Other = D->getAttr<CPUSpecificAttr>()) {
+ S.Diag(AL.getLoc(), diag::err_disallowed_duplicate_attribute) << AL;
+ S.Diag(Other->getLocation(), diag::note_conflicting_attribute);
+ return;
+ }
+ }
+
FunctionDecl *FD = cast<FunctionDecl>(D);
if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) {
@@ -3211,40 +3233,41 @@ static void handleCodeSegAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) {
enum FirstParam { Unsupported, Duplicate, Unknown };
enum SecondParam { None, Architecture, Tune };
+ enum ThirdParam { Target, TargetClones };
if (AttrStr.contains("fpmath="))
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << "fpmath=";
+ << Unsupported << None << "fpmath=" << Target;
// Diagnose use of tune if target doesn't support it.
if (!Context.getTargetInfo().supportsTargetAttributeTune() &&
AttrStr.contains("tune="))
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << "tune=";
+ << Unsupported << None << "tune=" << Target;
ParsedTargetAttr ParsedAttrs = TargetAttr::parse(AttrStr);
if (!ParsedAttrs.Architecture.empty() &&
!Context.getTargetInfo().isValidCPUName(ParsedAttrs.Architecture))
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unknown << Architecture << ParsedAttrs.Architecture;
+ << Unknown << Architecture << ParsedAttrs.Architecture << Target;
if (!ParsedAttrs.Tune.empty() &&
!Context.getTargetInfo().isValidCPUName(ParsedAttrs.Tune))
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unknown << Tune << ParsedAttrs.Tune;
+ << Unknown << Tune << ParsedAttrs.Tune << Target;
if (ParsedAttrs.DuplicateArchitecture)
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Duplicate << None << "arch=";
+ << Duplicate << None << "arch=" << Target;
if (ParsedAttrs.DuplicateTune)
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Duplicate << None << "tune=";
+ << Duplicate << None << "tune=" << Target;
for (const auto &Feature : ParsedAttrs.Features) {
auto CurFeature = StringRef(Feature).drop_front(); // remove + or -.
if (!Context.getTargetInfo().isValidFeatureName(CurFeature))
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << CurFeature;
+ << Unsupported << None << CurFeature << Target;
}
TargetInfo::BranchProtectionInfo BPI;
@@ -3254,7 +3277,7 @@ bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) {
ParsedAttrs.BranchProtection, BPI, Error)) {
if (Error.empty())
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << "branch-protection";
+ << Unsupported << None << "branch-protection" << Target;
else
return Diag(LiteralLoc, diag::err_invalid_branch_protection_spec)
<< Error;
@@ -3274,6 +3297,107 @@ static void handleTargetAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
D->addAttr(NewAttr);
}
+bool Sema::checkTargetClonesAttrString(SourceLocation LiteralLoc, StringRef Str,
+ const StringLiteral *Literal,
+ bool &HasDefault, bool &HasCommas,
+ SmallVectorImpl<StringRef> &Strings) {
+ enum FirstParam { Unsupported, Duplicate, Unknown };
+ enum SecondParam { None, Architecture, Tune };
+ enum ThirdParam { Target, TargetClones };
+ HasCommas = HasCommas || Str.contains(',');
+ // Warn on empty at the beginning of a string.
+ if (Str.size() == 0)
+ return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << "" << TargetClones;
+
+ std::pair<StringRef, StringRef> Parts = {{}, Str};
+ while (!Parts.second.empty()) {
+ Parts = Parts.second.split(',');
+ StringRef Cur = Parts.first.trim();
+ SourceLocation CurLoc = Literal->getLocationOfByte(
+ Cur.data() - Literal->getString().data(), getSourceManager(),
+ getLangOpts(), Context.getTargetInfo());
+
+ bool DefaultIsDupe = false;
+ if (Cur.empty())
+ return Diag(CurLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << "" << TargetClones;
+
+ if (Cur.startswith("arch=")) {
+ if (!Context.getTargetInfo().isValidCPUName(
+ Cur.drop_front(sizeof("arch=") - 1)))
+ return Diag(CurLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << Architecture
+ << Cur.drop_front(sizeof("arch=") - 1) << TargetClones;
+ } else if (Cur == "default") {
+ DefaultIsDupe = HasDefault;
+ HasDefault = true;
+ } else if (!Context.getTargetInfo().isValidFeatureName(Cur))
+ return Diag(CurLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << Cur << TargetClones;
+
+ if (llvm::find(Strings, Cur) != Strings.end() || DefaultIsDupe)
+ Diag(CurLoc, diag::warn_target_clone_duplicate_options);
+ // Note: Add even if there are duplicates, since it changes name mangling.
+ Strings.push_back(Cur);
+ }
+
+ if (Str.rtrim().endswith(","))
+ return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << "" << TargetClones;
+ return false;
+}
+
+static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ // Ensure we don't combine these with themselves, since that causes some
+ // confusing behavior.
+ if (const auto *Other = D->getAttr<TargetClonesAttr>()) {
+ S.Diag(AL.getLoc(), diag::err_disallowed_duplicate_attribute) << AL;
+ S.Diag(Other->getLocation(), diag::note_conflicting_attribute);
+ return;
+ }
+ if (checkAttrMutualExclusion<TargetClonesAttr>(S, D, AL))
+ return;
+
+ SmallVector<StringRef, 2> Strings;
+ bool HasCommas = false, HasDefault = false;
+
+ for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) {
+ StringRef CurStr;
+ SourceLocation LiteralLoc;
+ if (!S.checkStringLiteralArgumentAttr(AL, I, CurStr, &LiteralLoc) ||
+ S.checkTargetClonesAttrString(
+ LiteralLoc, CurStr,
+ cast<StringLiteral>(AL.getArgAsExpr(I)->IgnoreParenCasts()),
+ HasDefault, HasCommas, Strings))
+ return;
+ }
+
+ if (HasCommas && AL.getNumArgs() > 1)
+ S.Diag(AL.getLoc(), diag::warn_target_clone_mixed_values);
+
+ if (!HasDefault) {
+ S.Diag(AL.getLoc(), diag::err_target_clone_must_have_default);
+ return;
+ }
+
+ // FIXME: We could probably figure out how to get this to work for lambdas
+ // someday.
+ if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) {
+ if (MD->getParent()->isLambda()) {
+ S.Diag(D->getLocation(), diag::err_multiversion_doesnt_support)
+ << static_cast<unsigned>(MultiVersionKind::TargetClones)
+ << /*Lambda*/ 9;
+ return;
+ }
+ }
+
+ cast<FunctionDecl>(D)->setIsMultiVersion();
+ TargetClonesAttr *NewAttr = ::new (S.Context)
+ TargetClonesAttr(S.Context, AL, Strings.data(), Strings.size());
+ D->addAttr(NewAttr);
+}
+
static void handleMinVectorWidthAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
Expr *E = AL.getArgAsExpr(0);
uint32_t VecWidth;
@@ -8217,6 +8341,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case ParsedAttr::AT_Target:
handleTargetAttr(S, D, AL);
break;
+ case ParsedAttr::AT_TargetClones:
+ handleTargetClonesAttr(S, D, AL);
+ break;
case ParsedAttr::AT_MinVectorWidth:
handleMinVectorWidthAttr(S, D, AL);
break;
diff --git a/clang/test/CodeGen/attr-target-clones.c b/clang/test/CodeGen/attr-target-clones.c
new file mode 100644
index 0000000000000..e17cca125b5ba
--- /dev/null
+++ b/clang/test/CodeGen/attr-target-clones.c
@@ -0,0 +1,126 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes=LINUX,CHECK
+// RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefixes=WINDOWS,CHECK
+
+// LINUX: @foo.ifunc = weak_odr ifunc i32 (), i32 ()* ()* @foo.resolver
+// LINUX: @foo_dupes.ifunc = weak_odr ifunc void (), void ()* ()* @foo_dupes.resolver
+// LINUX: @unused.ifunc = weak_odr ifunc void (), void ()* ()* @unused.resolver
+// LINUX: @foo_inline.ifunc = weak_odr ifunc i32 (), i32 ()* ()* @foo_inline.resolver
+// LINUX: @foo_inline2.ifunc = weak_odr ifunc i32 (), i32 ()* ()* @foo_inline2.resolver
+
+int __attribute__((target_clones("sse4.2, default"))) foo(void) { return 0; }
+// LINUX: define {{.*}}i32 @foo.sse4.2.0()
+// LINUX: define {{.*}}i32 @foo.default.1()
+// LINUX: define i32 ()* @foo.resolver()
+// LINUX: ret i32 ()* @foo.sse4.2.0
+// LINUX: ret i32 ()* @foo.default.1
+
+// WINDOWS: define dso_local i32 @foo.sse4.2.0()
+// WINDOWS: define dso_local i32 @foo.default.1()
+// WINDOWS: define dso_local i32 @foo()
+// WINDOWS: musttail call i32 @foo.sse4.2.0
+// WINDOWS: musttail call i32 @foo.default.1
+
+__attribute__((target_clones("default,default ,sse4.2"))) void foo_dupes(void) {}
+// LINUX: define {{.*}}void @foo_dupes.default.1()
+// LINUX: define {{.*}}void @foo_dupes.sse4.2.0()
+// LINUX: define void ()* @foo_dupes.resolver()
+// LINUX: ret void ()* @foo_dupes.sse4.2.0
+// LINUX: ret void ()* @foo_dupes.default.1
+
+// WINDOWS: define dso_local void @foo_dupes.default.1()
+// WINDOWS: define dso_local void @foo_dupes.sse4.2.0()
+// WINDOWS: define dso_local void @foo_dupes()
+// WINDOWS: musttail call void @foo_dupes.sse4.2.0
+// WINDOWS: musttail call void @foo_dupes.default.1
+
+void bar2() {
+ // LINUX: define {{.*}}void @bar2()
+ // WINDOWS: define dso_local void @bar2()
+ foo_dupes();
+ // LINUX: call void @foo_dupes.ifunc()
+ // WINDOWS: call void @foo_dupes()
+}
+
+int bar() {
+ // LINUX: define {{.*}}i32 @bar() #[[DEF:[0-9]+]]
+ // WINDOWS: define dso_local i32 @bar() #[[DEF:[0-9]+]]
+ return foo();
+ // LINUX: call i32 @foo.ifunc()
+ // WINDOWS: call i32 @foo()
+}
+
+void __attribute__((target_clones("default, arch=ivybridge"))) unused(void) {}
+// LINUX: define {{.*}}void @unused.default.1()
+// LINUX: define {{.*}}void @unused.arch_ivybridge.0()
+// LINUX: define void ()* @unused.resolver()
+// LINUX: ret void ()* @unused.arch_ivybridge.0
+// LINUX: ret void ()* @unused.default.1
+
+// WINDOWS: define dso_local void @unused.default.1()
+// WINDOWS: define dso_local void @unused.arch_ivybridge.0()
+// WINDOWS: define dso_local void @unused()
+// WINDOWS: musttail call void @unused.arch_ivybridge.0
+// WINDOWS: musttail call void @unused.default.1
+
+
+inline int __attribute__((target_clones("arch=sandybridge,default,sse4.2")))
+foo_inline(void) { return 0; }
+inline int __attribute__((target_clones("arch=sandybridge,default,sse4.2")))
+foo_inline2(void);
+
+int bar3() {
+ // LINUX: define {{.*}}i32 @bar3()
+ // WINDOWS: define dso_local i32 @bar3()
+ return foo_inline() + foo_inline2();
+ // LINUX: call i32 @foo_inline.ifunc()
+ // LINUX: call i32 @foo_inline2.ifunc()
+ // WINDOWS: call i32 @foo_inline()
+ // WINDOWS: call i32 @foo_inline2()
+}
+
+// Deferred emission of foo_inline, which got delayed because it is inline.
+// LINUX: define i32 ()* @foo_inline.resolver()
+// LINUX: ret i32 ()* @foo_inline.arch_sandybridge.0
+// LINUX: ret i32 ()* @foo_inline.sse4.2.1
+// LINUX: ret i32 ()* @foo_inline.default.2
+
+// WINDOWS: define dso_local i32 @foo_inline()
+// WINDOWS: musttail call i32 @foo_inline.arch_sandybridge.0
+// WINDOWS: musttail call i32 @foo_inline.sse4.2.1
+// WINDOWS: musttail call i32 @foo_inline.default.2
+
+inline int __attribute__((target_clones("arch=sandybridge,default,sse4.2")))
+foo_inline2(void){ return 0; }
+// LINUX: define linkonce i32 @foo_inline2.arch_sandybridge.0() #[[SB:[0-9]+]]
+// LINUX: define i32 ()* @foo_inline2.resolver()
+// LINUX: ret i32 ()* @foo_inline2.arch_sandybridge.0
+// LINUX: ret i32 ()* @foo_inline2.sse4.2.1
+// LINUX: ret i32 ()* @foo_inline2.default.2
+
+// WINDOWS: define linkonce_odr dso_local i32 @foo_inline2.arch_sandybridge.0() #[[SB:[0-9]+]]
+// WINDOWS: define dso_local i32 @foo_inline2()
+// WINDOWS: musttail call i32 @foo_inline2.arch_sandybridge.0
+// WINDOWS: musttail call i32 @foo_inline2.sse4.2.1
+// WINDOWS: musttail call i32 @foo_inline2.default.2
+
+// LINUX: define linkonce i32 @foo_inline.arch_sandybridge.0() #[[SB]]
+// LINUX: define linkonce i32 @foo_inline.default.2() #[[DEF]]
+// LINUX: define linkonce i32 @foo_inline.sse4.2.1() #[[SSE42:[0-9]+]]
+
+// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.arch_sandybridge.0() #[[SB]]
+// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.default.2() #[[DEF]]
+// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.sse4.2.1() #[[SSE42:[0-9]+]]
+
+
+// LINUX: define linkonce i32 @foo_inline2.default.2() #[[DEF]]
+// LINUX: define linkonce i32 @foo_inline2.sse4.2.1() #[[SSE42]]
+
+// WINDOWS: define linkonce_odr dso_local i32 @foo_inline2.default.2() #[[DEF]]
+// WINDOWS: define linkonce_odr dso_local i32 @foo_inline2.sse4.2.1() #[[SSE42]]
+
+// CHECK: attributes #[[SSE42]] =
+// CHECK-SAME: "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
+// CHECK: attributes #[[DEF]] =
+// Don't bother checking features, we verified it is the same as a normal function.
+// CHECK: attributes #[[SB]] =
+// CHECK-SAME: "target-features"="+avx,+crc32,+cx16,+cx8,+fxsr,+mmx,+pclmul,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
diff --git a/clang/test/CodeGenCXX/attr-target-clones.cpp b/clang/test/CodeGenCXX/attr-target-clones.cpp
new file mode 100644
index 0000000000000..9830ba54c4f1f
--- /dev/null
+++ b/clang/test/CodeGenCXX/attr-target-clones.cpp
@@ -0,0 +1,116 @@
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
+
+// Overloaded ifuncs
+// LINUX: @_Z10overloadedi.ifunc = weak_odr ifunc i32 (i32), i32 (i32)* ()* @_Z10overloadedi.resolver
+// LINUX: @_Z10overloadedPKc.ifunc = weak_odr ifunc i32 (i8*), i32 (i8*)* ()* @_Z10overloadedPKc.resolver
+// struct 'C' ifuncs, note the 'float, U' one doesn't get one.
+// LINUX: @_ZN1CIssE3fooEv.ifunc = weak_odr ifunc i32 (%struct.C*), i32 (%struct.C*)* ()* @_ZN1CIssE3fooEv.resolver
+// LINUX: @_ZN1CIisE3fooEv.ifunc = weak_odr ifunc i32 (%struct.C.0*), i32 (%struct.C.0*)* ()* @_ZN1CIisE3fooEv.resolver
+// LINUX: @_ZN1CIdfE3fooEv.ifunc = weak_odr ifunc i32 (%struct.C.2*), i32 (%struct.C.2*)* ()* @_ZN1CIdfE3fooEv.resolver
+
+int __attribute__((target_clones("sse4.2", "default"))) overloaded(int) { return 1; }
+// LINUX: define {{.*}}i32 @_Z10overloadedi.sse4.2.0(i32{{.+}})
+// LINUX: define {{.*}}i32 @_Z10overloadedi.default.1(i32{{.+}})
+// LINUX: define i32 (i32)* @_Z10overloadedi.resolver
+// LINUX: ret i32 (i32)* @_Z10overloadedi.sse4.2.0
+// LINUX: ret i32 (i32)* @_Z10overloadedi.default.1
+
+// WINDOWS: define dso_local i32 @"?overloaded@@YAHH@Z.sse4.2.0"(i32{{.+}})
+// WINDOWS: define dso_local i32 @"?overloaded@@YAHH@Z.default.1"(i32{{.+}})
+// WINDOWS: define dso_local i32 @"?overloaded@@YAHH@Z"(i32{{.+}})
+// WINDOWS: call i32 @"?overloaded@@YAHH@Z.sse4.2.0"
+// WINDOWS: call i32 @"?overloaded@@YAHH@Z.default.1"
+
+int __attribute__((target_clones("arch=ivybridge", "default"))) overloaded(const char *) { return 2; }
+// LINUX: define {{.*}}i32 @_Z10overloadedPKc.arch_ivybridge.0(i8*{{.+}})
+// LINUX: define {{.*}}i32 @_Z10overloadedPKc.default.1(i8*{{.+}})
+// LINUX: define i32 (i8*)* @_Z10overloadedPKc.resolver
+// LINUX: ret i32 (i8*)* @_Z10overloadedPKc.arch_ivybridge.0
+// LINUX: ret i32 (i8*)* @_Z10overloadedPKc.default.1
+
+// WINDOWS: define dso_local i32 @"?overloaded@@YAHPEBD@Z.arch_ivybridge.0"(i8*{{.+}})
+// WINDOWS: define dso_local i32 @"?overloaded@@YAHPEBD@Z.default.1"(i8*{{.+}})
+// WINDOWS: define dso_local i32 @"?overloaded@@YAHPEBD@Z"(i8*{{.+}})
+// WINDOWS: call i32 @"?overloaded@@YAHPEBD@Z.arch_ivybridge.0"
+// WINDOWS: call i32 @"?overloaded@@YAHPEBD@Z.default.1"
+//
+void use_overloaded() {
+ overloaded(1);
+ // LINUX: call i32 @_Z10overloadedi.ifunc
+ // WINDOWS: call i32 @"?overloaded@@YAHH@Z"
+ overloaded(nullptr);
+ // LINUX: call i32 @_Z10overloadedPKc.ifunc
+ // WINDOWS: call i32 @"?overloaded@@YAHPEBD@Z"
+}
+
+template<typename T, typename U>
+struct C {
+int __attribute__((target_clones("sse4.2", "default"))) foo(){ return 1;}
+};
+template<typename U>
+struct C<int, U> {
+int __attribute__((target_clones("sse4.2", "default"))) foo(){ return 2;}
+};
+template<typename U>
+struct C<float, U> {
+int foo(){ return 2;}
+};
+template<>
+struct C<double, float> {
+int __attribute__((target_clones("sse4.2", "default"))) foo(){ return 3;}
+};
+
+void uses_specialized() {
+ C<short, short> c;
+ c.foo();
+ // LINUX: call i32 @_ZN1CIssE3fooEv.ifunc(%struct.C
+ // WINDOWS: call i32 @"?foo@?$C@FF@@QEAAHXZ"(%struct.C
+ C<int, short> c2;
+ c2.foo();
+ // LINUX: call i32 @_ZN1CIisE3fooEv.ifunc(%struct.C
+ // WINDOWS: call i32 @"?foo@?$C@HF@@QEAAHXZ"(%struct.C
+ C<float, short> c3;
+ c3.foo();
+ // Note this is not an ifunc/mv
+ // LINUX: call i32 @_ZN1CIfsE3fooEv(%struct.C
+ // WINDOWS: call i32 @"?foo@?$C@MF@@QEAAHXZ"(%struct.C
+ C<double, float> c4;
+ c4.foo();
+ // LINUX: call i32 @_ZN1CIdfE3fooEv.ifunc(%struct.C
+ // WINDOWS: call i32 @"?foo@?$C@NM@@QEAAHXZ"(%struct.C
+}
+
+// LINUX: define {{.*}}i32 @_ZN1CIssE3fooEv.sse4.2.0(%struct.C{{.+}})
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@FF@@QEAAHXZ.sse4.2.0"(%struct.C{{.+}})
+// LINUX: define i32 (%struct.C*)* @_ZN1CIssE3fooEv.resolver
+// LINUX: ret i32 (%struct.C*)* @_ZN1CIssE3fooEv.sse4.2.0
+// LINUX: ret i32 (%struct.C*)* @_ZN1CIssE3fooEv.default.1
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@FF@@QEAAHXZ"(%struct.C{{.+}})
+// WINDOWS: call i32 @"?foo@?$C@FF@@QEAAHXZ.sse4.2.0"
+// WINDOWS: call i32 @"?foo@?$C@FF@@QEAAHXZ.default.1"
+
+// LINUX: define {{.*}}i32 @_ZN1CIisE3fooEv.sse4.2.0(%struct.C{{.+}})
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@HF@@QEAAHXZ.sse4.2.0"(%struct.C{{.+}})
+// LINUX: define i32 (%struct.C{{.+}})* @_ZN1CIisE3fooEv.resolver
+// LINUX: ret i32 (%struct.C{{.+}})* @_ZN1CIisE3fooEv.sse4.2.0
+// LINUX: ret i32 (%struct.C{{.+}})* @_ZN1CIisE3fooEv.default.1
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@HF@@QEAAHXZ"(%struct.C{{.+}})
+// WINDOWS: call i32 @"?foo@?$C@HF@@QEAAHXZ.sse4.2.0"
+// WINDOWS: call i32 @"?foo@?$C@HF@@QEAAHXZ.default.1"
+
+// LINUX: define i32 (%struct.C{{.+}})* @_ZN1CIdfE3fooEv.resolver
+// LINUX: ret i32 (%struct.C{{.+}})* @_ZN1CIdfE3fooEv.sse4.2.0
+// LINUX: ret i32 (%struct.C{{.+}})* @_ZN1CIdfE3fooEv.default.1
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@NM@@QEAAHXZ"(%struct.C{{.+}})
+// WINDOWS: call i32 @"?foo@?$C@NM@@QEAAHXZ.sse4.2.0"
+// WINDOWS: call i32 @"?foo@?$C@NM@@QEAAHXZ.default.1"
+
+// LINUX: define {{.*}}i32 @_ZN1CIdfE3fooEv.sse4.2.0(%struct.C{{.+}})
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@NM@@QEAAHXZ.sse4.2.0"(%struct.C{{.+}})
+// LINUX: define {{.*}}i32 @_ZN1CIdfE3fooEv.default.1(%struct.C{{.+}})
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@NM@@QEAAHXZ.default.1"(%struct.C{{.+}})
+// LINUX: define {{.*}}i32 @_ZN1CIssE3fooEv.default.1(%struct.C{{.+}})
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@FF@@QEAAHXZ.default.1"(%struct.C{{.+}})
+// LINUX: define {{.*}}i32 @_ZN1CIisE3fooEv.default.1(%struct.C{{.+}})
+// WINDOWS: define {{.*}}i32 @"?foo@?$C@HF@@QEAAHXZ.default.1"(%struct.C{{.+}})
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index b565f7d00314b..a3b1ce0adca77 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -173,6 +173,7 @@
// CHECK-NEXT: SwiftObjCMembers (SubjectMatchRule_objc_interface)
// CHECK-NEXT: TLSModel (SubjectMatchRule_variable_is_thread_local)
// CHECK-NEXT: Target (SubjectMatchRule_function)
+// CHECK-NEXT: TargetClones (SubjectMatchRule_function)
// CHECK-NEXT: TestTypestate (SubjectMatchRule_function_is_member)
// CHECK-NEXT: TrivialABI (SubjectMatchRule_record)
// CHECK-NEXT: Uninitialized (SubjectMatchRule_variable_is_local)
diff --git a/clang/test/Sema/attr-cpuspecific.c b/clang/test/Sema/attr-cpuspecific.c
index 9cfeef8a23562..07ca516c8ae04 100644
--- a/clang/test/Sema/attr-cpuspecific.c
+++ b/clang/test/Sema/attr-cpuspecific.c
@@ -88,7 +88,8 @@ void __attribute__((target("default"))) addtl_attrs2(void);
// expected-note@-2 {{previous declaration is here}}
void __attribute__((cpu_specific(sandybridge))) addtl_attrs2(void);
-// expected-error@+2 {{multiversioning attributes cannot be combined}}
+// expected-error@+2 {{'cpu_dispatch' and 'cpu_specific' attributes are not compatible}}
+// expected-note@+1 {{conflicting attribute is here}}
void __attribute((cpu_specific(sandybridge), cpu_dispatch(atom, sandybridge)))
combine_attrs(void);
diff --git a/clang/test/Sema/attr-target-clones.c b/clang/test/Sema/attr-target-clones.c
new file mode 100644
index 0000000000000..ea7cf91c3ab86
--- /dev/null
+++ b/clang/test/Sema/attr-target-clones.c
@@ -0,0 +1,88 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -verify %s
+
+// expected-error@+1 {{'target_clones' multiversioning requires a default target}}
+void __attribute__((target_clones("sse4.2", "arch=sandybridge")))
+no_default(void);
+
+// expected-error@+2 {{'target_clones' and 'target' attributes are not compatible}}
+// expected-note@+1 {{conflicting attribute is here}}
+void __attribute__((target("sse4.2"), target_clones("arch=sandybridge")))
+ignored_attr(void);
+// expected-error@+2 {{'target' and 'target_clones' attributes are not compatible}}
+// expected-note@+1 {{conflicting attribute is here}}
+void __attribute__((target_clones("arch=sandybridge,default"), target("sse4.2")))
+ignored_attr2(void);
+
+int redecl(void);
+int __attribute__((target_clones("sse4.2", "default"))) redecl(void) { return 1; }
+
+int __attribute__((target_clones("sse4.2", "default"))) redecl2(void);
+int __attribute__((target_clones("sse4.2", "default"))) redecl2(void) { return 1; }
+
+int __attribute__((target_clones("sse4.2", "default"))) redecl3(void);
+int redecl3(void);
+
+int __attribute__((target_clones("sse4.2", "arch=atom", "default"))) redecl4(void);
+// expected-error@+3 {{'target_clones' attribute does not match previous declaration}}
+// expected-note@-2 {{previous declaration is here}}
+int __attribute__((target_clones("sse4.2", "arch=sandybridge", "default")))
+redecl4(void) { return 1; }
+
+int __attribute__((target("sse4.2"))) redef2(void) { return 1; }
+// expected-error@+2 {{multiversioning attributes cannot be combined}}
+// expected-note@-2 {{previous declaration is here}}
+int __attribute__((target_clones("sse4.2", "default"))) redef2(void) { return 1; }
+
+int __attribute__((target_clones("sse4.2,default"))) redef3(void) { return 1; }
+// expected-error@+2 {{redefinition of 'redef3'}}
+// expected-note@-2 {{previous definition is here}}
+int __attribute__((target_clones("sse4.2,default"))) redef3(void) { return 1; }
+
+int __attribute__((target_clones("sse4.2,default"))) redef4(void) { return 1; }
+// expected-error@+2 {{redefinition of 'redef4'}}
+// expected-note@-2 {{previous definition is here}}
+int __attribute__((target_clones("sse4.2,default"))) redef4(void) { return 1; }
+
+// Duplicates are allowed, however they alter name mangling.
+// expected-warning@+2 {{mixing 'target_clones' specifier mechanisms is permitted for GCC compatibility}}
+// expected-warning@+1 2 {{version list contains duplicate entries}}
+int __attribute__((target_clones("arch=atom,arch=atom", "arch=atom,default")))
+dupes(void) { return 1; }
+
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("")))
+empty_target_1(void);
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones(",default")))
+empty_target_2(void);
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("default,")))
+empty_target_3(void);
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("default, ,avx2")))
+empty_target_4(void);
+
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("default,avx2", "")))
+empty_target_5(void);
+
+// expected-warning@+1 {{version list contains duplicate entries}}
+void __attribute__((target_clones("default", "default")))
+dupe_default(void);
+
+// expected-warning@+1 {{version list contains duplicate entries}}
+void __attribute__((target_clones("avx2,avx2,default")))
+dupe_normal(void);
+
+// expected-error@+2 {{attribute 'target_clones' cannot appear more than once on a declaration}}
+// expected-note@+1 {{conflicting attribute is here}}
+void __attribute__((target_clones("avx2,default"), target_clones("arch=atom,default")))
+dupe_normal2(void);
+
+int mv_after_use(void);
+int useage() {
+ return mv_after_use();
+}
+// expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}}
+int __attribute__((target_clones("sse4.2", "default"))) mv_after_use(void) { return 1; }
+
diff --git a/clang/test/SemaCXX/attr-target-clones.cpp b/clang/test/SemaCXX/attr-target-clones.cpp
new file mode 100644
index 0000000000000..68c9a4ff48ed9
--- /dev/null
+++ b/clang/test/SemaCXX/attr-target-clones.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -verify -fexceptions -fcxx-exceptions %s -std=c++14
+
+// expected-error@+2 {{attribute 'target_clones' multiversioned functions do not yet support function templates}}
+template<typename T, typename U>
+int __attribute__((target_clones("sse4.2", "default"))) foo(){ return 1;}
+
+void uses_lambda() {
+ // expected-error@+1 {{attribute 'target_clones' multiversioned functions do not yet support lambdas}}
+ auto x = []()__attribute__((target_clones("sse4.2", "arch=ivybridge", "default"))) {};
+ x();
+}
More information about the cfe-commits
mailing list