[clang] [FMV][AIX] Implement target_clones (cpu-only) (PR #177428)
Wael Yehia via cfe-commits
cfe-commits at lists.llvm.org
Thu Mar 12 19:58:56 PDT 2026
https://github.com/w2yehia updated https://github.com/llvm/llvm-project/pull/177428
>From 4ff6a9bb683c64b731d74c4efd0d93bf249b553c Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 23 Sep 2025 14:22:57 -0400
Subject: [PATCH 01/24] refactor EmitPPCBuiltinCpu
---
clang/lib/CodeGen/CodeGenFunction.h | 1 +
clang/lib/CodeGen/TargetBuiltins/PPC.cpp | 62 +++++++++++++-----------
2 files changed, 36 insertions(+), 27 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 9771b89b55aae..1d1108a4c4b1b 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4912,6 +4912,7 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *BuildVector(ArrayRef<llvm::Value *> Ops);
llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+ llvm::Value *EmitPPCBuiltinCpu(unsigned BuiltinID, llvm::Type *ReturnType, StringRef CPUStr);
llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
index 01926878085e0..f3b1147ebcbbc 100644
--- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
@@ -70,31 +70,18 @@ static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
return CI;
}
-Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
- // Do not emit the builtin arguments in the arguments of a function call,
- // because the evaluation order of function arguments is not specified in C++.
- // This is important when testing to ensure the arguments are emitted in the
- // same order every time. Eg:
- // Instead of:
- // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
- // EmitScalarExpr(E->getArg(1)), "swdiv");
- // Use:
- // Value *Op0 = EmitScalarExpr(E->getArg(0));
- // Value *Op1 = EmitScalarExpr(E->getArg(1));
- // return Builder.CreateFDiv(Op0, Op1, "swdiv")
-
- Intrinsic::ID ID = Intrinsic::not_intrinsic;
+Value *CodeGenFunction::EmitPPCBuiltinCpu(
+ unsigned BuiltinID, llvm::Type *ReturnType, StringRef CPUStr) {
#include "llvm/TargetParser/PPCTargetParser.def"
auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
unsigned Mask, CmpInst::Predicate CompOp,
unsigned OpValue) -> Value * {
if (SupportMethod == BUILTIN_PPC_FALSE)
- return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
+ return llvm::ConstantInt::getFalse(ReturnType);
if (SupportMethod == BUILTIN_PPC_TRUE)
- return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
+ return llvm::ConstantInt::getTrue(ReturnType);
assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");
@@ -137,12 +124,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
};
- switch (BuiltinID) {
- default: return nullptr;
-
- case Builtin::BI__builtin_cpu_is: {
- const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
- StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
+ if (BuiltinID == Builtin::BI__builtin_cpu_is) {
llvm::Triple Triple = getTarget().getTriple();
typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;
@@ -170,7 +152,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
"Invalid CPU name. Missed by SemaChecking?");
if (LinuxSupportMethod == BUILTIN_PPC_FALSE)
- return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
+ return llvm::ConstantInt::getFalse(ReturnType);
Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
@@ -178,10 +160,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
return Builder.CreateICmpEQ(TheCall,
llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
}
- case Builtin::BI__builtin_cpu_supports: {
+ else if (BuiltinID == Builtin::BI__builtin_cpu_supports) {
llvm::Triple Triple = getTarget().getTriple();
- const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
- StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
if (Triple.isOSAIX()) {
typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
unsigned>
@@ -218,7 +198,35 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
#undef PPC_FAWORD_HWCAP2
#undef PPC_FAWORD_CPUID
}
+ else
+ assert(0 && "unexpected builtin");
+}
+Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
+ // Do not emit the builtin arguments in the arguments of a function call,
+ // because the evaluation order of function arguments is not specified in C++.
+ // This is important when testing to ensure the arguments are emitted in the
+ // same order every time. Eg:
+ // Instead of:
+ // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
+ // EmitScalarExpr(E->getArg(1)), "swdiv");
+ // Use:
+ // Value *Op0 = EmitScalarExpr(E->getArg(0));
+ // Value *Op1 = EmitScalarExpr(E->getArg(1));
+ // return Builder.CreateFDiv(Op0, Op1, "swdiv")
+
+ Intrinsic::ID ID = Intrinsic::not_intrinsic;
+
+ switch (BuiltinID) {
+ default: return nullptr;
+
+ case Builtin::BI__builtin_cpu_is:
+ case Builtin::BI__builtin_cpu_supports: {
+ const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
+ StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
+ return EmitPPCBuiltinCpu(BuiltinID, ConvertType(E->getType()), CPUStr);
+ }
// __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
// call __builtin_readcyclecounter.
case PPC::BI__builtin_ppc_get_timebase:
>From aef34e443477ad9e705ecb66cd5f297d71d497d9 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Fri, 13 Mar 2026 03:01:57 +0000
Subject: [PATCH 02/24] clang codegen for target_clones
---
clang/include/clang/Basic/TargetInfo.h | 2 +-
clang/include/clang/Sema/SemaPPC.h | 4 ++
clang/lib/AST/ASTContext.cpp | 2 +
clang/lib/Basic/Targets/PPC.cpp | 36 +++++++++++++
clang/lib/Basic/Targets/PPC.h | 4 ++
clang/lib/CodeGen/CodeGenFunction.cpp | 70 +++++++++++++++++++++++++-
clang/lib/CodeGen/CodeGenFunction.h | 2 +
clang/lib/CodeGen/CodeGenModule.cpp | 10 ++--
clang/lib/CodeGen/Targets/PPC.cpp | 47 +++++++++++++++++
clang/lib/Sema/SemaDeclAttr.cpp | 4 ++
clang/lib/Sema/SemaPPC.cpp | 56 +++++++++++++++++++++
11 files changed, 230 insertions(+), 7 deletions(-)
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index ec6cd2be7c3c5..68160e9bd9b29 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1567,7 +1567,7 @@ class TargetInfo : public TransferrableTargetInfo,
/// which requires support for cpu_supports and cpu_is functionality.
bool supportsMultiVersioning() const {
return getTriple().isX86() || getTriple().isAArch64() ||
- getTriple().isRISCV();
+ getTriple().isRISCV() || getTriple().isOSBinFormatXCOFF();
}
/// Identify whether this target supports IFuncs.
diff --git a/clang/include/clang/Sema/SemaPPC.h b/clang/include/clang/Sema/SemaPPC.h
index f8edecc4fcb7b..0cf6ba7ff29dd 100644
--- a/clang/include/clang/Sema/SemaPPC.h
+++ b/clang/include/clang/Sema/SemaPPC.h
@@ -53,6 +53,10 @@ class SemaPPC : public SemaBase {
// vector double vec_xxpermdi(vector double, vector double, int);
// vector short vec_xxsldwi(vector short, vector short, int);
bool BuiltinVSX(CallExpr *TheCall);
+
+ bool checkTargetClonesAttr(SmallVectorImpl<StringRef> &Params,
+ SmallVectorImpl<SourceLocation> &Locs,
+ SmallVectorImpl<SmallString<64>> &NewParams);
};
} // namespace clang
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index d8d2fc23974c2..dd97009ac2404 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -15264,6 +15264,8 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex());
if (VersionStr.starts_with("arch="))
TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1);
+ else if (Target->getTriple().isOSAIX() && VersionStr.starts_with("cpu=")) // TODO make a function that extracts CPU from a feature string
+ TargetCPU = VersionStr.drop_front(sizeof("cpu=") - 1);
else if (VersionStr != "default")
Features.push_back((StringRef{"+"} + VersionStr).str());
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index a37a68ad91724..7f8005db3cd3e 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -678,6 +678,42 @@ void PPCTargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
}
}
+ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const {
+ ParsedTargetAttr Ret;
+ if (Features == "default")
+ return Ret;
+ SmallVector<StringRef, 1> AttrFeatures;
+ Features.split(AttrFeatures, ",");
+
+ // Grab the various features and prepend a "+" to turn on the feature to
+ // the backend and add them to our existing set of features.
+ for (auto &Feature : AttrFeatures) {
+ // Go ahead and trim whitespace rather than either erroring or
+ // accepting it weirdly.
+ Feature = Feature.trim();
+
+ // While we're here iterating check for a different target cpu.
+ if (Feature.starts_with("cpu=")) {
+ assert(Ret.CPU.empty());
+ Ret.CPU = Feature.split("=").second.trim();
+ } else assert(0);
+// else if (Feature.starts_with("tune=")) {
+// if (!Ret.Tune.empty())
+// Ret.Duplicate = "tune=";
+// else
+// Ret.Tune = Feature.split("=").second.trim();
+// } else if (Feature.starts_with("no-"))
+// Ret.Features.push_back("-" + Feature.split("-").second.str());
+// else
+// Ret.Features.push_back("+" + Feature.str());
+ }
+ return Ret;
+}
+
+llvm::APInt PPCTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
+ return llvm::APInt(32, Features.empty() ? 0 : 1);
+}
+
// Make sure that registers are added in the correct array index which should be
// the DWARF number for PPC registers.
const char *const PPCTargetInfo::GCCRegNames[] = {
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 664c9e15d8d18..6f90ff1f5d57c 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -199,6 +199,10 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
bool supportsTargetAttributeTune() const override { return true; }
+ ParsedTargetAttr parseTargetAttr(StringRef Str) const override;
+
+ llvm::APInt getFMVPriority(ArrayRef<StringRef> Features) const override;
+
ArrayRef<const char *> getGCCRegNames() const override;
ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override;
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 1dee656b5f2d0..21752e90552b2 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -45,6 +45,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/SipHash.h"
@@ -3076,12 +3077,77 @@ void CodeGenFunction::EmitMultiVersionResolver(
case llvm::Triple::riscv64be:
EmitRISCVMultiVersionResolver(Resolver, Options);
return;
-
+ case llvm::Triple::ppc:
+ case llvm::Triple::ppc64:
+ if (getContext().getTargetInfo().getTriple().isOSAIX()) {
+ EmitPPCAIXMultiVersionResolver(Resolver, Options);
+ return;
+ }
+ [[fallthrough]];
default:
- assert(false && "Only implemented for x86, AArch64 and RISC-V targets");
+ assert(false && "Only implemented for x86, AArch64, RISC-V, and PowerPC targets");
+ }
+}
+
+/*
+ * Desc_t *foo_desc = ppc_get_function_descriptor(&foo);
+ * if (foo_desc->addr == ppc_get_function_entry(&foo)) {
+ * FuncPtr fp = resolver();
+ * __c11_atomic_store((_Atomic FuncPtr *)&foo_desc->addr, fp, 0);
+ * }
+ * return ((int (*)(int)) foo_desc)(a);
+ */
+void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
+ llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {
+
+ llvm::PointerType *PtrTy = Builder.getPtrTy();
+ // entry:
+ llvm::BasicBlock *CurBlock = createBasicBlock("entry", Resolver);
+
+ SmallVector<std::pair<llvm::Value *, llvm::BasicBlock *>, 3> PhiArgs;
+ for (const FMVResolverOption &RO : Options) {
+ Builder.SetInsertPoint(CurBlock);
+ // The 'default' or 'generic' case.
+ if (!RO.Architecture && RO.Features.empty()) {
+ // if.default:
+ // %fmv.default = call ptr @getEntryPoint(ptr noundef @foo_default)
+ // br label %resolver_exit
+ assert(&RO == Options.end() - 1 && "Default or Generic case must be last");
+ Builder.CreateRet(RO.Function);
+ break;
+ }
+ // if.else_n:
+ // %is_version_n = __builtin_cpu_supports(version_n)
+ // br i1 %is_version_n, label %if.version_n, label %if.default
+ //
+ // if.version_n:
+ // %fmv.version.n = call ptr @getEntryPoint(ptr noundef @foo_version_n)
+ // br label %resolver_exit
+ assert(RO.Features.size() == 1 && "for now one feature requirement per version");
+ llvm::Value *Condition;
+ if (RO.Features[0].starts_with("cpu=")) {
+ Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_is, Builder.getInt1Ty(), RO.Features[0].split("=").second.trim());
+ } else {
+ Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_supports, Builder.getInt1Ty(), RO.Features[0]);
+ }
+ llvm::BasicBlock *ThenBlock = createBasicBlock("if.version", Resolver);
+ CurBlock = createBasicBlock("if.else", Resolver);
+ Builder.CreateCondBr(Condition, ThenBlock, CurBlock);
+
+ Builder.SetInsertPoint(ThenBlock);
+ Builder.CreateRet(RO.Function);
}
+
+ // If no generic/default, emit an unreachable.
+// Builder.SetInsertPoint(CurBlock);
+// llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap);
+// TrapCall->setDoesNotReturn();
+// TrapCall->setDoesNotThrow();
+// Builder.CreateUnreachable();
+// Builder.ClearInsertionPoint();
}
+
void CodeGenFunction::EmitRISCVMultiVersionResolver(
llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 1d1108a4c4b1b..07bb9fae5cd85 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -5579,6 +5579,8 @@ class CodeGenFunction : public CodeGenTypeCache {
ArrayRef<FMVResolverOption> Options);
void EmitRISCVMultiVersionResolver(llvm::Function *Resolver,
ArrayRef<FMVResolverOption> Options);
+ void EmitPPCAIXMultiVersionResolver(llvm::Function *Resolver,
+ ArrayRef<FMVResolverOption> Options);
Address EmitAddressOfPFPField(Address RecordPtr, const PFPField &Field);
Address EmitAddressOfPFPField(Address RecordPtr, Address FieldPtr,
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 3b64be7a477d6..bfb3d22173449 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -3053,9 +3053,10 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
// While we populated the feature map above, we still need to
// get and parse the target attribute so we can get the cpu for
// the function.
- if (TD) {
- ParsedTargetAttr ParsedAttr =
- Target.parseTargetAttr(TD->getFeaturesStr());
+ StringRef FeatureStr = TD ? TD->getFeaturesStr() :
+ (TC ? TC->getFeatureStr(GD.getMultiVersionIndex()) : StringRef());
+ if (!FeatureStr.empty()) {
+ ParsedTargetAttr ParsedAttr = Target.parseTargetAttr(FeatureStr);
if (!ParsedAttr.CPU.empty() &&
getTarget().isValidCPUName(ParsedAttr.CPU)) {
TargetCPU = ParsedAttr.CPU;
@@ -4840,7 +4841,8 @@ void CodeGenModule::emitMultiVersionFunctions() {
if (auto *IFunc = dyn_cast<llvm::GlobalIFunc>(ResolverConstant)) {
ResolverConstant = IFunc->getResolver();
if (FD->isTargetClonesMultiVersion() &&
- !getTarget().getTriple().isAArch64()) {
+ !getTarget().getTriple().isAArch64() &&
+ !getTarget().getTriple().isOSAIX()) {
std::string MangledName = getMangledNameImpl(
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
if (!GetGlobalValue(MangledName + ".ifunc")) {
diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp
index 35e7655646ade..bc357e0908a5e 100644
--- a/clang/lib/CodeGen/Targets/PPC.cpp
+++ b/clang/lib/CodeGen/Targets/PPC.cpp
@@ -128,8 +128,55 @@ class AIXABIInfo : public ABIInfo {
RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
AggValueSlot Slot) const override;
+
+ using ABIInfo::appendAttributeMangling;
+ void appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
+ raw_ostream &Out) const override;
+ void appendAttributeMangling(StringRef AttrStr,
+ raw_ostream &Out) const override;
};
+void AIXABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
+ unsigned Index,
+ raw_ostream &Out) const {
+ appendAttributeMangling(Attr->getFeatureStr(Index), Out);
+}
+
+void AIXABIInfo::appendAttributeMangling(StringRef AttrStr,
+ raw_ostream &Out) const {
+ if (AttrStr == "default") {
+ Out << ".default";
+ return;
+ }
+
+ Out << '.';
+ const TargetInfo &TI = CGT.getTarget();
+ ParsedTargetAttr Info = TI.parseTargetAttr(AttrStr);
+
+ llvm::sort(Info.Features, [&TI](StringRef LHS, StringRef RHS) {
+ // Multiversioning doesn't allow "no-${feature}", so we can
+ // only have "+" prefixes here.
+ assert(LHS.starts_with("+") && RHS.starts_with("+") &&
+ "Features should always have a prefix.");
+ return TI.getFMVPriority({LHS.substr(1)})
+ .ugt(TI.getFMVPriority({RHS.substr(1)}));
+ });
+
+ bool IsFirst = true;
+ if (!Info.CPU.empty()) {
+ IsFirst = false;
+ Out << "cpu_" << Info.CPU;
+ }
+
+ assert(Info.Features.empty() && "unhandled case");
+ for (StringRef Feat : Info.Features) {
+ if (!IsFirst)
+ Out << '_';
+ IsFirst = false;
+ Out << Feat.substr(1);
+ }
+}
+
class AIXTargetCodeGenInfo : public TargetCodeGenInfo {
const bool Is64Bit;
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 6fc749464586d..83bd1206e2009 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -55,6 +55,7 @@
#include "clang/Sema/SemaObjC.h"
#include "clang/Sema/SemaOpenCL.h"
#include "clang/Sema/SemaOpenMP.h"
+#include "clang/Sema/SemaPPC.h"
#include "clang/Sema/SemaRISCV.h"
#include "clang/Sema/SemaSYCL.h"
#include "clang/Sema/SemaSwift.h"
@@ -3642,6 +3643,9 @@ static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
if (S.X86().checkTargetClonesAttr(Params, Locations, NewParams,
AL.getLoc()))
return;
+ } else if (S.Context.getTargetInfo().getTriple().isOSAIX()) {
+ if (S.PPC().checkTargetClonesAttr(Params, Locations, NewParams))
+ return;
}
Params.clear();
for (auto &SmallStr : NewParams)
diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp
index f48c975ff1106..0521db9ac950a 100644
--- a/clang/lib/Sema/SemaPPC.cpp
+++ b/clang/lib/Sema/SemaPPC.cpp
@@ -573,4 +573,60 @@ bool SemaPPC::BuiltinVSX(CallExpr *TheCall) {
return false;
}
+bool SemaPPC::checkTargetClonesAttr(
+ SmallVectorImpl<StringRef> &Params, SmallVectorImpl<SourceLocation> &Locs,
+ SmallVectorImpl<SmallString<64>> &NewParams) {
+ using namespace DiagAttrParams;
+
+ assert(Params.size() == Locs.size() &&
+ "Mismatch between number of string parameters and locations");
+
+ bool HasDefault = false;
+ bool HasComma = false;
+ for (unsigned I = 0, E = Params.size(); I < E; ++I) {
+ const StringRef Param = Params[I].trim();
+ const SourceLocation &Loc = Locs[I];
+
+ if (Param.empty() || Param.ends_with(','))
+ return Diag(Loc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << "" << TargetClones;
+
+ if (Param.contains(','))
+ HasComma = true;
+
+ StringRef LHS;
+ StringRef RHS = Param;
+ do {
+ std::tie(LHS, RHS) = RHS.split(',');
+ LHS = LHS.trim();
+ const SourceLocation &CurLoc =
+ Loc.getLocWithOffset(LHS.data() - Param.data());
+
+ if (LHS.starts_with("cpu=")) {
+ if (!getASTContext().getTargetInfo().isValidCPUName(
+ LHS.drop_front(sizeof("cpu=") - 1)))
+ return Diag(CurLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << CPU << LHS.drop_front(sizeof("cpu=") - 1)
+ << TargetClones;
+ } else if (LHS == "default")
+ HasDefault = true;
+ else if (!getASTContext().getTargetInfo().isValidFeatureName(LHS) ||
+ getASTContext().getTargetInfo().getFMVPriority(LHS) == 0)
+ return Diag(CurLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << LHS << TargetClones;
+
+ if (llvm::is_contained(NewParams, LHS))
+ Diag(CurLoc, diag::warn_target_clone_duplicate_options);
+ // Note: Add even if there are duplicates, since it changes name mangling.
+ NewParams.push_back(LHS);
+ } while (!RHS.empty());
+ }
+ if (HasComma && Params.size() > 1)
+ Diag(Locs[0], diag::warn_target_clone_mixed_values);
+
+ if (!HasDefault)
+ return Diag(Locs[0], diag::err_target_clone_must_have_default);
+
+ return false;
+}
} // namespace clang
>From 4e62d88463187ecf4fcc0898280e29f14d514c36 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Sat, 27 Sep 2025 23:26:33 -0400
Subject: [PATCH 03/24] ignore target_clones on a declaration and internalize
the resolver and the clones
---
clang/lib/CodeGen/CodeGenModule.cpp | 40 ++++++++++++++++++++++++-----
1 file changed, 34 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index bfb3d22173449..aa4874e861bab 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2127,6 +2127,19 @@ static bool isUniqueInternalLinkageDecl(GlobalDecl GD,
(CGM.getFunctionLinkage(GD) == llvm::GlobalValue::InternalLinkage);
}
+// On certain platforms, a declared (but not defined) FMV shall be treated
+// like a regular non-FMV function.
+static bool IgnoreFMVOnADeclaration(const llvm::Triple &Triple, const FunctionDecl *FD) {
+ if (!FD->isMultiVersion())
+ return false;
+
+ if (Triple.isOSAIX()) {
+ assert(FD->isTargetClonesMultiVersion());
+ return !FD->isDefined();
+ }
+ return false;
+}
+
static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
const NamedDecl *ND,
bool OmitMultiVersionMangling = false) {
@@ -2176,8 +2189,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
Out << CGM.getModuleNameHash();
}
- if (const auto *FD = dyn_cast<FunctionDecl>(ND))
- if (FD->isMultiVersion() && !OmitMultiVersionMangling) {
+ if (const auto *FD = dyn_cast<FunctionDecl>(ND)) {
+ if (FD->isMultiVersion() && !OmitMultiVersionMangling &&
+ !IgnoreFMVOnADeclaration(CGM.getTriple(), FD)) {
switch (FD->getMultiVersionKind()) {
case MultiVersionKind::CPUDispatch:
case MultiVersionKind::CPUSpecific:
@@ -2214,6 +2228,7 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
llvm_unreachable("None multiversion type isn't valid here");
}
}
+ }
// Make unique name for device side static file-scope variable for HIP.
if (CGM.getContext().shouldExternalize(ND) &&
@@ -4755,7 +4770,8 @@ getFMVPriority(const TargetInfo &TI,
static llvm::GlobalValue::LinkageTypes
getMultiversionLinkage(CodeGenModule &CGM, GlobalDecl GD) {
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
- if (FD->getFormalLinkage() == Linkage::Internal)
+ if (FD->getFormalLinkage() == Linkage::Internal ||
+ CGM.getTriple().isOSAIX())
return llvm::GlobalValue::InternalLinkage;
return llvm::GlobalValue::WeakODRLinkage;
}
@@ -4789,7 +4805,7 @@ void CodeGenModule::emitMultiVersionFunctions() {
// For AArch64, a resolver is only emitted if a function marked with
// target_version("default")) or target_clones("default") is defined
// in this TU. For other architectures it is always emitted.
- bool ShouldEmitResolver = !getTarget().getTriple().isAArch64();
+ bool ShouldEmitResolver = !getTriple().isAArch64();
SmallVector<CodeGenFunction::FMVResolverOption, 10> Options;
llvm::DenseMap<llvm::Function *, const FunctionDecl *> DeclMap;
@@ -5142,8 +5158,11 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) {
llvm::Constant *Resolver = GetOrCreateLLVMFunction(
MangledName + ".resolver", ResolverType, GlobalDecl{},
/*ForVTable=*/false);
+
+ auto Linkage = getTriple().isOSAIX() ? getFunctionLinkage(GD) : getMultiversionLinkage(*this, GD);
+
llvm::GlobalIFunc *GIF =
- llvm::GlobalIFunc::create(DeclTy, AS, getMultiversionLinkage(*this, GD),
+ llvm::GlobalIFunc::create(DeclTy, AS, Linkage,
"", Resolver, &getModule());
GIF->setName(ResolverName);
SetCommonAttributes(FD, GIF);
@@ -5163,7 +5182,9 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) {
void CodeGenModule::setMultiVersionResolverAttributes(llvm::Function *Resolver,
GlobalDecl GD) {
const NamedDecl *D = dyn_cast_or_null<NamedDecl>(GD.getDecl());
- Resolver->setLinkage(getMultiversionLinkage(*this, GD));
+
+ auto ResolverLinkage = getTriple().isOSAIX() ? llvm::GlobalValue::InternalLinkage : getMultiversionLinkage(*this, GD);
+ Resolver->setLinkage(ResolverLinkage);
// Function body has to be emitted before calling setGlobalVisibility
// for Resolver to be considered as definition.
@@ -5242,6 +5263,10 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
AddDeferredMultiVersionResolverToEmit(GD);
NameWithoutMultiVersionMangling = getMangledNameImpl(
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
+ } else if (IgnoreFMVOnADeclaration(getTriple(), FD)) {
+ // TODO this might not be necessary after fix in getMangledNameImpl
+ NameWithoutMultiVersionMangling = getMangledNameImpl(
+ *this, GD, FD, /*OmitMultiVersionMangling=*/true);
} else
return GetOrCreateMultiVersionResolver(GD);
}
@@ -6700,6 +6725,9 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
auto *Fn = cast<llvm::Function>(GV);
setFunctionLinkage(GD, Fn);
+ if (getTriple().isOSAIX() && D->isTargetClonesMultiVersion())
+ Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
+
// FIXME: this is redundant with part of setFunctionDefinitionAttributes
setGVProperties(Fn, GD);
>From 0813d6533a846e9cd582079929cd78567561a2e5 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 30 Sep 2025 15:27:07 -0400
Subject: [PATCH 04/24] fix PPCTargetInfo::parseTargetAttr
---
clang/lib/Basic/Targets/PPC.cpp | 26 ++++++++++++++------------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 7f8005db3cd3e..dd51b46727a6a 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -694,18 +694,20 @@ ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const {
// While we're here iterating check for a different target cpu.
if (Feature.starts_with("cpu=")) {
- assert(Ret.CPU.empty());
- Ret.CPU = Feature.split("=").second.trim();
- } else assert(0);
-// else if (Feature.starts_with("tune=")) {
-// if (!Ret.Tune.empty())
-// Ret.Duplicate = "tune=";
-// else
-// Ret.Tune = Feature.split("=").second.trim();
-// } else if (Feature.starts_with("no-"))
-// Ret.Features.push_back("-" + Feature.split("-").second.str());
-// else
-// Ret.Features.push_back("+" + Feature.str());
+ if (!Ret.CPU.empty())
+ Ret.Duplicate = "cpu=";
+ else
+ Ret.CPU = Feature.split("=").second.trim();
+ }
+ else if (Feature.starts_with("tune=")) {
+ if (!Ret.Tune.empty())
+ Ret.Duplicate = "tune=";
+ else
+ Ret.Tune = Feature.split("=").second.trim();
+ } else if (Feature.starts_with("no-"))
+ Ret.Features.push_back("-" + Feature.split("-").second.str());
+ else
+ Ret.Features.push_back("+" + Feature.str());
}
return Ret;
}
>From c46dc5c709190d4bbbc287cc16b10b7e596997cb Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 30 Sep 2025 19:38:35 +0000
Subject: [PATCH 05/24] fix Sema/attr-target.c
---
clang/test/Sema/attr-target.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/clang/test/Sema/attr-target.c b/clang/test/Sema/attr-target.c
index 65ece3c27d299..ddf6654632187 100644
--- a/clang/test/Sema/attr-target.c
+++ b/clang/test/Sema/attr-target.c
@@ -75,15 +75,13 @@ int __attribute__((target("tune=pwr8"))) baz(void) { return 4; }
//expected-warning at +1 {{unsupported 'fpmath=' in the 'target' attribute string; 'target' attribute ignored}}
int __attribute__((target("fpmath=387"))) walrus(void) { return 4; }
//expected-warning at +1 {{unknown CPU 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
-int __attribute__((target("float128,arch=hiss"))) meow(void) { return 4; }
+int __attribute__((target("float128,cpu=hiss"))) meow(void) { return 4; }
// no warning, same as saying 'nothing'.
-int __attribute__((target("arch="))) turtle(void) { return 4; }
+int __attribute__((target("cpu="))) turtle(void) { return 4; }
//expected-warning at +1 {{unknown CPU 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
-int __attribute__((target("arch=hiss,arch=woof"))) pine_tree(void) { return 4; }
-//expected-warning at +1 {{duplicate 'arch=' in the 'target' attribute string; 'target' attribute ignored}}
-int __attribute__((target("arch=pwr9,arch=pwr10"))) oak_tree(void) { return 4; }
-//expected-warning at +1 {{unsupported 'branch-protection' in the 'target' attribute string; 'target' attribute ignored}}
-int __attribute__((target("branch-protection=none"))) birch_tree(void) { return 5; }
+int __attribute__((target("cpu=hiss,cpu=woof"))) pine_tree(void) { return 4; }
+//expected-warning at +1 {{duplicate 'cpu=' in the 'target' attribute string; 'target' attribute ignored}}
+int __attribute__((target("cpu=pwr9,cpu=pwr10"))) oak_tree(void) { return 4; }
//expected-warning at +1 {{unknown tune CPU 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
int __attribute__((target("tune=hiss,tune=woof"))) apple_tree(void) { return 4; }
>From dcb0c9262ff8847daaec83b13dc94d92ca8e3786 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 7 Oct 2025 23:11:21 +0000
Subject: [PATCH 06/24] clang-format
---
clang/lib/AST/ASTContext.cpp | 5 +++-
clang/lib/Basic/Targets/PPC.cpp | 3 +--
clang/lib/CodeGen/CodeGenFunction.cpp | 31 ++++++++++++++----------
clang/lib/CodeGen/CodeGenFunction.h | 5 ++--
clang/lib/CodeGen/CodeGenModule.cpp | 29 ++++++++++++----------
clang/lib/CodeGen/TargetBuiltins/PPC.cpp | 14 +++++------
clang/lib/CodeGen/Targets/PPC.cpp | 5 ++--
7 files changed, 51 insertions(+), 41 deletions(-)
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index dd97009ac2404..1bb912fdbb8f7 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -15264,7 +15264,10 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex());
if (VersionStr.starts_with("arch="))
TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1);
- else if (Target->getTriple().isOSAIX() && VersionStr.starts_with("cpu=")) // TODO make a function that extracts CPU from a feature string
+ else if (Target->getTriple().isOSAIX() &&
+ VersionStr.starts_with(
+ "cpu=")) // TODO make a function that extracts CPU from a
+ // feature string
TargetCPU = VersionStr.drop_front(sizeof("cpu=") - 1);
else if (VersionStr != "default")
Features.push_back((StringRef{"+"} + VersionStr).str());
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index dd51b46727a6a..5b86f84264905 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -698,8 +698,7 @@ ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const {
Ret.Duplicate = "cpu=";
else
Ret.CPU = Feature.split("=").second.trim();
- }
- else if (Feature.starts_with("tune=")) {
+ } else if (Feature.starts_with("tune=")) {
if (!Ret.Tune.empty())
Ret.Duplicate = "tune=";
else
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 21752e90552b2..7881f95334f0b 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -3085,7 +3085,8 @@ void CodeGenFunction::EmitMultiVersionResolver(
}
[[fallthrough]];
default:
- assert(false && "Only implemented for x86, AArch64, RISC-V, and PowerPC targets");
+ assert(false &&
+ "Only implemented for x86, AArch64, RISC-V, and PowerPC targets");
}
}
@@ -3098,7 +3099,7 @@ void CodeGenFunction::EmitMultiVersionResolver(
* return ((int (*)(int)) foo_desc)(a);
*/
void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
- llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {
+ llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {
llvm::PointerType *PtrTy = Builder.getPtrTy();
// entry:
@@ -3112,7 +3113,8 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
// if.default:
// %fmv.default = call ptr @getEntryPoint(ptr noundef @foo_default)
// br label %resolver_exit
- assert(&RO == Options.end() - 1 && "Default or Generic case must be last");
+ assert(&RO == Options.end() - 1 &&
+ "Default or Generic case must be last");
Builder.CreateRet(RO.Function);
break;
}
@@ -3123,12 +3125,16 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
// if.version_n:
// %fmv.version.n = call ptr @getEntryPoint(ptr noundef @foo_version_n)
// br label %resolver_exit
- assert(RO.Features.size() == 1 && "for now one feature requirement per version");
+ assert(RO.Features.size() == 1 &&
+ "for now one feature requirement per version");
llvm::Value *Condition;
if (RO.Features[0].starts_with("cpu=")) {
- Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_is, Builder.getInt1Ty(), RO.Features[0].split("=").second.trim());
+ Condition =
+ EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_is, Builder.getInt1Ty(),
+ RO.Features[0].split("=").second.trim());
} else {
- Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_supports, Builder.getInt1Ty(), RO.Features[0]);
+ Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_supports,
+ Builder.getInt1Ty(), RO.Features[0]);
}
llvm::BasicBlock *ThenBlock = createBasicBlock("if.version", Resolver);
CurBlock = createBasicBlock("if.else", Resolver);
@@ -3139,15 +3145,14 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
}
// If no generic/default, emit an unreachable.
-// Builder.SetInsertPoint(CurBlock);
-// llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap);
-// TrapCall->setDoesNotReturn();
-// TrapCall->setDoesNotThrow();
-// Builder.CreateUnreachable();
-// Builder.ClearInsertionPoint();
+ // Builder.SetInsertPoint(CurBlock);
+ // llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap);
+ // TrapCall->setDoesNotReturn();
+ // TrapCall->setDoesNotThrow();
+ // Builder.CreateUnreachable();
+ // Builder.ClearInsertionPoint();
}
-
void CodeGenFunction::EmitRISCVMultiVersionResolver(
llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 07bb9fae5cd85..85c058ba237ee 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4912,7 +4912,8 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *BuildVector(ArrayRef<llvm::Value *> Ops);
llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
- llvm::Value *EmitPPCBuiltinCpu(unsigned BuiltinID, llvm::Type *ReturnType, StringRef CPUStr);
+ llvm::Value *EmitPPCBuiltinCpu(unsigned BuiltinID, llvm::Type *ReturnType,
+ StringRef CPUStr);
llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
@@ -5580,7 +5581,7 @@ class CodeGenFunction : public CodeGenTypeCache {
void EmitRISCVMultiVersionResolver(llvm::Function *Resolver,
ArrayRef<FMVResolverOption> Options);
void EmitPPCAIXMultiVersionResolver(llvm::Function *Resolver,
- ArrayRef<FMVResolverOption> Options);
+ ArrayRef<FMVResolverOption> Options);
Address EmitAddressOfPFPField(Address RecordPtr, const PFPField &Field);
Address EmitAddressOfPFPField(Address RecordPtr, Address FieldPtr,
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index aa4874e861bab..871b25f24e6a5 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2129,7 +2129,8 @@ static bool isUniqueInternalLinkageDecl(GlobalDecl GD,
// On certain platforms, a declared (but not defined) FMV shall be treated
// like a regular non-FMV function.
-static bool IgnoreFMVOnADeclaration(const llvm::Triple &Triple, const FunctionDecl *FD) {
+static bool IgnoreFMVOnADeclaration(const llvm::Triple &Triple,
+ const FunctionDecl *FD) {
if (!FD->isMultiVersion())
return false;
@@ -2191,7 +2192,7 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
if (const auto *FD = dyn_cast<FunctionDecl>(ND)) {
if (FD->isMultiVersion() && !OmitMultiVersionMangling &&
- !IgnoreFMVOnADeclaration(CGM.getTriple(), FD)) {
+ !IgnoreFMVOnADeclaration(CGM.getTriple(), FD)) {
switch (FD->getMultiVersionKind()) {
case MultiVersionKind::CPUDispatch:
case MultiVersionKind::CPUSpecific:
@@ -3068,8 +3069,9 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
// While we populated the feature map above, we still need to
// get and parse the target attribute so we can get the cpu for
// the function.
- StringRef FeatureStr = TD ? TD->getFeaturesStr() :
- (TC ? TC->getFeatureStr(GD.getMultiVersionIndex()) : StringRef());
+ StringRef FeatureStr =
+ TD ? TD->getFeaturesStr()
+ : (TC ? TC->getFeatureStr(GD.getMultiVersionIndex()) : StringRef());
if (!FeatureStr.empty()) {
ParsedTargetAttr ParsedAttr = Target.parseTargetAttr(FeatureStr);
if (!ParsedAttr.CPU.empty() &&
@@ -4770,8 +4772,7 @@ getFMVPriority(const TargetInfo &TI,
static llvm::GlobalValue::LinkageTypes
getMultiversionLinkage(CodeGenModule &CGM, GlobalDecl GD) {
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
- if (FD->getFormalLinkage() == Linkage::Internal ||
- CGM.getTriple().isOSAIX())
+ if (FD->getFormalLinkage() == Linkage::Internal || CGM.getTriple().isOSAIX())
return llvm::GlobalValue::InternalLinkage;
return llvm::GlobalValue::WeakODRLinkage;
}
@@ -4858,7 +4859,7 @@ void CodeGenModule::emitMultiVersionFunctions() {
ResolverConstant = IFunc->getResolver();
if (FD->isTargetClonesMultiVersion() &&
!getTarget().getTriple().isAArch64() &&
- !getTarget().getTriple().isOSAIX()) {
+ !getTarget().getTriple().isOSAIX()) {
std::string MangledName = getMangledNameImpl(
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
if (!GetGlobalValue(MangledName + ".ifunc")) {
@@ -5159,11 +5160,11 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) {
MangledName + ".resolver", ResolverType, GlobalDecl{},
/*ForVTable=*/false);
- auto Linkage = getTriple().isOSAIX() ? getFunctionLinkage(GD) : getMultiversionLinkage(*this, GD);
+ auto Linkage = getTriple().isOSAIX() ? getFunctionLinkage(GD)
+ : getMultiversionLinkage(*this, GD);
- llvm::GlobalIFunc *GIF =
- llvm::GlobalIFunc::create(DeclTy, AS, Linkage,
- "", Resolver, &getModule());
+ llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(DeclTy, AS, Linkage, "",
+ Resolver, &getModule());
GIF->setName(ResolverName);
SetCommonAttributes(FD, GIF);
if (ResolverGV)
@@ -5183,7 +5184,9 @@ void CodeGenModule::setMultiVersionResolverAttributes(llvm::Function *Resolver,
GlobalDecl GD) {
const NamedDecl *D = dyn_cast_or_null<NamedDecl>(GD.getDecl());
- auto ResolverLinkage = getTriple().isOSAIX() ? llvm::GlobalValue::InternalLinkage : getMultiversionLinkage(*this, GD);
+ auto ResolverLinkage = getTriple().isOSAIX()
+ ? llvm::GlobalValue::InternalLinkage
+ : getMultiversionLinkage(*this, GD);
Resolver->setLinkage(ResolverLinkage);
// Function body has to be emitted before calling setGlobalVisibility
@@ -6726,7 +6729,7 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
setFunctionLinkage(GD, Fn);
if (getTriple().isOSAIX() && D->isTargetClonesMultiVersion())
- Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
+ Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
// FIXME: this is redundant with part of setFunctionDefinitionAttributes
setGVProperties(Fn, GD);
diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
index f3b1147ebcbbc..59001fbccc053 100644
--- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
@@ -70,8 +70,9 @@ static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
return CI;
}
-Value *CodeGenFunction::EmitPPCBuiltinCpu(
- unsigned BuiltinID, llvm::Type *ReturnType, StringRef CPUStr) {
+Value *CodeGenFunction::EmitPPCBuiltinCpu(unsigned BuiltinID,
+ llvm::Type *ReturnType,
+ StringRef CPUStr) {
#include "llvm/TargetParser/PPCTargetParser.def"
auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
@@ -159,8 +160,7 @@ Value *CodeGenFunction::EmitPPCBuiltinCpu(
Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
return Builder.CreateICmpEQ(TheCall,
llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
- }
- else if (BuiltinID == Builtin::BI__builtin_cpu_supports) {
+ } else if (BuiltinID == Builtin::BI__builtin_cpu_supports) {
llvm::Triple Triple = getTarget().getTriple();
if (Triple.isOSAIX()) {
typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
@@ -197,8 +197,7 @@ Value *CodeGenFunction::EmitPPCBuiltinCpu(
#undef PPC_FAWORD_HWCAP
#undef PPC_FAWORD_HWCAP2
#undef PPC_FAWORD_CPUID
- }
- else
+ } else
assert(0 && "unexpected builtin");
}
@@ -219,7 +218,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
Intrinsic::ID ID = Intrinsic::not_intrinsic;
switch (BuiltinID) {
- default: return nullptr;
+ default:
+ return nullptr;
case Builtin::BI__builtin_cpu_is:
case Builtin::BI__builtin_cpu_supports: {
diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp
index bc357e0908a5e..61d110e3c7ae3 100644
--- a/clang/lib/CodeGen/Targets/PPC.cpp
+++ b/clang/lib/CodeGen/Targets/PPC.cpp
@@ -136,8 +136,7 @@ class AIXABIInfo : public ABIInfo {
raw_ostream &Out) const override;
};
-void AIXABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
- unsigned Index,
+void AIXABIInfo::appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
raw_ostream &Out) const {
appendAttributeMangling(Attr->getFeatureStr(Index), Out);
}
@@ -159,7 +158,7 @@ void AIXABIInfo::appendAttributeMangling(StringRef AttrStr,
assert(LHS.starts_with("+") && RHS.starts_with("+") &&
"Features should always have a prefix.");
return TI.getFMVPriority({LHS.substr(1)})
- .ugt(TI.getFMVPriority({RHS.substr(1)}));
+ .ugt(TI.getFMVPriority({RHS.substr(1)}));
});
bool IsFirst = true;
>From b3be1240061edf5a4318877a92cd6ac6bee4460b Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Fri, 6 Feb 2026 04:12:47 +0000
Subject: [PATCH 07/24] normalize the CPU name on the target* attribute
---
clang/lib/Basic/Targets/PPC.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 5b86f84264905..53c7af1b57767 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -708,6 +708,8 @@ ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const {
else
Ret.Features.push_back("+" + Feature.str());
}
+ Ret.CPU = llvm::PPC::normalizeCPUName(Ret.CPU);
+ Ret.Tune = llvm::PPC::normalizeCPUName(Ret.Tune);
return Ret;
}
>From 1322dd973266b955070a24e0af2d3392bf3a78c4 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 3 Mar 2026 17:10:55 +0000
Subject: [PATCH 08/24] limit support to cpu-only versions, and implement
getFMVPriority
---
clang/lib/AST/ASTContext.cpp | 13 ++++++-----
clang/lib/Basic/Targets/PPC.cpp | 18 ++++++++++++++-
clang/lib/CodeGen/CodeGenFunction.cpp | 32 +++++++++++++--------------
clang/lib/CodeGen/Targets/PPC.cpp | 24 ++++----------------
clang/lib/Sema/SemaPPC.cpp | 13 ++++++-----
5 files changed, 52 insertions(+), 48 deletions(-)
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 1bb912fdbb8f7..dce33304009ca 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -15259,16 +15259,19 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
Target->getTargetOpts().FeaturesAsWritten.begin(),
Target->getTargetOpts().FeaturesAsWritten.end());
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
+ } else if (Target->getTriple().isOSAIX()) {
+ std::vector<std::string> Features;
+ StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex());
+ if (VersionStr.starts_with("cpu="))
+ TargetCPU = VersionStr.drop_front(sizeof("cpu=") - 1);
+ else
+ assert(VersionStr == "default");
+ Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
} else {
std::vector<std::string> Features;
StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex());
if (VersionStr.starts_with("arch="))
TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1);
- else if (Target->getTriple().isOSAIX() &&
- VersionStr.starts_with(
- "cpu=")) // TODO make a function that extracts CPU from a
- // feature string
- TargetCPU = VersionStr.drop_front(sizeof("cpu=") - 1);
else if (VersionStr != "default")
Features.push_back((StringRef{"+"} + VersionStr).str());
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 53c7af1b57767..fbff0af711b13 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -714,7 +714,23 @@ ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const {
}
llvm::APInt PPCTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
- return llvm::APInt(32, Features.empty() ? 0 : 1);
+ if (Features.empty())
+ return llvm::APInt(32, 0);
+ assert(Features.size() == 1 && "one feature/cpu per clone on PowerPC");
+ ParsedTargetAttr ParsedAttr = parseTargetAttr(Features[0]);
+ if (!ParsedAttr.CPU.empty()) {
+ StringRef CPU = llvm::PPC::normalizeCPUName(ParsedAttr.CPU);
+ int Priority = llvm::StringSwitch<int>(CPU)
+ .Case("pwr7", 1)
+ .Case("pwr8", 2)
+ .Case("pwr9", 3)
+ .Case("pwr10", 4)
+ .Case("pwr11", 5)
+ .Default(0);
+ return llvm::APInt(32, Priority);
+ }
+ assert(false && "unimplemented");
+ return llvm::APInt(32, 0);
}
// Make sure that registers are added in the correct array index which should be
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 7881f95334f0b..ca68f31306a29 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -3090,13 +3090,8 @@ void CodeGenFunction::EmitMultiVersionResolver(
}
}
-/*
- * Desc_t *foo_desc = ppc_get_function_descriptor(&foo);
- * if (foo_desc->addr == ppc_get_function_entry(&foo)) {
- * FuncPtr fp = resolver();
- * __c11_atomic_store((_Atomic FuncPtr *)&foo_desc->addr, fp, 0);
- * }
- * return ((int (*)(int)) foo_desc)(a);
+/**
+ *
*/
void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {
@@ -3127,15 +3122,20 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
// br label %resolver_exit
assert(RO.Features.size() == 1 &&
"for now one feature requirement per version");
- llvm::Value *Condition;
- if (RO.Features[0].starts_with("cpu=")) {
- Condition =
- EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_is, Builder.getInt1Ty(),
- RO.Features[0].split("=").second.trim());
- } else {
- Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_supports,
- Builder.getInt1Ty(), RO.Features[0]);
- }
+
+ assert(RO.Features[0].starts_with("cpu="));
+ StringRef CPU = RO.Features[0].split("=").second.trim();
+ StringRef Feature = llvm::StringSwitch<StringRef>(CPU)
+ .Cases({"power7","pwr7"}, "arch_2_06")
+ .Cases({"power8","pwr8"}, "arch_2_07")
+ .Cases({"power9","pwr9"}, "arch_3_00")
+ .Cases({"power10","pwr10"}, "arch_3_1")
+ .Cases({"power11","pwr11"}, "arch_3_1")
+ .Default("error");
+
+ llvm::Value *Condition = EmitPPCBuiltinCpu(
+ Builtin::BI__builtin_cpu_supports, Builder.getInt1Ty(), Feature);
+
llvm::BasicBlock *ThenBlock = createBasicBlock("if.version", Resolver);
CurBlock = createBasicBlock("if.else", Resolver);
Builder.CreateCondBr(Condition, ThenBlock, CurBlock);
diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp
index 61d110e3c7ae3..6694725277144 100644
--- a/clang/lib/CodeGen/Targets/PPC.cpp
+++ b/clang/lib/CodeGen/Targets/PPC.cpp
@@ -148,32 +148,16 @@ void AIXABIInfo::appendAttributeMangling(StringRef AttrStr,
return;
}
- Out << '.';
const TargetInfo &TI = CGT.getTarget();
ParsedTargetAttr Info = TI.parseTargetAttr(AttrStr);
- llvm::sort(Info.Features, [&TI](StringRef LHS, StringRef RHS) {
- // Multiversioning doesn't allow "no-${feature}", so we can
- // only have "+" prefixes here.
- assert(LHS.starts_with("+") && RHS.starts_with("+") &&
- "Features should always have a prefix.");
- return TI.getFMVPriority({LHS.substr(1)})
- .ugt(TI.getFMVPriority({RHS.substr(1)}));
- });
-
- bool IsFirst = true;
if (!Info.CPU.empty()) {
- IsFirst = false;
- Out << "cpu_" << Info.CPU;
+ assert(Info.Features.empty() && "cannot have both a CPU and a feature");
+ Out << ".cpu_" << Info.CPU;
+ return;
}
- assert(Info.Features.empty() && "unhandled case");
- for (StringRef Feat : Info.Features) {
- if (!IsFirst)
- Out << '_';
- IsFirst = false;
- Out << Feat.substr(1);
- }
+ assert(false && "specifying target features on an FMV is not supported on AIX");
}
class AIXTargetCodeGenInfo : public TargetCodeGenInfo {
diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp
index 0521db9ac950a..60270d82908fb 100644
--- a/clang/lib/Sema/SemaPPC.cpp
+++ b/clang/lib/Sema/SemaPPC.cpp
@@ -608,16 +608,17 @@ bool SemaPPC::checkTargetClonesAttr(
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << CPU << LHS.drop_front(sizeof("cpu=") - 1)
<< TargetClones;
- } else if (LHS == "default")
+ } else if (LHS == "default") {
HasDefault = true;
- else if (!getASTContext().getTargetInfo().isValidFeatureName(LHS) ||
- getASTContext().getTargetInfo().getFMVPriority(LHS) == 0)
+ } else if (!getASTContext().getTargetInfo().isValidFeatureName(LHS) ||
+ getASTContext().getTargetInfo().getFMVPriority(LHS) == 0) {
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << LHS << TargetClones;
-
- if (llvm::is_contained(NewParams, LHS))
+ }
+ if (llvm::is_contained(NewParams, LHS)) {
Diag(CurLoc, diag::warn_target_clone_duplicate_options);
- // Note: Add even if there are duplicates, since it changes name mangling.
+ continue;
+ }
NewParams.push_back(LHS);
} while (!RHS.empty());
}
>From d898e694b4a382ca41613e893beff4698bca547a Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Mon, 9 Mar 2026 18:35:26 +0000
Subject: [PATCH 09/24] Handle case when an FMV function is declared, used,
then defined by: fixing getMangledNameImpl such that it does not need to
special case for FMV declarations because GetOrCreateLLVMFunction already can
return the non-mangled name of declared FMV functions
---
clang/lib/CodeGen/CodeGenModule.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 871b25f24e6a5..c0f2870258acc 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2191,8 +2191,7 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
}
if (const auto *FD = dyn_cast<FunctionDecl>(ND)) {
- if (FD->isMultiVersion() && !OmitMultiVersionMangling &&
- !IgnoreFMVOnADeclaration(CGM.getTriple(), FD)) {
+ if (FD->isMultiVersion() && !OmitMultiVersionMangling) {
switch (FD->getMultiVersionKind()) {
case MultiVersionKind::CPUDispatch:
case MultiVersionKind::CPUSpecific:
@@ -5267,7 +5266,6 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
NameWithoutMultiVersionMangling = getMangledNameImpl(
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
} else if (IgnoreFMVOnADeclaration(getTriple(), FD)) {
- // TODO this might not be necessary after fix in getMangledNameImpl
NameWithoutMultiVersionMangling = getMangledNameImpl(
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
} else
>From caf6c943b2db400b5246e71631ed37437ce128e6 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 3 Mar 2026 19:04:28 -0500
Subject: [PATCH 10/24] test
test
test
---
clang/lib/CodeGen/CodeGenFunction.cpp | 23 +++-
clang/test/CodeGen/attr-target-clones-ppc.c | 116 ++++++++++++++++++++
2 files changed, 133 insertions(+), 6 deletions(-)
create mode 100644 clang/test/CodeGen/attr-target-clones-ppc.c
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index ca68f31306a29..206cab9fda302 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -3091,7 +3091,20 @@ void CodeGenFunction::EmitMultiVersionResolver(
}
/**
+ * define internal ptr @foo.resolver() {
+ * entry:
+ * %is_version_1 = __builtin_cpu_supports(version_1)
+ * br i1 %is_version_1, label %if.version_1, label %if.else_2
*
+ * if.version_1:
+ * ret ptr @foo.version_1
+ *
+ * if.else_2:
+ * %is_version_2 = __builtin_cpu_supports(version_2)
+ * ...
+ * if.else: ; preds = %entry
+ * ret ptr @foo.default
+ * }
*/
void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {
@@ -3105,9 +3118,8 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
Builder.SetInsertPoint(CurBlock);
// The 'default' or 'generic' case.
if (!RO.Architecture && RO.Features.empty()) {
- // if.default:
- // %fmv.default = call ptr @getEntryPoint(ptr noundef @foo_default)
- // br label %resolver_exit
+ // if.else:
+ // ret ptr @foo.default
assert(&RO == Options.end() - 1 &&
"Default or Generic case must be last");
Builder.CreateRet(RO.Function);
@@ -3115,11 +3127,10 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
}
// if.else_n:
// %is_version_n = __builtin_cpu_supports(version_n)
- // br i1 %is_version_n, label %if.version_n, label %if.default
+ // br i1 %is_version_n, label %if.version_n, label %if.else_n+1
//
// if.version_n:
- // %fmv.version.n = call ptr @getEntryPoint(ptr noundef @foo_version_n)
- // br label %resolver_exit
+ // ret ptr @foo.version_n
assert(RO.Features.size() == 1 &&
"for now one feature requirement per version");
diff --git a/clang/test/CodeGen/attr-target-clones-ppc.c b/clang/test/CodeGen/attr-target-clones-ppc.c
new file mode 100644
index 0000000000000..08d54a391b58f
--- /dev/null
+++ b/clang/test/CodeGen/attr-target-clones-ppc.c
@@ -0,0 +1,116 @@
+// RUN: %clang_cc1 -triple powerpc-ibm-aix-xcoff -target-cpu pwr7 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -target-cpu pwr7 -emit-llvm %s -o - | FileCheck %s
+
+// CHECK: @internal = internal ifunc i32 (), ptr @internal.resolver
+// CHECK: @foo = ifunc i32 (), ptr @foo.resolver
+// CHECK: @foo_dupes = ifunc void (), ptr @foo_dupes.resolver
+// CHECK: @unused = ifunc void (), ptr @unused.resolver
+// CHECK: @foo_inline = linkonce ifunc i32 (), ptr @foo_inline.resolver
+// CHECK: @foo_ref_then_def = ifunc i32 (), ptr @foo_ref_then_def.resolver
+// CHECK: @foo_priority = ifunc i32 (i32), ptr @foo_priority.resolver
+// CHEECK: @isa_level = ifunc i32 (i32), ptr @isa_level.resolver
+
+
+static int __attribute__((target_clones("cpu=power10, default"))) internal(void) { return 0; }
+int use(void) { return internal(); }
+// CHECK: define internal ptr @internal.resolver()
+
+int __attribute__((target_clones("cpu=power10, default"))) foo(void) { return 0; }
+// CHECK: define internal {{.*}}i32 @foo.cpu_pwr10() #[[#ATTR_P10:]]
+// CHECK: define internal {{.*}}i32 @foo.default() #[[#ATTR_P7:]]
+// CHECK: define internal ptr @foo.resolver()
+// CHECK: ret ptr @foo.cpu_pwr10
+// CHECK: ret ptr @foo.default
+
+__attribute__((target_clones("default,default ,cpu=pwr8"))) void foo_dupes(void) {}
+// CHECK: define internal void @foo_dupes.default() #[[#ATTR_P7]]
+// CHECK: define internal void @foo_dupes.cpu_pwr8() #[[#ATTR_P8:]]
+// CHECK: define internal ptr @foo_dupes.resolver()
+// CHECK: ret ptr @foo_dupes.cpu_pwr8
+// CHECK: ret ptr @foo_dupes.default
+
+void bar2(void) {
+ // CHECK: define {{.*}}void @bar2()
+ foo_dupes();
+ // CHECK: call void @foo_dupes()
+}
+
+int bar(void) {
+ // CHECK: define {{.*}}i32 @bar()
+ return foo();
+ // CHECK: call {{.*}}i32 @foo()
+}
+
+void __attribute__((target_clones("default, cpu=pwr9"))) unused(void) {}
+// CHECK: define internal void @unused.default() #[[#ATTR_P7]]
+// CHECK: define internal void @unused.cpu_pwr9() #[[#ATTR_P9:]]
+// CHECK: define internal ptr @unused.resolver()
+// CHECK: ret ptr @unused.cpu_pwr9
+// CHECK: ret ptr @unused.default
+
+int __attribute__((target_clones("cpu=power10, default"))) inherited(void);
+int inherited(void) { return 0; }
+// CHECK: define internal {{.*}}i32 @inherited.cpu_pwr10() #[[#ATTR_P10]]
+// CHECK: define internal {{.*}}i32 @inherited.default() #[[#ATTR_P7]]
+// CHECK: define internal ptr @inherited.resolver()
+// CHECK: ret ptr @inherited.cpu_pwr10
+// CHECK: ret ptr @inherited.default
+
+
+int test_inherited(void) {
+ // CHECK: define {{.*}}i32 @test_inherited()
+ return inherited();
+ // CHECK: call {{.*}}i32 @inherited()
+}
+
+inline int __attribute__((target_clones("default,cpu=pwr8")))
+foo_inline(void) { return 0; }
+int __attribute__((target_clones("cpu=pwr7,default")))
+foo_ref_then_def(void);
+
+int bar3(void) {
+ // CHECK: define {{.*}}i32 @bar3()
+ return foo_inline() + foo_ref_then_def();
+ // CHECK: call {{.*}}i32 @foo_inline()
+ // CHECK: call {{.*}}i32 @foo_ref_then_def()
+}
+
+// CHECK: define internal ptr @foo_inline.resolver()
+// CHECK: ret ptr @foo_inline.cpu_pwr8
+// CHECK: ret ptr @foo_inline.default
+
+int __attribute__((target_clones("cpu=pwr7,default")))
+foo_ref_then_def(void){ return 0; }
+// CHECK: define internal ptr @foo_ref_then_def.resolver()
+// CHECK: ret ptr @foo_ref_then_def.cpu_pwr7
+// CHECK: ret ptr @foo_ref_then_def.default
+
+int __attribute__((target_clones("default", "cpu=pwr8")))
+foo_unused_no_defn(void);
+// CHECK-NOT: foo_unused_no_defn
+
+int __attribute__((target_clones("default", "cpu=pwr9")))
+foo_used_no_defn(void);
+
+int test_foo_used_no_defn(void) {
+ // CHECK: define {{.*}}i32 @test_foo_used_no_defn()
+ return foo_used_no_defn();
+ // CHECK: call {{.*}}i32 @foo_used_no_defn()
+}
+// CHECK: declare {{.*}}i32 @foo_used_no_defn()
+
+// test that the CPU checks are done in most to least restrictive (highest to lowest CPU)
+int __attribute__((target_clones("cpu=pwr7", "cpu=pwr9", "default", "cpu=pwr8")))
+foo_priority(int x) { return x & (x - 1); }
+// CHECK: define internal ptr @foo_priority.resolver()
+// CHECK: ret ptr @foo_priority.cpu_pwr9
+// CHECK: ret ptr @foo_priority.cpu_pwr8
+// CHECK: ret ptr @foo_priority.cpu_pwr7
+// CHECK: ret ptr @foo_priority.default
+
+
+// CHECK: attributes #[[#ATTR_P7]] = {{.*}} "target-cpu"="pwr7"
+// CHECK: attributes #[[#ATTR_P10]] = {{.*}} "target-cpu"="pwr10"
+// CHECK: attributes #[[#ATTR_P8]] = {{.*}} "target-cpu"="pwr8"
+// CHECK: attributes #[[#ATTR_P9]] = {{.*}} "target-cpu"="pwr9"
+
>From 268d3fbec0de9573c95b7af18ed5d8ccb71f20dd Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 10 Mar 2026 04:02:06 +0000
Subject: [PATCH 11/24] clang-format
---
clang/lib/CodeGen/CodeGenFunction.cpp | 12 ++++++------
clang/lib/CodeGen/Targets/PPC.cpp | 2 +-
clang/lib/Sema/SemaPPC.cpp | 2 +-
3 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 206cab9fda302..de63bbdaa7b55 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -3137,12 +3137,12 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
assert(RO.Features[0].starts_with("cpu="));
StringRef CPU = RO.Features[0].split("=").second.trim();
StringRef Feature = llvm::StringSwitch<StringRef>(CPU)
- .Cases({"power7","pwr7"}, "arch_2_06")
- .Cases({"power8","pwr8"}, "arch_2_07")
- .Cases({"power9","pwr9"}, "arch_3_00")
- .Cases({"power10","pwr10"}, "arch_3_1")
- .Cases({"power11","pwr11"}, "arch_3_1")
- .Default("error");
+ .Cases({"power7", "pwr7"}, "arch_2_06")
+ .Cases({"power8", "pwr8"}, "arch_2_07")
+ .Cases({"power9", "pwr9"}, "arch_3_00")
+ .Cases({"power10", "pwr10"}, "arch_3_1")
+ .Cases({"power11", "pwr11"}, "arch_3_1")
+ .Default("error");
llvm::Value *Condition = EmitPPCBuiltinCpu(
Builtin::BI__builtin_cpu_supports, Builder.getInt1Ty(), Feature);
diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp
index 6694725277144..ab069bfbd1b51 100644
--- a/clang/lib/CodeGen/Targets/PPC.cpp
+++ b/clang/lib/CodeGen/Targets/PPC.cpp
@@ -157,7 +157,7 @@ void AIXABIInfo::appendAttributeMangling(StringRef AttrStr,
return;
}
- assert(false && "specifying target features on an FMV is not supported on AIX");
+ assert(0 && "specifying target features on an FMV is unsupported on AIX");
}
class AIXTargetCodeGenInfo : public TargetCodeGenInfo {
diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp
index 60270d82908fb..a4392ede2ac78 100644
--- a/clang/lib/Sema/SemaPPC.cpp
+++ b/clang/lib/Sema/SemaPPC.cpp
@@ -611,7 +611,7 @@ bool SemaPPC::checkTargetClonesAttr(
} else if (LHS == "default") {
HasDefault = true;
} else if (!getASTContext().getTargetInfo().isValidFeatureName(LHS) ||
- getASTContext().getTargetInfo().getFMVPriority(LHS) == 0) {
+ getASTContext().getTargetInfo().getFMVPriority(LHS) == 0) {
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << LHS << TargetClones;
}
>From 02257f6539c05e4796e2fe190f4d4fddec979a51 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 10 Mar 2026 14:49:11 +0000
Subject: [PATCH 12/24] add IR codegen test
---
clang/test/CodeGen/attr-target-clones-ppc.c | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/clang/test/CodeGen/attr-target-clones-ppc.c b/clang/test/CodeGen/attr-target-clones-ppc.c
index 08d54a391b58f..05a9b788f701a 100644
--- a/clang/test/CodeGen/attr-target-clones-ppc.c
+++ b/clang/test/CodeGen/attr-target-clones-ppc.c
@@ -99,16 +99,31 @@ int test_foo_used_no_defn(void) {
}
// CHECK: declare {{.*}}i32 @foo_used_no_defn()
-// test that the CPU checks are done in most to least restrictive (highest to lowest CPU)
-int __attribute__((target_clones("cpu=pwr7", "cpu=pwr9", "default", "cpu=pwr8")))
+// Test that the CPU conditions are checked from the most to the least
+// restrictive (highest to lowest CPU). Also test the codegen for the
+// conditions
+int __attribute__((target_clones("cpu=pwr10", "cpu=pwr7", "cpu=pwr9", "default", "cpu=pwr8")))
foo_priority(int x) { return x & (x - 1); }
// CHECK: define internal ptr @foo_priority.resolver()
+// CHECK-NEXT: entry
+// if (__builtin_cpu_supports("arch_3_1")) return &foo_priority.cpu_pwr10;
+// CHECK-NEXT: %[[#L1:]] = load i32, {{.*}} ptr @_system_configuration, i32 0, i32 1)
+// CHECK-NEXT: icmp uge i32 %[[#L1]], 262144
+// CHECK: ret ptr @foo_priority.cpu_pwr10
+// if (__builtin_cpu_supports("arch_3_00")) return &foo_priority.cpu_pwr9;
+// CHECK: %[[#L2:]] = load i32, {{.*}} ptr @_system_configuration, i32 0, i32 1)
+// CHECK-NEXT: icmp uge i32 %[[#L2]], 131072
// CHECK: ret ptr @foo_priority.cpu_pwr9
+// if (__builtin_cpu_supports("arch_2_07")) return &foo_priority.cpu_pwr8;
+// CHECK: %[[#L3:]] = load i32, {{.*}} ptr @_system_configuration, i32 0, i32 1)
+// CHECK-NEXT: icmp uge i32 %[[#L3]], 65536
// CHECK: ret ptr @foo_priority.cpu_pwr8
+// if (__builtin_cpu_supports("arch_2_06")) return &foo_priority.cpu_pwr7;
+// CHECK: %[[#L4:]] = load i32, {{.*}} ptr @_system_configuration, i32 0, i32 1)
+// CHECK-NEXT: icmp uge i32 %[[#L4]], 32768
// CHECK: ret ptr @foo_priority.cpu_pwr7
// CHECK: ret ptr @foo_priority.default
-
// CHECK: attributes #[[#ATTR_P7]] = {{.*}} "target-cpu"="pwr7"
// CHECK: attributes #[[#ATTR_P10]] = {{.*}} "target-cpu"="pwr10"
// CHECK: attributes #[[#ATTR_P8]] = {{.*}} "target-cpu"="pwr8"
>From 11313b0b352b57b4ea42acf97e1bf1ae715d5dc8 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 10 Mar 2026 15:17:18 +0000
Subject: [PATCH 13/24] code review
---
clang/include/clang/Basic/TargetInfo.h | 2 +-
clang/lib/CodeGen/TargetBuiltins/PPC.cpp | 57 ++++++++++++------------
2 files changed, 30 insertions(+), 29 deletions(-)
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 68160e9bd9b29..b08631baf2532 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1567,7 +1567,7 @@ class TargetInfo : public TransferrableTargetInfo,
/// which requires support for cpu_supports and cpu_is functionality.
bool supportsMultiVersioning() const {
return getTriple().isX86() || getTriple().isAArch64() ||
- getTriple().isRISCV() || getTriple().isOSBinFormatXCOFF();
+ getTriple().isRISCV() || getTriple().isOSAIX();
}
/// Identify whether this target supports IFuncs.
diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
index 59001fbccc053..721071308c251 100644
--- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
@@ -73,6 +73,8 @@ static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
Value *CodeGenFunction::EmitPPCBuiltinCpu(unsigned BuiltinID,
llvm::Type *ReturnType,
StringRef CPUStr) {
+ assert(BuiltinID == Builtin::BI__builtin_cpu_is ||
+ BuiltinID == Builtin::BI__builtin_cpu_supports);
#include "llvm/TargetParser/PPCTargetParser.def"
auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
@@ -160,45 +162,44 @@ Value *CodeGenFunction::EmitPPCBuiltinCpu(unsigned BuiltinID,
Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
return Builder.CreateICmpEQ(TheCall,
llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
- } else if (BuiltinID == Builtin::BI__builtin_cpu_supports) {
- llvm::Triple Triple = getTarget().getTriple();
- if (Triple.isOSAIX()) {
- typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
- unsigned>
- CPUSupportType;
- auto [SupportMethod, FieldIdx, Mask, CompOp, Value] =
- static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)
+ }
+ // Otherwise BuiltinID is Builtin::BI__builtin_cpu_supports (see the assert at function entry).
+ llvm::Triple Triple = getTarget().getTriple();
+ if (Triple.isOSAIX()) {
+ typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
+ unsigned>
+ CPUSupportType;
+ auto [SupportMethod, FieldIdx, Mask, CompOp, Value] =
+ static_cast<CPUSupportType>(
+ StringSwitch<CPUSupportType>(CPUStr)
#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
VALUE) \
.Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
#include "llvm/TargetParser/PPCTargetParser.def"
- .Default({BUILTIN_PPC_FALSE, 0, 0,
- CmpInst::Predicate(), 0}));
- return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
- Value);
- }
+ .Default({BUILTIN_PPC_FALSE, 0, 0, CmpInst::Predicate(), 0}));
+ return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
+ Value);
+ }
- assert(Triple.isOSLinux() &&
- "__builtin_cpu_supports() is only supported for AIX and Linux.");
- auto [FeatureWord, BitMask] =
- StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
+ assert(Triple.isOSLinux() &&
+ "__builtin_cpu_supports() is only supported for AIX and Linux.");
+ auto [FeatureWord, BitMask] =
+ StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
.Case(Name, {FA_WORD, Bitmask})
#include "llvm/TargetParser/PPCTargetParser.def"
- .Default({0, 0});
- if (!BitMask)
- return Builder.getFalse();
- Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
- Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
- Value *Mask =
- Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
- return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
+ .Default({0, 0});
+ if (!BitMask)
+ return Builder.getFalse();
+ Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
+ llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
+ Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
+ Value *Mask =
+ Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
+ return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
#undef PPC_FAWORD_HWCAP
#undef PPC_FAWORD_HWCAP2
#undef PPC_FAWORD_CPUID
- } else
- assert(0 && "unexpected builtin");
}
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
>From 07e2abc988a0cdc8c0a44b01c2e165fc41d0a2a6 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 10 Mar 2026 16:34:59 +0000
Subject: [PATCH 14/24] inline the only call to IgnoreFMVOnADeclaration
---
clang/lib/CodeGen/CodeGenModule.cpp | 22 +++++++---------------
1 file changed, 7 insertions(+), 15 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c0f2870258acc..c600eeb021772 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2127,20 +2127,6 @@ static bool isUniqueInternalLinkageDecl(GlobalDecl GD,
(CGM.getFunctionLinkage(GD) == llvm::GlobalValue::InternalLinkage);
}
-// On certain platforms, a declared (but not defined) FMV shall be treated
-// like a regular non-FMV function.
-static bool IgnoreFMVOnADeclaration(const llvm::Triple &Triple,
- const FunctionDecl *FD) {
- if (!FD->isMultiVersion())
- return false;
-
- if (Triple.isOSAIX()) {
- assert(FD->isTargetClonesMultiVersion());
- return !FD->isDefined();
- }
- return false;
-}
-
static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
const NamedDecl *ND,
bool OmitMultiVersionMangling = false) {
@@ -5265,7 +5251,13 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
AddDeferredMultiVersionResolverToEmit(GD);
NameWithoutMultiVersionMangling = getMangledNameImpl(
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
- } else if (IgnoreFMVOnADeclaration(getTriple(), FD)) {
+ }
+ // On AIX, a declared (but not defined) FMV shall be treated like a
+ // regular non-FMV function. If a definition is later seen, then
+ // GetOrCreateMultiVersionResolver will get called (when processing said
+ // definition) which will replace the IR declaration we're creating here
+ // with the FMV ifunc.
+ else if (getTriple().isOSAIX() && !FD->isDefined()) {
NameWithoutMultiVersionMangling = getMangledNameImpl(
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
} else
>From eaf36ebf4f96db5490bca4d8b3a964a36465ffc4 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 10 Mar 2026 17:01:28 +0000
Subject: [PATCH 15/24] code review: add const to parameters
---
clang/include/clang/Sema/SemaPPC.h | 4 ++--
clang/lib/Sema/SemaPPC.cpp | 3 ++-
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/clang/include/clang/Sema/SemaPPC.h b/clang/include/clang/Sema/SemaPPC.h
index 0cf6ba7ff29dd..8dcd638d3f722 100644
--- a/clang/include/clang/Sema/SemaPPC.h
+++ b/clang/include/clang/Sema/SemaPPC.h
@@ -54,8 +54,8 @@ class SemaPPC : public SemaBase {
// vector short vec_xxsldwi(vector short, vector short, int);
bool BuiltinVSX(CallExpr *TheCall);
- bool checkTargetClonesAttr(SmallVectorImpl<StringRef> &Params,
- SmallVectorImpl<SourceLocation> &Locs,
+ bool checkTargetClonesAttr(const SmallVectorImpl<StringRef> &Params,
+ const SmallVectorImpl<SourceLocation> &Locs,
SmallVectorImpl<SmallString<64>> &NewParams);
};
} // namespace clang
diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp
index a4392ede2ac78..cd7bbdd3b7a82 100644
--- a/clang/lib/Sema/SemaPPC.cpp
+++ b/clang/lib/Sema/SemaPPC.cpp
@@ -574,7 +574,8 @@ bool SemaPPC::BuiltinVSX(CallExpr *TheCall) {
}
bool SemaPPC::checkTargetClonesAttr(
- SmallVectorImpl<StringRef> &Params, SmallVectorImpl<SourceLocation> &Locs,
+ const SmallVectorImpl<StringRef> &Params,
+ const SmallVectorImpl<SourceLocation> &Locs,
SmallVectorImpl<SmallString<64>> &NewParams) {
using namespace DiagAttrParams;
>From d900078db76ae9dab1a21ef66e1fad25fb404fa9 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 10 Mar 2026 17:17:39 +0000
Subject: [PATCH 16/24] checkTargetClonesAttr: compute TargetInfo once
---
clang/lib/Sema/SemaPPC.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp
index cd7bbdd3b7a82..e38fb6687372b 100644
--- a/clang/lib/Sema/SemaPPC.cpp
+++ b/clang/lib/Sema/SemaPPC.cpp
@@ -582,6 +582,7 @@ bool SemaPPC::checkTargetClonesAttr(
assert(Params.size() == Locs.size() &&
"Mismatch between number of string parameters and locations");
+ auto &TargetInfo = getASTContext().getTargetInfo();
bool HasDefault = false;
bool HasComma = false;
for (unsigned I = 0, E = Params.size(); I < E; ++I) {
@@ -604,15 +605,14 @@ bool SemaPPC::checkTargetClonesAttr(
Loc.getLocWithOffset(LHS.data() - Param.data());
if (LHS.starts_with("cpu=")) {
- if (!getASTContext().getTargetInfo().isValidCPUName(
- LHS.drop_front(sizeof("cpu=") - 1)))
+ if (!TargetInfo.isValidCPUName(LHS.drop_front(sizeof("cpu=") - 1)))
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << CPU << LHS.drop_front(sizeof("cpu=") - 1)
<< TargetClones;
} else if (LHS == "default") {
HasDefault = true;
- } else if (!getASTContext().getTargetInfo().isValidFeatureName(LHS) ||
- getASTContext().getTargetInfo().getFMVPriority(LHS) == 0) {
+ } else if (!TargetInfo.isValidFeatureName(LHS) ||
+ TargetInfo.getFMVPriority(LHS) == 0) {
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << LHS << TargetClones;
}
>From 74e434202c0fc478afd529d09a46b595723a2731 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 10 Mar 2026 22:46:51 -0400
Subject: [PATCH 17/24] normalize CPU during Sema
fix Sema and create ppc target_clones tests based on the x86 test
---
clang/include/clang/Sema/SemaPPC.h | 3 +-
clang/lib/Basic/Targets/PPC.cpp | 7 ++
clang/lib/Basic/Targets/PPC.h | 1 +
clang/lib/Sema/SemaDeclAttr.cpp | 3 +-
clang/lib/Sema/SemaPPC.cpp | 20 +++-
clang/test/Sema/attr-target-clones-ppc.c | 130 +++++++++++++++++++++++
6 files changed, 157 insertions(+), 7 deletions(-)
create mode 100644 clang/test/Sema/attr-target-clones-ppc.c
diff --git a/clang/include/clang/Sema/SemaPPC.h b/clang/include/clang/Sema/SemaPPC.h
index 8dcd638d3f722..9dad80acc1747 100644
--- a/clang/include/clang/Sema/SemaPPC.h
+++ b/clang/include/clang/Sema/SemaPPC.h
@@ -56,7 +56,8 @@ class SemaPPC : public SemaBase {
bool checkTargetClonesAttr(const SmallVectorImpl<StringRef> &Params,
const SmallVectorImpl<SourceLocation> &Locs,
- SmallVectorImpl<SmallString<64>> &NewParams);
+ SmallVectorImpl<SmallString<64>> &NewParams,
+ SourceLocation AttrLoc);
};
} // namespace clang
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index fbff0af711b13..0f498c382f5d0 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -678,6 +678,13 @@ void PPCTargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
}
}
+bool PPCTargetInfo::isValidFeatureName(StringRef Name) const {
+ if (Name.empty())
+ return false;
+ // TODO: filter out unknown features
+ return true;
+}
+
ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const {
ParsedTargetAttr Ret;
if (Features == "default")
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 6f90ff1f5d57c..9dc501d33c95f 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -199,6 +199,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
bool supportsTargetAttributeTune() const override { return true; }
+ bool isValidFeatureName(StringRef Name) const override;
ParsedTargetAttr parseTargetAttr(StringRef Str) const override;
llvm::APInt getFMVPriority(ArrayRef<StringRef> Features) const override;
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 83bd1206e2009..edf6182e3ddd7 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -3644,7 +3644,8 @@ static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
AL.getLoc()))
return;
} else if (S.Context.getTargetInfo().getTriple().isOSAIX()) {
- if (S.PPC().checkTargetClonesAttr(Params, Locations, NewParams))
+ if (S.PPC().checkTargetClonesAttr(Params, Locations, NewParams,
+ AL.getLoc()))
return;
}
Params.clear();
diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp
index e38fb6687372b..0de08e4dec046 100644
--- a/clang/lib/Sema/SemaPPC.cpp
+++ b/clang/lib/Sema/SemaPPC.cpp
@@ -22,6 +22,7 @@
#include "clang/Basic/TargetInfo.h"
#include "clang/Sema/Sema.h"
#include "llvm/ADT/APSInt.h"
+#include "llvm/TargetParser/PPCTargetParser.h"
namespace clang {
@@ -573,10 +574,10 @@ bool SemaPPC::BuiltinVSX(CallExpr *TheCall) {
return false;
}
-bool SemaPPC::checkTargetClonesAttr(
- const SmallVectorImpl<StringRef> &Params,
- const SmallVectorImpl<SourceLocation> &Locs,
- SmallVectorImpl<SmallString<64>> &NewParams) {
+bool SemaPPC::checkTargetClonesAttr(const SmallVectorImpl<StringRef> &Params,
+ const SmallVectorImpl<SourceLocation> &Locs,
+ SmallVectorImpl<SmallString<64>> &NewParams,
+ SourceLocation AttrLoc) {
using namespace DiagAttrParams;
assert(Params.size() == Locs.size() &&
@@ -615,6 +616,15 @@ bool SemaPPC::checkTargetClonesAttr(
TargetInfo.getFMVPriority(LHS) == 0) {
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << LHS << TargetClones;
+ } else
+ assert(0 && "specifying target-features on target clones not supported yet");
+
+ SmallString<64> CPU;
+ if (LHS.starts_with("cpu=")) {
+ CPU.append("cpu=");
+ CPU.append(
+ llvm::PPC::normalizeCPUName(LHS.drop_front(sizeof("cpu=") - 1)));
+ LHS = CPU.str();
}
if (llvm::is_contained(NewParams, LHS)) {
Diag(CurLoc, diag::warn_target_clone_duplicate_options);
@@ -627,7 +637,7 @@ bool SemaPPC::checkTargetClonesAttr(
Diag(Locs[0], diag::warn_target_clone_mixed_values);
if (!HasDefault)
- return Diag(Locs[0], diag::err_target_clone_must_have_default);
+ return Diag(AttrLoc, diag::err_target_clone_must_have_default);
return false;
}
diff --git a/clang/test/Sema/attr-target-clones-ppc.c b/clang/test/Sema/attr-target-clones-ppc.c
new file mode 100644
index 0000000000000..2f6aadde528fb
--- /dev/null
+++ b/clang/test/Sema/attr-target-clones-ppc.c
@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 -triple powerpc-ibm-aix-xcoff -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -fsyntax-only -verify %s
+
+// expected-error at +1 {{'target_clones' multiversioning requires a default target}}
+void __attribute__((target_clones("cpu=pwr7")))
+no_default(void);
+
+// expected-error at +2 {{'target_clones' and 'target' attributes are not compatible}}
+// expected-note at +1 {{conflicting attribute is here}}
+void __attribute__((target("cpu=pwr7"), target_clones("cpu=pwr8")))
+ignored_attr(void);
+
+// expected-error at +2 {{'target' and 'target_clones' attributes are not compatible}}
+// expected-note at +1 {{conflicting attribute is here}}
+void __attribute__((target_clones("default", "cpu=pwr8"), target("cpu=pwr7")))
+ignored_attr2(void);
+
+int __attribute__((target_clones("cpu=pwr9", "default"))) redecl4(void);
+// expected-error at +3 {{'target_clones' attribute does not match previous declaration}}
+// expected-note at -2 {{previous declaration is here}}
+int __attribute__((target_clones("cpu=pwr7", "default")))
+redecl4(void) { return 1; }
+
+int __attribute__((target_clones("cpu=pwr7", "default"))) redecl7(void);
+// expected-error at +2 {{multiversioning attributes cannot be combined}}
+// expected-note at -2 {{previous declaration is here}}
+int __attribute__((target("cpu=pwr8"))) redecl7(void) { return 1; }
+
+int __attribute__((target("cpu=pwr9"))) redef2(void) { return 1; }
+// expected-error at +2 {{multiversioning attributes cannot be combined}}
+// expected-note at -2 {{previous declaration is here}}
+int __attribute__((target_clones("cpu=pwr7", "default"))) redef2(void) { return 1; }
+
+int __attribute__((target_clones("cpu=pwr9,default"))) redef3(void) { return 1; }
+// expected-error at +2 {{redefinition of 'redef3'}}
+// expected-note at -2 {{previous definition is here}}
+int __attribute__((target_clones("cpu=pwr9,default"))) redef3(void) { return 1; }
+
+// Duplicates are allowed
+// expected-warning at +2 {{mixing 'target_clones' specifier mechanisms is permitted for GCC compatibility}}
+// expected-warning at +1 2 {{version list contains duplicate entries}}
+int __attribute__((target_clones("cpu=pwr9,cpu=power9", "cpu=power9, default")))
+dupes(void) { return 1; }
+
+// expected-warning at +1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("")))
+empty_target_1(void);
+// expected-warning at +1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones(",default")))
+empty_target_2(void);
+// expected-warning at +1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("default,")))
+empty_target_3(void);
+// expected-warning at +1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("default, ,cpu=pwr7")))
+empty_target_4(void);
+
+// expected-warning at +1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("default,cpu=pwr7", "")))
+empty_target_5(void);
+
+// expected-warning at +1 {{version list contains duplicate entries}}
+void __attribute__((target_clones("default", "default")))
+dupe_default(void);
+
+// expected-warning at +1 {{version list contains duplicate entries}}
+void __attribute__((target_clones("cpu=pwr9,cpu=power9,default")))
+dupe_normal(void);
+
+// expected-error at +2 {{attribute 'target_clones' cannot appear more than once on a declaration}}
+// expected-note at +1 {{conflicting attribute is here}}
+void __attribute__((target_clones("cpu=pwr7,default"), target_clones("cpu=pwr8,default")))
+dupe_normal2(void);
+
+int mv_after_use(void);
+int useage(void) {
+ return mv_after_use();
+}
+// expected-error at +1 {{function declaration cannot become a multiversioned function after first usage}}
+int __attribute__((target_clones("cpu=pwr9", "default"))) mv_after_use(void) { return 1; }
+
+void bad_overload1(void) __attribute__((target_clones("cpu=pwr8", "default")));
+// expected-error at +2 {{conflicting types for 'bad_overload1'}}
+// expected-note at -2 {{previous declaration is here}}
+void bad_overload1(int p) {}
+
+void bad_overload2(int p) {}
+// expected-error at +2 {{conflicting types for 'bad_overload2'}}
+// expected-note at -2 {{previous definition is here}}
+void bad_overload2(void) __attribute__((target_clones("cpu=pwr8", "default")));
+
+void bad_overload3(void) __attribute__((target_clones("cpu=pwr8", "default")));
+// expected-error at +2 {{conflicting types for 'bad_overload3'}}
+// expected-note at -2 {{previous declaration is here}}
+void bad_overload3(int) __attribute__((target_clones("cpu=pwr8", "default")));
+
+
+void good_overload1(void) __attribute__((target_clones("cpu=pwr7", "cpu=power10", "default")));
+void __attribute__((__overloadable__)) good_overload1(int p) {}
+
+// expected-error at +1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}}
+void __attribute__((__overloadable__)) good_overload2(void) __attribute__((target_clones("cpu=pwr7", "default")));
+void good_overload2(int p) {}
+// expected-error at +1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}}
+void __attribute__((__overloadable__)) good_overload3(void) __attribute__((target_clones("cpu=pwr7", "default")));
+// expected-error at +1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}}
+void __attribute__((__overloadable__)) good_overload3(int) __attribute__((target_clones("cpu=pwr7", "default")));
+
+void good_overload4(void) __attribute__((target_clones("cpu=pwr7", "default")));
+// expected-error at +1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}}
+void __attribute__((__overloadable__)) good_overload4(int) __attribute__((target_clones("cpu=pwr7", "default")));
+
+// expected-error at +1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}}
+void __attribute__((__overloadable__)) good_overload5(void) __attribute__((target_clones("cpu=pwr7", "default")));
+void good_overload5(int) __attribute__((target_clones("cpu=pwr7", "default")));
+
+
+void good_isa_level(int) __attribute__((target_clones("default", "cpu=pwr7", "cpu=pwr8", "cpu=pwr9", "cpu=pwr10")));
+
+// expected-warning at +1 {{unsupported CPU 'bad-cpu' in the 'target_clones' attribute string; 'target_clones' attribute ignored}}
+void bad_cpu(int) __attribute__((target_clones("default", "cpu=bad-cpu")));
+
+
+// expected-error at +1 {{'target_clones' multiversioning requires a default target}}
+void __attribute__((target_clones()))
+gh173684_empty_attribute_args(void);
+
+// expected-error at +1 {{'target_clones' multiversioning requires a default target}}
+void __attribute__((target_clones))
+gh173684_empty_attribute_args_2(void);
>From 8a2f16aca304fce3972fdea438bc7a928533bba3 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Wed, 11 Mar 2026 18:39:35 +0000
Subject: [PATCH 18/24] now that we normalize CPU on target_clones in Sema,
remove normalization in codegen
---
clang/lib/Basic/Targets/PPC.cpp | 9 ++-------
clang/lib/CodeGen/CodeGenFunction.cpp | 10 +++++-----
2 files changed, 7 insertions(+), 12 deletions(-)
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 0f498c382f5d0..788d2d25b6cd5 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -536,8 +536,7 @@ bool PPCTargetInfo::initFeatureMap(
const llvm::Triple &TheTriple = getTriple();
std::optional<llvm::StringMap<bool>> FeaturesOpt =
- llvm::PPC::getPPCDefaultTargetFeatures(TheTriple,
- llvm::PPC::normalizeCPUName(CPU));
+ llvm::PPC::getPPCDefaultTargetFeatures(TheTriple, CPU);
if (FeaturesOpt)
Features = FeaturesOpt.value();
@@ -699,7 +698,6 @@ ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const {
// accepting it weirdly.
Feature = Feature.trim();
- // While we're here iterating check for a different target cpu.
if (Feature.starts_with("cpu=")) {
if (!Ret.CPU.empty())
Ret.Duplicate = "cpu=";
@@ -715,8 +713,6 @@ ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const {
else
Ret.Features.push_back("+" + Feature.str());
}
- Ret.CPU = llvm::PPC::normalizeCPUName(Ret.CPU);
- Ret.Tune = llvm::PPC::normalizeCPUName(Ret.Tune);
return Ret;
}
@@ -726,8 +722,7 @@ llvm::APInt PPCTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
assert(Features.size() == 1 && "one feature/cpu per clone on PowerPC");
ParsedTargetAttr ParsedAttr = parseTargetAttr(Features[0]);
if (!ParsedAttr.CPU.empty()) {
- StringRef CPU = llvm::PPC::normalizeCPUName(ParsedAttr.CPU);
- int Priority = llvm::StringSwitch<int>(CPU)
+ int Priority = llvm::StringSwitch<int>(ParsedAttr.CPU)
.Case("pwr7", 1)
.Case("pwr8", 2)
.Case("pwr9", 3)
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index de63bbdaa7b55..ab9e66182205c 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -3137,11 +3137,11 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
assert(RO.Features[0].starts_with("cpu="));
StringRef CPU = RO.Features[0].split("=").second.trim();
StringRef Feature = llvm::StringSwitch<StringRef>(CPU)
- .Cases({"power7", "pwr7"}, "arch_2_06")
- .Cases({"power8", "pwr8"}, "arch_2_07")
- .Cases({"power9", "pwr9"}, "arch_3_00")
- .Cases({"power10", "pwr10"}, "arch_3_1")
- .Cases({"power11", "pwr11"}, "arch_3_1")
+ .Case("pwr7", "arch_2_06")
+ .Case("pwr8", "arch_2_07")
+ .Case("pwr9", "arch_3_00")
+ .Case("pwr10", "arch_3_1")
+ .Case("pwr11", "arch_3_1")
.Default("error");
llvm::Value *Condition = EmitPPCBuiltinCpu(
>From 062947c7cd27e0a9620c1f7007cc2484593ebb70 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Wed, 11 Mar 2026 23:14:36 +0000
Subject: [PATCH 19/24] diagnose non-cpu strings in target_clones in Sema
---
clang/lib/CodeGen/CodeGenModule.cpp | 2 +-
clang/lib/Sema/SemaPPC.cpp | 8 +++-----
2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c600eeb021772..0c4a42720aeca 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -5256,7 +5256,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
// regular non-FMV function. If a definition is later seen, then
// GetOrCreateMultiVersionResolver will get called (when processing said
// definition) which will replace the IR declaration we're creating here
- // with the FMV ifunc.
+ // with the FMV ifunc (see replaceDeclarationWith).
else if (getTriple().isOSAIX() && !FD->isDefined()) {
NameWithoutMultiVersionMangling = getMangledNameImpl(
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp
index 0de08e4dec046..91570ebf2d8fa 100644
--- a/clang/lib/Sema/SemaPPC.cpp
+++ b/clang/lib/Sema/SemaPPC.cpp
@@ -612,13 +612,11 @@ bool SemaPPC::checkTargetClonesAttr(const SmallVectorImpl<StringRef> &Params,
<< TargetClones;
} else if (LHS == "default") {
HasDefault = true;
- } else if (!TargetInfo.isValidFeatureName(LHS) ||
- TargetInfo.getFMVPriority(LHS) == 0) {
+ } else {
+ // This is a feature string; feature strings are not supported yet.
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << LHS << TargetClones;
- } else
- assert(0 && "specifying target-features on target clones not supported yet");
-
+ }
SmallString<64> CPU;
if (LHS.starts_with("cpu=")) {
CPU.append("cpu=");
>From 35b2e4f9ecfbfdd064ffc22d759b2013e55a1c64 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Wed, 11 Mar 2026 20:42:03 -0400
Subject: [PATCH 20/24] create PPCTargetInfo::isTargetClonesSupportedCPU to
filter out unsupported CPUs during Sema
---
clang/lib/Sema/SemaPPC.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp
index 91570ebf2d8fa..57c880c81ac9e 100644
--- a/clang/lib/Sema/SemaPPC.cpp
+++ b/clang/lib/Sema/SemaPPC.cpp
@@ -606,10 +606,13 @@ bool SemaPPC::checkTargetClonesAttr(const SmallVectorImpl<StringRef> &Params,
Loc.getLocWithOffset(LHS.data() - Param.data());
if (LHS.starts_with("cpu=")) {
- if (!TargetInfo.isValidCPUName(LHS.drop_front(sizeof("cpu=") - 1)))
+ StringRef CPUStr = LHS.drop_front(sizeof("cpu=") - 1);
+ if (!TargetInfo.isValidCPUName(CPUStr))
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << CPU << LHS.drop_front(sizeof("cpu=") - 1)
- << TargetClones;
+ << Unknown << CPU << CPUStr << TargetClones;
+ else if (!TargetInfo.validateCpuIs(CPUStr))
+ return Diag(CurLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << CPU << CPUStr << TargetClones;
} else if (LHS == "default") {
HasDefault = true;
} else {
>From cc711c87a374d7226148c2a036b4fe2823e910db Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Thu, 12 Mar 2026 03:10:25 +0000
Subject: [PATCH 21/24] fix test
---
clang/test/Sema/attr-target-clones-ppc.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/clang/test/Sema/attr-target-clones-ppc.c b/clang/test/Sema/attr-target-clones-ppc.c
index 2f6aadde528fb..96acc974320b0 100644
--- a/clang/test/Sema/attr-target-clones-ppc.c
+++ b/clang/test/Sema/attr-target-clones-ppc.c
@@ -117,9 +117,11 @@ void good_overload5(int) __attribute__((target_clones("cpu=pwr7", "default")));
void good_isa_level(int) __attribute__((target_clones("default", "cpu=pwr7", "cpu=pwr8", "cpu=pwr9", "cpu=pwr10")));
-// expected-warning at +1 {{unsupported CPU 'bad-cpu' in the 'target_clones' attribute string; 'target_clones' attribute ignored}}
+// expected-warning at +1 {{unknown CPU 'bad-cpu' in the 'target_clones' attribute string; 'target_clones' attribute ignored}}
void bad_cpu(int) __attribute__((target_clones("default", "cpu=bad-cpu")));
+// expected-warning at +1 {{unsupported CPU 'pwr3' in the 'target_clones' attribute string; 'target_clones' attribute ignored}}
+void bad_cpu(int) __attribute__((target_clones("default", "cpu=pwr3")));
// expected-error at +1 {{'target_clones' multiversioning requires a default target}}
void __attribute__((target_clones()))
>From b2c678859fe25fa4f8228c061aa84cc7ee0d68ce Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Thu, 12 Mar 2026 03:25:31 +0000
Subject: [PATCH 22/24] test all supported CPUs
---
clang/test/CodeGen/attr-target-clones-ppc.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/clang/test/CodeGen/attr-target-clones-ppc.c b/clang/test/CodeGen/attr-target-clones-ppc.c
index 05a9b788f701a..5d202bf9e52a4 100644
--- a/clang/test/CodeGen/attr-target-clones-ppc.c
+++ b/clang/test/CodeGen/attr-target-clones-ppc.c
@@ -15,11 +15,20 @@ static int __attribute__((target_clones("cpu=power10, default"))) internal(void)
int use(void) { return internal(); }
// CHECK: define internal ptr @internal.resolver()
-int __attribute__((target_clones("cpu=power10, default"))) foo(void) { return 0; }
+// Test all supported CPUs.
+int __attribute__((target_clones("cpu=power10, cpu=power11, cpu=pwr9, cpu=pwr7, cpu=power8, default"))) foo(void) { return 0; }
// CHECK: define internal {{.*}}i32 @foo.cpu_pwr10() #[[#ATTR_P10:]]
+// CHECK: define internal {{.*}}i32 @foo.cpu_pwr11() #[[#ATTR_P11:]]
+// CHECK: define internal {{.*}}i32 @foo.cpu_pwr9() #[[#ATTR_P9:]]
+// CHECK: define internal {{.*}}i32 @foo.cpu_pwr7() #[[#ATTR_P7:]]
+// CHECK: define internal {{.*}}i32 @foo.cpu_pwr8() #[[#ATTR_P8:]]
// CHECK: define internal {{.*}}i32 @foo.default() #[[#ATTR_P7:]]
// CHECK: define internal ptr @foo.resolver()
+// CHECK: ret ptr @foo.cpu_pwr11
// CHECK: ret ptr @foo.cpu_pwr10
+// CHECK: ret ptr @foo.cpu_pwr9
+// CHECK: ret ptr @foo.cpu_pwr8
+// CHECK: ret ptr @foo.cpu_pwr7
// CHECK: ret ptr @foo.default
__attribute__((target_clones("default,default ,cpu=pwr8"))) void foo_dupes(void) {}
@@ -124,8 +133,10 @@ foo_priority(int x) { return x & (x - 1); }
// CHECK: ret ptr @foo_priority.cpu_pwr7
// CHECK: ret ptr @foo_priority.default
+
// CHECK: attributes #[[#ATTR_P7]] = {{.*}} "target-cpu"="pwr7"
// CHECK: attributes #[[#ATTR_P10]] = {{.*}} "target-cpu"="pwr10"
-// CHECK: attributes #[[#ATTR_P8]] = {{.*}} "target-cpu"="pwr8"
+// CHECK: attributes #[[#ATTR_P11]] = {{.*}} "target-cpu"="pwr11"
// CHECK: attributes #[[#ATTR_P9]] = {{.*}} "target-cpu"="pwr9"
+// CHECK: attributes #[[#ATTR_P8]] = {{.*}} "target-cpu"="pwr8"
>From eb5f780a26d30fe0a5113412c5af74a728ba74b6 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Thu, 12 Mar 2026 19:20:15 +0000
Subject: [PATCH 23/24] code review
code review
code review
---
clang/lib/Basic/Targets/PPC.cpp | 7 -------
clang/lib/Basic/Targets/PPC.h | 1 -
clang/lib/CodeGen/CodeGenFunction.cpp | 12 +++---------
clang/lib/CodeGen/CodeGenModule.cpp | 10 ++++------
4 files changed, 7 insertions(+), 23 deletions(-)
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 788d2d25b6cd5..b17103e4ebd7c 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -677,13 +677,6 @@ void PPCTargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
}
}
-bool PPCTargetInfo::isValidFeatureName(StringRef Name) const {
- if (Name.empty())
- return false;
- // TODO: filter out unknown features
- return true;
-}
-
ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const {
ParsedTargetAttr Ret;
if (Features == "default")
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 9dc501d33c95f..6f90ff1f5d57c 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -199,7 +199,6 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
bool supportsTargetAttributeTune() const override { return true; }
- bool isValidFeatureName(StringRef Name) const override;
ParsedTargetAttr parseTargetAttr(StringRef Str) const override;
llvm::APInt getFMVPriority(ArrayRef<StringRef> Features) const override;
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index ab9e66182205c..1c418528494ad 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -3086,7 +3086,7 @@ void CodeGenFunction::EmitMultiVersionResolver(
[[fallthrough]];
default:
assert(false &&
- "Only implemented for x86, AArch64, RISC-V, and PowerPC targets");
+ "Only implemented for x86, AArch64, RISC-V, and PowerPC AIX");
}
}
@@ -3123,7 +3123,7 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
assert(&RO == Options.end() - 1 &&
"Default or Generic case must be last");
Builder.CreateRet(RO.Function);
- break;
+ return;
}
// if.else_n:
// %is_version_n = __builtin_cpu_supports(version_n)
@@ -3155,13 +3155,7 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
Builder.CreateRet(RO.Function);
}
- // If no generic/default, emit an unreachable.
- // Builder.SetInsertPoint(CurBlock);
- // llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap);
- // TrapCall->setDoesNotReturn();
- // TrapCall->setDoesNotThrow();
- // Builder.CreateUnreachable();
- // Builder.ClearInsertionPoint();
+ llvm_unreachable("Default case missing");
}
void CodeGenFunction::EmitRISCVMultiVersionResolver(
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 0c4a42720aeca..5da7446013f27 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2176,7 +2176,7 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
Out << CGM.getModuleNameHash();
}
- if (const auto *FD = dyn_cast<FunctionDecl>(ND)) {
+ if (const auto *FD = dyn_cast<FunctionDecl>(ND))
if (FD->isMultiVersion() && !OmitMultiVersionMangling) {
switch (FD->getMultiVersionKind()) {
case MultiVersionKind::CPUDispatch:
@@ -2214,7 +2214,6 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
llvm_unreachable("None multiversion type isn't valid here");
}
}
- }
// Make unique name for device side static file-scope variable for HIP.
if (CGM.getContext().shouldExternalize(ND) &&
@@ -5145,6 +5144,8 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) {
MangledName + ".resolver", ResolverType, GlobalDecl{},
/*ForVTable=*/false);
+ // On AIX, FMV is ignored on a declaration, so the ifunc, which is only
+ // generated for FMV definitions, does not need to be weak.
auto Linkage = getTriple().isOSAIX() ? getFunctionLinkage(GD)
: getMultiversionLinkage(*this, GD);
@@ -5169,10 +5170,7 @@ void CodeGenModule::setMultiVersionResolverAttributes(llvm::Function *Resolver,
GlobalDecl GD) {
const NamedDecl *D = dyn_cast_or_null<NamedDecl>(GD.getDecl());
- auto ResolverLinkage = getTriple().isOSAIX()
- ? llvm::GlobalValue::InternalLinkage
- : getMultiversionLinkage(*this, GD);
- Resolver->setLinkage(ResolverLinkage);
+ Resolver->setLinkage(getMultiversionLinkage(*this, GD));
// Function body has to be emitted before calling setGlobalVisibility
// for Resolver to be considered as definition.
>From 5c8c0736677bc33e7de807a170e810ad295598f2 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Thu, 12 Mar 2026 20:12:18 +0000
Subject: [PATCH 24/24] update target_clones documentation
---
clang/include/clang/Basic/AttrDocs.td | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 43f827b4c60ee..608b6d14dac6c 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -3292,6 +3292,12 @@ multiversioned function would have if it had been declared without the attribute
For backward compatibility with earlier Clang releases, a function alias with an
``.ifunc`` suffix is also emitted. The ``.ifunc`` suffixed symbol is a deprecated
feature and support for it may be removed in the future.
+
+For PowerPC targets, ``target_clones`` is supported on AIX only. Only CPU
+(specified as ``cpu=CPU``) and ``default`` options are allowed. IFUNC is supported
+on AIX in Clang, so dispatch is implemented similarly to other IFUNC targets.
+A multiversioned function that is only declared in a translation unit is treated
+as non-multiversioned. The resolver and function clones have internal linkage.
}];
}
More information about the cfe-commits
mailing list