[clang] [llvm] [FMV][AIX] Implement target_clones (cpu-only) (PR #177428)
Wael Yehia via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 10 08:09:28 PDT 2026
https://github.com/w2yehia updated https://github.com/llvm/llvm-project/pull/177428
>From 2dc3d49b5c6c216c9115dc7e931d9bf1a74f17fe Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 23 Sep 2025 14:22:57 -0400
Subject: [PATCH 01/15] refactor EmitPPCBuiltinCpu
---
clang/lib/CodeGen/CodeGenFunction.h | 1 +
clang/lib/CodeGen/TargetBuiltins/PPC.cpp | 62 +++++++++++++-----------
2 files changed, 36 insertions(+), 27 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 226950ab599e3..aecc4ec40aa97 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4889,6 +4889,7 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *BuildVector(ArrayRef<llvm::Value *> Ops);
llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+ llvm::Value *EmitPPCBuiltinCpu(unsigned BuiltinID, llvm::Type *ReturnType, StringRef CPUStr);
llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
index 6568959351a5d..8360a17c470dd 100644
--- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
@@ -70,31 +70,18 @@ static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
return CI;
}
-Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
- // Do not emit the builtin arguments in the arguments of a function call,
- // because the evaluation order of function arguments is not specified in C++.
- // This is important when testing to ensure the arguments are emitted in the
- // same order every time. Eg:
- // Instead of:
- // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
- // EmitScalarExpr(E->getArg(1)), "swdiv");
- // Use:
- // Value *Op0 = EmitScalarExpr(E->getArg(0));
- // Value *Op1 = EmitScalarExpr(E->getArg(1));
- // return Builder.CreateFDiv(Op0, Op1, "swdiv")
-
- Intrinsic::ID ID = Intrinsic::not_intrinsic;
+Value *CodeGenFunction::EmitPPCBuiltinCpu(
+ unsigned BuiltinID, llvm::Type *ReturnType, StringRef CPUStr) {
#include "llvm/TargetParser/PPCTargetParser.def"
auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
unsigned Mask, CmpInst::Predicate CompOp,
unsigned OpValue) -> Value * {
if (SupportMethod == BUILTIN_PPC_FALSE)
- return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
+ return llvm::ConstantInt::getFalse(ReturnType);
if (SupportMethod == BUILTIN_PPC_TRUE)
- return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
+ return llvm::ConstantInt::getTrue(ReturnType);
assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");
@@ -137,12 +124,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
};
- switch (BuiltinID) {
- default: return nullptr;
-
- case Builtin::BI__builtin_cpu_is: {
- const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
- StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
+ if (BuiltinID == Builtin::BI__builtin_cpu_is) {
llvm::Triple Triple = getTarget().getTriple();
typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;
@@ -170,7 +152,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
"Invalid CPU name. Missed by SemaChecking?");
if (LinuxSupportMethod == BUILTIN_PPC_FALSE)
- return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
+ return llvm::ConstantInt::getFalse(ReturnType);
Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
@@ -178,10 +160,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
return Builder.CreateICmpEQ(TheCall,
llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
}
- case Builtin::BI__builtin_cpu_supports: {
+ else if (BuiltinID == Builtin::BI__builtin_cpu_supports) {
llvm::Triple Triple = getTarget().getTriple();
- const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
- StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
if (Triple.isOSAIX()) {
typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
unsigned>
@@ -218,7 +198,35 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
#undef PPC_FAWORD_HWCAP2
#undef PPC_FAWORD_CPUID
}
+ else
+ assert(0 && "unexpected builtin");
+}
+Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
+ // Do not emit the builtin arguments in the arguments of a function call,
+ // because the evaluation order of function arguments is not specified in C++.
+ // This is important when testing to ensure the arguments are emitted in the
+ // same order every time. Eg:
+ // Instead of:
+ // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
+ // EmitScalarExpr(E->getArg(1)), "swdiv");
+ // Use:
+ // Value *Op0 = EmitScalarExpr(E->getArg(0));
+ // Value *Op1 = EmitScalarExpr(E->getArg(1));
+ // return Builder.CreateFDiv(Op0, Op1, "swdiv")
+
+ Intrinsic::ID ID = Intrinsic::not_intrinsic;
+
+ switch (BuiltinID) {
+ default: return nullptr;
+
+ case Builtin::BI__builtin_cpu_is:
+ case Builtin::BI__builtin_cpu_supports: {
+ const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
+ StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
+ return EmitPPCBuiltinCpu(BuiltinID, ConvertType(E->getType()), CPUStr);
+ }
// __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
// call __builtin_readcyclecounter.
case PPC::BI__builtin_ppc_get_timebase:
>From 6e4997e5b00442e3d73d81c047a34875bfdef294 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Wed, 16 Jul 2025 20:02:11 +0000
Subject: [PATCH 02/15] clang codegen for target_clones
---
clang/include/clang/Basic/TargetInfo.h | 2 +-
clang/include/clang/Sema/SemaPPC.h | 4 ++
clang/lib/AST/ASTContext.cpp | 2 +
clang/lib/Basic/Targets/PPC.cpp | 36 +++++++++++++
clang/lib/Basic/Targets/PPC.h | 4 ++
clang/lib/CodeGen/CodeGenFunction.cpp | 70 +++++++++++++++++++++++++-
clang/lib/CodeGen/CodeGenFunction.h | 3 ++
clang/lib/CodeGen/CodeGenModule.cpp | 10 ++--
clang/lib/CodeGen/Targets/PPC.cpp | 47 +++++++++++++++++
clang/lib/Sema/SemaDeclAttr.cpp | 4 ++
clang/lib/Sema/SemaPPC.cpp | 56 +++++++++++++++++++++
11 files changed, 231 insertions(+), 7 deletions(-)
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index ec6cd2be7c3c5..68160e9bd9b29 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1567,7 +1567,7 @@ class TargetInfo : public TransferrableTargetInfo,
/// which requires support for cpu_supports and cpu_is functionality.
bool supportsMultiVersioning() const {
return getTriple().isX86() || getTriple().isAArch64() ||
- getTriple().isRISCV();
+ getTriple().isRISCV() || getTriple().isOSBinFormatXCOFF();
}
/// Identify whether this target supports IFuncs.
diff --git a/clang/include/clang/Sema/SemaPPC.h b/clang/include/clang/Sema/SemaPPC.h
index f8edecc4fcb7b..0cf6ba7ff29dd 100644
--- a/clang/include/clang/Sema/SemaPPC.h
+++ b/clang/include/clang/Sema/SemaPPC.h
@@ -53,6 +53,10 @@ class SemaPPC : public SemaBase {
// vector double vec_xxpermdi(vector double, vector double, int);
// vector short vec_xxsldwi(vector short, vector short, int);
bool BuiltinVSX(CallExpr *TheCall);
+
+ bool checkTargetClonesAttr(SmallVectorImpl<StringRef> &Params,
+ SmallVectorImpl<SourceLocation> &Locs,
+ SmallVectorImpl<SmallString<64>> &NewParams);
};
} // namespace clang
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 3f63420cae91e..6f5784d7d4a64 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -15033,6 +15033,8 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex());
if (VersionStr.starts_with("arch="))
TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1);
+ else if (Target->getTriple().isOSAIX() && VersionStr.starts_with("cpu=")) // TODO make a function that extracts CPU from a feature string
+ TargetCPU = VersionStr.drop_front(sizeof("cpu=") - 1);
else if (VersionStr != "default")
Features.push_back((StringRef{"+"} + VersionStr).str());
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index a37a68ad91724..7f8005db3cd3e 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -678,6 +678,42 @@ void PPCTargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
}
}
+ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const { // Splits a comma-separated attribute string and records its "cpu=" entry in Ret.CPU.
+ ParsedTargetAttr Ret;
+ if (Features == "default") // "default" carries no CPU, so the empty result is returned untouched.
+ return Ret;
+ SmallVector<StringRef, 1> AttrFeatures;
+ Features.split(AttrFeatures, ",");
+
+ // Walk every comma-separated entry. In this revision only "cpu=<name>" is
+ // understood; any other spelling reaches the assert(0) in the else branch.
+ for (auto &Feature : AttrFeatures) {
+ // Strip surrounding whitespace first so that " cpu=pwr9 " is handled
+ // exactly like "cpu=pwr9".
+ Feature = Feature.trim();
+
+ // The trimmed text following the '=' becomes the target CPU.
+ if (Feature.starts_with("cpu=")) {
+ assert(Ret.CPU.empty()); // A repeated "cpu=" is only caught when assertions are enabled; otherwise the later entry overwrites the earlier one.
+ Ret.CPU = Feature.split("=").second.trim();
+ } else assert(0); // NOTE(review): assert compiles to nothing under NDEBUG, so unsupported entries are then silently ignored.
+// else if (Feature.starts_with("tune=")) {
+// if (!Ret.Tune.empty())
+// Ret.Duplicate = "tune=";
+// else
+// Ret.Tune = Feature.split("=").second.trim();
+// } else if (Feature.starts_with("no-"))
+// Ret.Features.push_back("-" + Feature.split("-").second.str());
+// else
+// Ret.Features.push_back("+" + Feature.str());
+ }
+ return Ret;
+}
+
+llvm::APInt PPCTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const { // Returns a 32-bit priority: 0 for an empty feature list, 1 for any non-empty list.
+ return llvm::APInt(32, Features.empty() ? 0 : 1);
+}
+
// Make sure that registers are added in the correct array index which should be
// the DWARF number for PPC registers.
const char *const PPCTargetInfo::GCCRegNames[] = {
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 664c9e15d8d18..6f90ff1f5d57c 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -199,6 +199,10 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
bool supportsTargetAttributeTune() const override { return true; }
+ ParsedTargetAttr parseTargetAttr(StringRef Str) const override;
+
+ llvm::APInt getFMVPriority(ArrayRef<StringRef> Features) const override;
+
ArrayRef<const char *> getGCCRegNames() const override;
ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override;
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 61128316963ac..e4ef527a536d3 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -45,6 +45,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/xxhash.h"
@@ -3027,12 +3028,77 @@ void CodeGenFunction::EmitMultiVersionResolver(
case llvm::Triple::riscv64:
EmitRISCVMultiVersionResolver(Resolver, Options);
return;
-
+ case llvm::Triple::ppc:
+ case llvm::Triple::ppc64:
+ if (getContext().getTargetInfo().getTriple().isOSAIX()) {
+ EmitPPCAIXMultiVersionResolver(Resolver, Options);
+ return;
+ }
+ [[fallthrough]];
default:
- assert(false && "Only implemented for x86, AArch64 and RISC-V targets");
+ assert(false && "Only implemented for x86, AArch64, RISC-V, and PowerPC targets");
+ }
+}
+
+/*
+ * Emits the body of the resolver function used for function multi-versioning
+ * on AIX.
+ *
+ * Options are visited in order. For every option that names a feature, a
+ * condition is produced with EmitPPCBuiltinCpu (the cpu_is form when the
+ * feature string starts with "cpu=", the cpu_supports form otherwise). When
+ * the condition holds the resolver returns that option's function; otherwise
+ * control moves on to a fresh "if.else" block where the next option is
+ * tested. An option with no architecture and no features is the default: it
+ * is asserted to be the last entry and is returned unconditionally.
+ *
+ * NOTE(review): if Options contains no default entry, the final "if.else"
+ * block (or "entry" when Options is empty) receives no terminator, because
+ * the trap/unreachable sequence at the end of the function is commented out.
+ *
+ * NOTE(review): the pseudo-code below was the original comment here. It is
+ * presumably a sketch of the code that consumes the resolver's result; none
+ * of it is emitted by this function - confirm where it belongs.
+ *
+ * Desc_t *foo_desc = ppc_get_function_descriptor(&foo);
+ * if (foo_desc->addr == ppc_get_function_entry(&foo)) {
+ * FuncPtr fp = resolver();
+ * __c11_atomic_store((_Atomic FuncPtr *)&foo_desc->addr, fp, 0);
+ * }
+ * return ((int (*)(int)) foo_desc)(a);
+ */
+void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
+ llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {
+
+ llvm::PointerType *PtrTy = Builder.getPtrTy(); // NOTE(review): not referenced again in this function.
+ // entry: the first block of the resolver; the first option is tested here.
+ llvm::BasicBlock *CurBlock = createBasicBlock("entry", Resolver);
+
+ SmallVector<std::pair<llvm::Value *, llvm::BasicBlock *>, 3> PhiArgs; // NOTE(review): never filled or read in this function.
+ for (const FMVResolverOption &RO : Options) {
+ Builder.SetInsertPoint(CurBlock);
+ // The 'default' or 'generic' case: no architecture and no features.
+ if (!RO.Architecture && RO.Features.empty()) {
+ // Reaching this point means every earlier check failed (or there were
+ // none), so the default version is returned directly from the current
+ // block and the loop stops; no comparison is emitted for it.
+ assert(&RO == Options.end() - 1 && "Default or Generic case must be last");
+ Builder.CreateRet(RO.Function);
+ break;
+ }
+ // Versioned case. The blocks produced for one option look like:
+ //   <current block>:
+ //     %cond = <cpu_is or cpu_supports check for this option>
+ //     br i1 %cond, label %if.version, label %if.else
+ //   if.version:
+ //     ret <this option's function>
+ // The new if.else block becomes the current block for the next option.
+ assert(RO.Features.size() == 1 && "for now one feature requirement per version");
+ llvm::Value *Condition;
+ if (RO.Features[0].starts_with("cpu=")) {
+ Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_is, Builder.getInt1Ty(), RO.Features[0].split("=").second.trim());
+ } else {
+ Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_supports, Builder.getInt1Ty(), RO.Features[0]);
+ }
+ llvm::BasicBlock *ThenBlock = createBasicBlock("if.version", Resolver);
+ CurBlock = createBasicBlock("if.else", Resolver);
+ Builder.CreateCondBr(Condition, ThenBlock, CurBlock);
+
+ Builder.SetInsertPoint(ThenBlock);
+ Builder.CreateRet(RO.Function);
+ }
+
+ // No generic/default option: nothing is emitted for that case at present, because the trap + unreachable code below is disabled.
+// Builder.SetInsertPoint(CurBlock);
+// llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap);
+// TrapCall->setDoesNotReturn();
+// TrapCall->setDoesNotThrow();
+// Builder.CreateUnreachable();
+// Builder.ClearInsertionPoint();
+}
+
void CodeGenFunction::EmitRISCVMultiVersionResolver(
llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index aecc4ec40aa97..7fb1b634f857f 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -5557,6 +5557,9 @@ class CodeGenFunction : public CodeGenTypeCache {
void EmitRISCVMultiVersionResolver(llvm::Function *Resolver,
ArrayRef<FMVResolverOption> Options);
+ void EmitPPCAIXMultiVersionResolver(llvm::Function *Resolver,
+ ArrayRef<FMVResolverOption> Options);
+
private:
QualType getVarArgType(const Expr *Arg);
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index d50c9605a30b3..576b4c6ba0f3e 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -3008,9 +3008,10 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
// While we populated the feature map above, we still need to
// get and parse the target attribute so we can get the cpu for
// the function.
- if (TD) {
- ParsedTargetAttr ParsedAttr =
- Target.parseTargetAttr(TD->getFeaturesStr());
+ StringRef FeatureStr = TD ? TD->getFeaturesStr() :
+ (TC ? TC->getFeatureStr(GD.getMultiVersionIndex()) : StringRef());
+ if (!FeatureStr.empty()) {
+ ParsedTargetAttr ParsedAttr = Target.parseTargetAttr(FeatureStr);
if (!ParsedAttr.CPU.empty() &&
getTarget().isValidCPUName(ParsedAttr.CPU)) {
TargetCPU = ParsedAttr.CPU;
@@ -4795,7 +4796,8 @@ void CodeGenModule::emitMultiVersionFunctions() {
if (auto *IFunc = dyn_cast<llvm::GlobalIFunc>(ResolverConstant)) {
ResolverConstant = IFunc->getResolver();
if (FD->isTargetClonesMultiVersion() &&
- !getTarget().getTriple().isAArch64()) {
+ !getTarget().getTriple().isAArch64() &&
+ !getTarget().getTriple().isOSAIX()) {
std::string MangledName = getMangledNameImpl(
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
if (!GetGlobalValue(MangledName + ".ifunc")) {
diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp
index 35e7655646ade..bc357e0908a5e 100644
--- a/clang/lib/CodeGen/Targets/PPC.cpp
+++ b/clang/lib/CodeGen/Targets/PPC.cpp
@@ -128,8 +128,55 @@ class AIXABIInfo : public ABIInfo {
RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
AggValueSlot Slot) const override;
+
+ using ABIInfo::appendAttributeMangling;
+ void appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
+ raw_ostream &Out) const override;
+ void appendAttributeMangling(StringRef AttrStr,
+ raw_ostream &Out) const override;
};
+void AIXABIInfo::appendAttributeMangling(TargetClonesAttr *Attr, // Mangles version number Index of a target_clones attribute by handing its feature string to the StringRef overload.
+ unsigned Index,
+ raw_ostream &Out) const {
+ appendAttributeMangling(Attr->getFeatureStr(Index), Out);
+}
+
+void AIXABIInfo::appendAttributeMangling(StringRef AttrStr, // Writes the name suffix for one function version, described by AttrStr, to Out.
+ raw_ostream &Out) const {
+ if (AttrStr == "default") { // The default version always gets the fixed suffix ".default".
+ Out << ".default";
+ return;
+ }
+
+ Out << '.';
+ const TargetInfo &TI = CGT.getTarget();
+ ParsedTargetAttr Info = TI.parseTargetAttr(AttrStr);
+
+ llvm::sort(Info.Features, [&TI](StringRef LHS, StringRef RHS) { // Orders features from highest to lowest FMV priority.
+ // Multiversioning does not accept "no-${feature}" spellings, so every
+ // entry compared here is expected to start with "+".
+ assert(LHS.starts_with("+") && RHS.starts_with("+") &&
+ "Features should always have a prefix.");
+ return TI.getFMVPriority({LHS.substr(1)})
+ .ugt(TI.getFMVPriority({RHS.substr(1)}));
+ });
+
+ bool IsFirst = true;
+ if (!Info.CPU.empty()) {
+ IsFirst = false;
+ Out << "cpu_" << Info.CPU; // With the leading '.', a parsed CPU of "pwr9" gives the suffix ".cpu_pwr9".
+ }
+
+ assert(Info.Features.empty() && "unhandled case"); // Only CPU-based versions are expected; the loop below can see features only when this assert is compiled out.
+ for (StringRef Feat : Info.Features) {
+ if (!IsFirst)
+ Out << '_';
+ IsFirst = false;
+ Out << Feat.substr(1); // Skips the one-character prefix (expected to be '+').
+ }
+}
+
class AIXTargetCodeGenInfo : public TargetCodeGenInfo {
const bool Is64Bit;
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index bee42cce09aca..9c4dea25b53ab 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -54,6 +54,7 @@
#include "clang/Sema/SemaObjC.h"
#include "clang/Sema/SemaOpenCL.h"
#include "clang/Sema/SemaOpenMP.h"
+#include "clang/Sema/SemaPPC.h"
#include "clang/Sema/SemaRISCV.h"
#include "clang/Sema/SemaSYCL.h"
#include "clang/Sema/SemaSwift.h"
@@ -3623,6 +3624,9 @@ static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
if (S.X86().checkTargetClonesAttr(Params, Locations, NewParams,
AL.getLoc()))
return;
+ } else if (S.Context.getTargetInfo().getTriple().isOSAIX()) {
+ if (S.PPC().checkTargetClonesAttr(Params, Locations, NewParams))
+ return;
}
Params.clear();
for (auto &SmallStr : NewParams)
diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp
index 149c564bd5b84..2fd6a3e911fd1 100644
--- a/clang/lib/Sema/SemaPPC.cpp
+++ b/clang/lib/Sema/SemaPPC.cpp
@@ -562,4 +562,60 @@ bool SemaPPC::BuiltinVSX(CallExpr *TheCall) {
return false;
}
+bool SemaPPC::checkTargetClonesAttr( // Validates the strings of a target_clones attribute and appends each accepted version to NewParams; returns false when all checks pass.
+ SmallVectorImpl<StringRef> &Params, SmallVectorImpl<SourceLocation> &Locs,
+ SmallVectorImpl<SmallString<64>> &NewParams) {
+ using namespace DiagAttrParams;
+
+ assert(Params.size() == Locs.size() &&
+ "Mismatch between number of string parameters and locations");
+
+ bool HasDefault = false; // Set once a "default" version has been seen.
+ bool HasComma = false; // Set when any single string packs several versions separated by commas.
+ for (unsigned I = 0, E = Params.size(); I < E; ++I) {
+ const StringRef Param = Params[I].trim();
+ const SourceLocation &Loc = Locs[I];
+
+ if (Param.empty() || Param.ends_with(',')) // An empty string or a trailing comma is diagnosed and stops the check.
+ return Diag(Loc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << "" << TargetClones;
+
+ if (Param.contains(','))
+ HasComma = true;
+
+ StringRef LHS;
+ StringRef RHS = Param;
+ do { // Each iteration peels one comma-separated version off the front of RHS.
+ std::tie(LHS, RHS) = RHS.split(',');
+ LHS = LHS.trim();
+ const SourceLocation &CurLoc = // NOTE(review): the offset is measured from the trimmed Param; confirm Loc refers to that same first character.
+ Loc.getLocWithOffset(LHS.data() - Param.data());
+
+ if (LHS.starts_with("cpu=")) { // "cpu=<name>": the name must be a CPU the target recognises.
+ if (!getASTContext().getTargetInfo().isValidCPUName(
+ LHS.drop_front(sizeof("cpu=") - 1)))
+ return Diag(CurLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << CPU << LHS.drop_front(sizeof("cpu=") - 1)
+ << TargetClones;
+ } else if (LHS == "default")
+ HasDefault = true;
+ else if (!getASTContext().getTargetInfo().isValidFeatureName(LHS) || // Anything else must be a valid feature name with a non-zero FMV priority.
+ getASTContext().getTargetInfo().getFMVPriority(LHS) == 0)
+ return Diag(CurLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << LHS << TargetClones;
+
+ if (llvm::is_contained(NewParams, LHS)) // A repeated version only triggers a warning; it is still recorded below.
+ Diag(CurLoc, diag::warn_target_clone_duplicate_options);
+ // Note: Add even if there are duplicates, since it changes name mangling.
+ NewParams.push_back(LHS);
+ } while (!RHS.empty());
+ }
+ if (HasComma && Params.size() > 1) // Warn when comma-packed strings are combined with separate string arguments.
+ Diag(Locs[0], diag::warn_target_clone_mixed_values);
+
+ if (!HasDefault) // A "default" version is mandatory; its absence is reported with an error diagnostic.
+ return Diag(Locs[0], diag::err_target_clone_must_have_default);
+
+ return false;
+}
} // namespace clang
>From bdd750ef734e025f53313f19b579b223db719888 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Sat, 27 Sep 2025 23:26:33 -0400
Subject: [PATCH 03/15] ignore target_clones on a declaration and internalize
the resolver and the clones
---
clang/lib/CodeGen/CodeGenModule.cpp | 40 ++++++++++++++++++++++++-----
1 file changed, 34 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 576b4c6ba0f3e..00c82a7b70ef9 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2092,6 +2092,19 @@ static bool isUniqueInternalLinkageDecl(GlobalDecl GD,
(CGM.getFunctionLinkage(GD) == llvm::GlobalValue::InternalLinkage);
}
+// Returns true when FD is a multi-version function that should nevertheless be handled like a regular non-FMV function.
+// Only AIX does this: there FD is asserted to use target_clones and the answer is !FD->isDefined(); every other case yields false.
+static bool IgnoreFMVOnADeclaration(const llvm::Triple &Triple, const FunctionDecl *FD) {
+ if (!FD->isMultiVersion())
+ return false;
+
+ if (Triple.isOSAIX()) {
+ assert(FD->isTargetClonesMultiVersion());
+ return !FD->isDefined();
+ }
+ return false;
+}
+
static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
const NamedDecl *ND,
bool OmitMultiVersionMangling = false) {
@@ -2141,8 +2154,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
Out << CGM.getModuleNameHash();
}
- if (const auto *FD = dyn_cast<FunctionDecl>(ND))
- if (FD->isMultiVersion() && !OmitMultiVersionMangling) {
+ if (const auto *FD = dyn_cast<FunctionDecl>(ND)) {
+ if (FD->isMultiVersion() && !OmitMultiVersionMangling &&
+ !IgnoreFMVOnADeclaration(CGM.getTriple(), FD)) {
switch (FD->getMultiVersionKind()) {
case MultiVersionKind::CPUDispatch:
case MultiVersionKind::CPUSpecific:
@@ -2179,6 +2193,7 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
llvm_unreachable("None multiversion type isn't valid here");
}
}
+ }
// Make unique name for device side static file-scope variable for HIP.
if (CGM.getContext().shouldExternalize(ND) &&
@@ -4710,7 +4725,8 @@ getFMVPriority(const TargetInfo &TI,
static llvm::GlobalValue::LinkageTypes
getMultiversionLinkage(CodeGenModule &CGM, GlobalDecl GD) {
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
- if (FD->getFormalLinkage() == Linkage::Internal)
+ if (FD->getFormalLinkage() == Linkage::Internal ||
+ CGM.getTriple().isOSAIX())
return llvm::GlobalValue::InternalLinkage;
return llvm::GlobalValue::WeakODRLinkage;
}
@@ -4744,7 +4760,7 @@ void CodeGenModule::emitMultiVersionFunctions() {
// For AArch64, a resolver is only emitted if a function marked with
// target_version("default")) or target_clones("default") is defined
// in this TU. For other architectures it is always emitted.
- bool ShouldEmitResolver = !getTarget().getTriple().isAArch64();
+ bool ShouldEmitResolver = !getTriple().isAArch64();
SmallVector<CodeGenFunction::FMVResolverOption, 10> Options;
llvm::DenseMap<llvm::Function *, const FunctionDecl *> DeclMap;
@@ -5063,8 +5079,11 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) {
llvm::Constant *Resolver = GetOrCreateLLVMFunction(
MangledName + ".resolver", ResolverType, GlobalDecl{},
/*ForVTable=*/false);
+
+ auto Linkage = getTriple().isOSAIX() ? getFunctionLinkage(GD) : getMultiversionLinkage(*this, GD);
+
llvm::GlobalIFunc *GIF =
- llvm::GlobalIFunc::create(DeclTy, AS, getMultiversionLinkage(*this, GD),
+ llvm::GlobalIFunc::create(DeclTy, AS, Linkage,
"", Resolver, &getModule());
GIF->setName(ResolverName);
SetCommonAttributes(FD, GIF);
@@ -5084,7 +5103,9 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) {
void CodeGenModule::setMultiVersionResolverAttributes(llvm::Function *Resolver,
GlobalDecl GD) {
const NamedDecl *D = dyn_cast_or_null<NamedDecl>(GD.getDecl());
- Resolver->setLinkage(getMultiversionLinkage(*this, GD));
+
+ auto ResolverLinkage = getTriple().isOSAIX() ? llvm::GlobalValue::InternalLinkage : getMultiversionLinkage(*this, GD);
+ Resolver->setLinkage(ResolverLinkage);
// Function body has to be emitted before calling setGlobalVisibility
// for Resolver to be considered as definition.
@@ -5163,6 +5184,10 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
AddDeferredMultiVersionResolverToEmit(GD);
NameWithoutMultiVersionMangling = getMangledNameImpl(
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
+ } else if (IgnoreFMVOnADeclaration(getTriple(), FD)) {
+ // TODO this might not be necessary after fix in getMangledNameImpl
+ NameWithoutMultiVersionMangling = getMangledNameImpl(
+ *this, GD, FD, /*OmitMultiVersionMangling=*/true);
} else
return GetOrCreateMultiVersionResolver(GD);
}
@@ -6623,6 +6648,9 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
auto *Fn = cast<llvm::Function>(GV);
setFunctionLinkage(GD, Fn);
+ if (getTriple().isOSAIX() && D->isTargetClonesMultiVersion())
+ Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
+
// FIXME: this is redundant with part of setFunctionDefinitionAttributes
setGVProperties(Fn, GD);
>From 104732c74757e123f682f872939d8a49ca3dd89f Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 30 Sep 2025 15:27:07 -0400
Subject: [PATCH 04/15] fix PPCTargetInfo::parseTargetAttr
---
clang/lib/Basic/Targets/PPC.cpp | 26 ++++++++++++++------------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 7f8005db3cd3e..dd51b46727a6a 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -694,18 +694,20 @@ ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const {
// While we're here iterating check for a different target cpu.
if (Feature.starts_with("cpu=")) {
- assert(Ret.CPU.empty());
- Ret.CPU = Feature.split("=").second.trim();
- } else assert(0);
-// else if (Feature.starts_with("tune=")) {
-// if (!Ret.Tune.empty())
-// Ret.Duplicate = "tune=";
-// else
-// Ret.Tune = Feature.split("=").second.trim();
-// } else if (Feature.starts_with("no-"))
-// Ret.Features.push_back("-" + Feature.split("-").second.str());
-// else
-// Ret.Features.push_back("+" + Feature.str());
+ if (!Ret.CPU.empty())
+ Ret.Duplicate = "cpu=";
+ else
+ Ret.CPU = Feature.split("=").second.trim();
+ }
+ else if (Feature.starts_with("tune=")) {
+ if (!Ret.Tune.empty())
+ Ret.Duplicate = "tune=";
+ else
+ Ret.Tune = Feature.split("=").second.trim();
+ } else if (Feature.starts_with("no-"))
+ Ret.Features.push_back("-" + Feature.split("-").second.str());
+ else
+ Ret.Features.push_back("+" + Feature.str());
}
return Ret;
}
>From 72ced461a47aa2ace5de58e27eb4cd2aa0e58cd5 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 30 Sep 2025 19:38:35 +0000
Subject: [PATCH 05/15] fix Sema/attr-target.c
---
clang/test/Sema/attr-target.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/clang/test/Sema/attr-target.c b/clang/test/Sema/attr-target.c
index 65ece3c27d299..ddf6654632187 100644
--- a/clang/test/Sema/attr-target.c
+++ b/clang/test/Sema/attr-target.c
@@ -75,15 +75,13 @@ int __attribute__((target("tune=pwr8"))) baz(void) { return 4; }
//expected-warning@+1 {{unsupported 'fpmath=' in the 'target' attribute string; 'target' attribute ignored}}
int __attribute__((target("fpmath=387"))) walrus(void) { return 4; }
//expected-warning@+1 {{unknown CPU 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
-int __attribute__((target("float128,arch=hiss"))) meow(void) { return 4; }
+int __attribute__((target("float128,cpu=hiss"))) meow(void) { return 4; }
// no warning, same as saying 'nothing'.
-int __attribute__((target("arch="))) turtle(void) { return 4; }
+int __attribute__((target("cpu="))) turtle(void) { return 4; }
//expected-warning@+1 {{unknown CPU 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
-int __attribute__((target("arch=hiss,arch=woof"))) pine_tree(void) { return 4; }
-//expected-warning@+1 {{duplicate 'arch=' in the 'target' attribute string; 'target' attribute ignored}}
-int __attribute__((target("arch=pwr9,arch=pwr10"))) oak_tree(void) { return 4; }
-//expected-warning@+1 {{unsupported 'branch-protection' in the 'target' attribute string; 'target' attribute ignored}}
-int __attribute__((target("branch-protection=none"))) birch_tree(void) { return 5; }
+int __attribute__((target("cpu=hiss,cpu=woof"))) pine_tree(void) { return 4; }
+//expected-warning@+1 {{duplicate 'cpu=' in the 'target' attribute string; 'target' attribute ignored}}
+int __attribute__((target("cpu=pwr9,cpu=pwr10"))) oak_tree(void) { return 4; }
//expected-warning@+1 {{unknown tune CPU 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
int __attribute__((target("tune=hiss,tune=woof"))) apple_tree(void) { return 4; }
>From 7d29d4358b1514367a464d5e20dce832e6f3d9a6 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 7 Oct 2025 23:11:21 +0000
Subject: [PATCH 06/15] clang-format
---
clang/lib/AST/ASTContext.cpp | 5 +++-
clang/lib/Basic/Targets/PPC.cpp | 3 +--
clang/lib/CodeGen/CodeGenFunction.cpp | 31 ++++++++++++++----------
clang/lib/CodeGen/CodeGenFunction.h | 5 ++--
clang/lib/CodeGen/CodeGenModule.cpp | 29 ++++++++++++----------
clang/lib/CodeGen/TargetBuiltins/PPC.cpp | 14 +++++------
clang/lib/CodeGen/Targets/PPC.cpp | 5 ++--
7 files changed, 51 insertions(+), 41 deletions(-)
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 6f5784d7d4a64..2152b685411fa 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -15033,7 +15033,10 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex());
if (VersionStr.starts_with("arch="))
TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1);
- else if (Target->getTriple().isOSAIX() && VersionStr.starts_with("cpu=")) // TODO make a function that extracts CPU from a feature string
+ else if (Target->getTriple().isOSAIX() &&
+ VersionStr.starts_with(
+ "cpu=")) // TODO make a function that extracts CPU from a
+ // feature string
TargetCPU = VersionStr.drop_front(sizeof("cpu=") - 1);
else if (VersionStr != "default")
Features.push_back((StringRef{"+"} + VersionStr).str());
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index dd51b46727a6a..5b86f84264905 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -698,8 +698,7 @@ ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const {
Ret.Duplicate = "cpu=";
else
Ret.CPU = Feature.split("=").second.trim();
- }
- else if (Feature.starts_with("tune=")) {
+ } else if (Feature.starts_with("tune=")) {
if (!Ret.Tune.empty())
Ret.Duplicate = "tune=";
else
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index e4ef527a536d3..882eea2f52361 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -3036,7 +3036,8 @@ void CodeGenFunction::EmitMultiVersionResolver(
}
[[fallthrough]];
default:
- assert(false && "Only implemented for x86, AArch64, RISC-V, and PowerPC targets");
+ assert(false &&
+ "Only implemented for x86, AArch64, RISC-V, and PowerPC targets");
}
}
@@ -3049,7 +3050,7 @@ void CodeGenFunction::EmitMultiVersionResolver(
* return ((int (*)(int)) foo_desc)(a);
*/
void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
- llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {
+ llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {
llvm::PointerType *PtrTy = Builder.getPtrTy();
// entry:
@@ -3063,7 +3064,8 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
// if.default:
// %fmv.default = call ptr @getEntryPoint(ptr noundef @foo_default)
// br label %resolver_exit
- assert(&RO == Options.end() - 1 && "Default or Generic case must be last");
+ assert(&RO == Options.end() - 1 &&
+ "Default or Generic case must be last");
Builder.CreateRet(RO.Function);
break;
}
@@ -3074,12 +3076,16 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
// if.version_n:
// %fmv.version.n = call ptr @getEntryPoint(ptr noundef @foo_version_n)
// br label %resolver_exit
- assert(RO.Features.size() == 1 && "for now one feature requirement per version");
+ assert(RO.Features.size() == 1 &&
+ "for now one feature requirement per version");
llvm::Value *Condition;
if (RO.Features[0].starts_with("cpu=")) {
- Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_is, Builder.getInt1Ty(), RO.Features[0].split("=").second.trim());
+ Condition =
+ EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_is, Builder.getInt1Ty(),
+ RO.Features[0].split("=").second.trim());
} else {
- Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_supports, Builder.getInt1Ty(), RO.Features[0]);
+ Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_supports,
+ Builder.getInt1Ty(), RO.Features[0]);
}
llvm::BasicBlock *ThenBlock = createBasicBlock("if.version", Resolver);
CurBlock = createBasicBlock("if.else", Resolver);
@@ -3090,15 +3096,14 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
}
// If no generic/default, emit an unreachable.
-// Builder.SetInsertPoint(CurBlock);
-// llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap);
-// TrapCall->setDoesNotReturn();
-// TrapCall->setDoesNotThrow();
-// Builder.CreateUnreachable();
-// Builder.ClearInsertionPoint();
+ // Builder.SetInsertPoint(CurBlock);
+ // llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap);
+ // TrapCall->setDoesNotReturn();
+ // TrapCall->setDoesNotThrow();
+ // Builder.CreateUnreachable();
+ // Builder.ClearInsertionPoint();
}
-
void CodeGenFunction::EmitRISCVMultiVersionResolver(
llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 7fb1b634f857f..226e22cb3992e 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4889,7 +4889,8 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *BuildVector(ArrayRef<llvm::Value *> Ops);
llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
- llvm::Value *EmitPPCBuiltinCpu(unsigned BuiltinID, llvm::Type *ReturnType, StringRef CPUStr);
+ llvm::Value *EmitPPCBuiltinCpu(unsigned BuiltinID, llvm::Type *ReturnType,
+ StringRef CPUStr);
llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
@@ -5558,7 +5559,7 @@ class CodeGenFunction : public CodeGenTypeCache {
ArrayRef<FMVResolverOption> Options);
void EmitPPCAIXMultiVersionResolver(llvm::Function *Resolver,
- ArrayRef<FMVResolverOption> Options);
+ ArrayRef<FMVResolverOption> Options);
private:
QualType getVarArgType(const Expr *Arg);
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 00c82a7b70ef9..a2c81ab00b021 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2094,7 +2094,8 @@ static bool isUniqueInternalLinkageDecl(GlobalDecl GD,
// On certain platforms, a declared (but not defined) FMV shall be treated
// like a regular non-FMV function.
-static bool IgnoreFMVOnADeclaration(const llvm::Triple &Triple, const FunctionDecl *FD) {
+static bool IgnoreFMVOnADeclaration(const llvm::Triple &Triple,
+ const FunctionDecl *FD) {
if (!FD->isMultiVersion())
return false;
@@ -2156,7 +2157,7 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
if (const auto *FD = dyn_cast<FunctionDecl>(ND)) {
if (FD->isMultiVersion() && !OmitMultiVersionMangling &&
- !IgnoreFMVOnADeclaration(CGM.getTriple(), FD)) {
+ !IgnoreFMVOnADeclaration(CGM.getTriple(), FD)) {
switch (FD->getMultiVersionKind()) {
case MultiVersionKind::CPUDispatch:
case MultiVersionKind::CPUSpecific:
@@ -3023,8 +3024,9 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
// While we populated the feature map above, we still need to
// get and parse the target attribute so we can get the cpu for
// the function.
- StringRef FeatureStr = TD ? TD->getFeaturesStr() :
- (TC ? TC->getFeatureStr(GD.getMultiVersionIndex()) : StringRef());
+ StringRef FeatureStr =
+ TD ? TD->getFeaturesStr()
+ : (TC ? TC->getFeatureStr(GD.getMultiVersionIndex()) : StringRef());
if (!FeatureStr.empty()) {
ParsedTargetAttr ParsedAttr = Target.parseTargetAttr(FeatureStr);
if (!ParsedAttr.CPU.empty() &&
@@ -4725,8 +4727,7 @@ getFMVPriority(const TargetInfo &TI,
static llvm::GlobalValue::LinkageTypes
getMultiversionLinkage(CodeGenModule &CGM, GlobalDecl GD) {
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
- if (FD->getFormalLinkage() == Linkage::Internal ||
- CGM.getTriple().isOSAIX())
+ if (FD->getFormalLinkage() == Linkage::Internal || CGM.getTriple().isOSAIX())
return llvm::GlobalValue::InternalLinkage;
return llvm::GlobalValue::WeakODRLinkage;
}
@@ -4813,7 +4814,7 @@ void CodeGenModule::emitMultiVersionFunctions() {
ResolverConstant = IFunc->getResolver();
if (FD->isTargetClonesMultiVersion() &&
!getTarget().getTriple().isAArch64() &&
- !getTarget().getTriple().isOSAIX()) {
+ !getTarget().getTriple().isOSAIX()) {
std::string MangledName = getMangledNameImpl(
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
if (!GetGlobalValue(MangledName + ".ifunc")) {
@@ -5080,11 +5081,11 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) {
MangledName + ".resolver", ResolverType, GlobalDecl{},
/*ForVTable=*/false);
- auto Linkage = getTriple().isOSAIX() ? getFunctionLinkage(GD) : getMultiversionLinkage(*this, GD);
+ auto Linkage = getTriple().isOSAIX() ? getFunctionLinkage(GD)
+ : getMultiversionLinkage(*this, GD);
- llvm::GlobalIFunc *GIF =
- llvm::GlobalIFunc::create(DeclTy, AS, Linkage,
- "", Resolver, &getModule());
+ llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(DeclTy, AS, Linkage, "",
+ Resolver, &getModule());
GIF->setName(ResolverName);
SetCommonAttributes(FD, GIF);
if (ResolverGV)
@@ -5104,7 +5105,9 @@ void CodeGenModule::setMultiVersionResolverAttributes(llvm::Function *Resolver,
GlobalDecl GD) {
const NamedDecl *D = dyn_cast_or_null<NamedDecl>(GD.getDecl());
- auto ResolverLinkage = getTriple().isOSAIX() ? llvm::GlobalValue::InternalLinkage : getMultiversionLinkage(*this, GD);
+ auto ResolverLinkage = getTriple().isOSAIX()
+ ? llvm::GlobalValue::InternalLinkage
+ : getMultiversionLinkage(*this, GD);
Resolver->setLinkage(ResolverLinkage);
// Function body has to be emitted before calling setGlobalVisibility
@@ -6649,7 +6652,7 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
setFunctionLinkage(GD, Fn);
if (getTriple().isOSAIX() && D->isTargetClonesMultiVersion())
- Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
+ Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
// FIXME: this is redundant with part of setFunctionDefinitionAttributes
setGVProperties(Fn, GD);
diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
index 8360a17c470dd..a730de0fb856d 100644
--- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
@@ -70,8 +70,9 @@ static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
return CI;
}
-Value *CodeGenFunction::EmitPPCBuiltinCpu(
- unsigned BuiltinID, llvm::Type *ReturnType, StringRef CPUStr) {
+Value *CodeGenFunction::EmitPPCBuiltinCpu(unsigned BuiltinID,
+ llvm::Type *ReturnType,
+ StringRef CPUStr) {
#include "llvm/TargetParser/PPCTargetParser.def"
auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
@@ -159,8 +160,7 @@ Value *CodeGenFunction::EmitPPCBuiltinCpu(
Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
return Builder.CreateICmpEQ(TheCall,
llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
- }
- else if (BuiltinID == Builtin::BI__builtin_cpu_supports) {
+ } else if (BuiltinID == Builtin::BI__builtin_cpu_supports) {
llvm::Triple Triple = getTarget().getTriple();
if (Triple.isOSAIX()) {
typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
@@ -197,8 +197,7 @@ Value *CodeGenFunction::EmitPPCBuiltinCpu(
#undef PPC_FAWORD_HWCAP
#undef PPC_FAWORD_HWCAP2
#undef PPC_FAWORD_CPUID
- }
- else
+ } else
assert(0 && "unexpected builtin");
}
@@ -219,7 +218,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
Intrinsic::ID ID = Intrinsic::not_intrinsic;
switch (BuiltinID) {
- default: return nullptr;
+ default:
+ return nullptr;
case Builtin::BI__builtin_cpu_is:
case Builtin::BI__builtin_cpu_supports: {
diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp
index bc357e0908a5e..61d110e3c7ae3 100644
--- a/clang/lib/CodeGen/Targets/PPC.cpp
+++ b/clang/lib/CodeGen/Targets/PPC.cpp
@@ -136,8 +136,7 @@ class AIXABIInfo : public ABIInfo {
raw_ostream &Out) const override;
};
-void AIXABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
- unsigned Index,
+void AIXABIInfo::appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
raw_ostream &Out) const {
appendAttributeMangling(Attr->getFeatureStr(Index), Out);
}
@@ -159,7 +158,7 @@ void AIXABIInfo::appendAttributeMangling(StringRef AttrStr,
assert(LHS.starts_with("+") && RHS.starts_with("+") &&
"Features should always have a prefix.");
return TI.getFMVPriority({LHS.substr(1)})
- .ugt(TI.getFMVPriority({RHS.substr(1)}));
+ .ugt(TI.getFMVPriority({RHS.substr(1)}));
});
bool IsFirst = true;
>From e7d14991b3fc6fa028ecc91049698b8eaf880ee7 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Fri, 6 Feb 2026 04:12:47 +0000
Subject: [PATCH 07/15] normalize the CPU name on the target* attribute
---
clang/lib/Basic/Targets/PPC.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 5b86f84264905..53c7af1b57767 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -708,6 +708,8 @@ ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const {
else
Ret.Features.push_back("+" + Feature.str());
}
+ Ret.CPU = llvm::PPC::normalizeCPUName(Ret.CPU);
+ Ret.Tune = llvm::PPC::normalizeCPUName(Ret.Tune);
return Ret;
}
>From 2501fcbe5da5631d4f9eec5a5798e4178af93dae Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 3 Mar 2026 17:10:55 +0000
Subject: [PATCH 08/15] limit support to cpu-only versions, and implement
getFMVPriority
---
clang/lib/AST/ASTContext.cpp | 13 ++++++-----
clang/lib/Basic/Targets/PPC.cpp | 18 ++++++++++++++-
clang/lib/CodeGen/CodeGenFunction.cpp | 32 +++++++++++++--------------
clang/lib/CodeGen/Targets/PPC.cpp | 24 ++++----------------
clang/lib/Sema/SemaPPC.cpp | 13 ++++++-----
5 files changed, 52 insertions(+), 48 deletions(-)
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 2152b685411fa..98463ab05e7a7 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -15028,16 +15028,19 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
Target->getTargetOpts().FeaturesAsWritten.begin(),
Target->getTargetOpts().FeaturesAsWritten.end());
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
+ } else if (Target->getTriple().isOSAIX()) {
+ std::vector<std::string> Features;
+ StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex());
+ if (VersionStr.starts_with("cpu="))
+ TargetCPU = VersionStr.drop_front(sizeof("cpu=") - 1);
+ else
+ assert(VersionStr == "default");
+ Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
} else {
std::vector<std::string> Features;
StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex());
if (VersionStr.starts_with("arch="))
TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1);
- else if (Target->getTriple().isOSAIX() &&
- VersionStr.starts_with(
- "cpu=")) // TODO make a function that extracts CPU from a
- // feature string
- TargetCPU = VersionStr.drop_front(sizeof("cpu=") - 1);
else if (VersionStr != "default")
Features.push_back((StringRef{"+"} + VersionStr).str());
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 53c7af1b57767..fbff0af711b13 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -714,7 +714,23 @@ ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const {
}
llvm::APInt PPCTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
- return llvm::APInt(32, Features.empty() ? 0 : 1);
+ if (Features.empty())
+ return llvm::APInt(32, 0);
+ assert(Features.size() == 1 && "one feature/cpu per clone on PowerPC");
+ ParsedTargetAttr ParsedAttr = parseTargetAttr(Features[0]);
+ if (!ParsedAttr.CPU.empty()) {
+ StringRef CPU = llvm::PPC::normalizeCPUName(ParsedAttr.CPU);
+ int Priority = llvm::StringSwitch<int>(CPU)
+ .Case("pwr7", 1)
+ .Case("pwr8", 2)
+ .Case("pwr9", 3)
+ .Case("pwr10", 4)
+ .Case("pwr11", 5)
+ .Default(0);
+ return llvm::APInt(32, Priority);
+ }
+ assert(false && "unimplemented");
+ return llvm::APInt(32, 0);
}
// Make sure that registers are added in the correct array index which should be
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 882eea2f52361..5897123f4019d 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -3041,13 +3041,8 @@ void CodeGenFunction::EmitMultiVersionResolver(
}
}
-/*
- * Desc_t *foo_desc = ppc_get_function_descriptor(&foo);
- * if (foo_desc->addr == ppc_get_function_entry(&foo)) {
- * FuncPtr fp = resolver();
- * __c11_atomic_store((_Atomic FuncPtr *)&foo_desc->addr, fp, 0);
- * }
- * return ((int (*)(int)) foo_desc)(a);
+/**
+ *
*/
void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {
@@ -3078,15 +3073,20 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
// br label %resolver_exit
assert(RO.Features.size() == 1 &&
"for now one feature requirement per version");
- llvm::Value *Condition;
- if (RO.Features[0].starts_with("cpu=")) {
- Condition =
- EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_is, Builder.getInt1Ty(),
- RO.Features[0].split("=").second.trim());
- } else {
- Condition = EmitPPCBuiltinCpu(Builtin::BI__builtin_cpu_supports,
- Builder.getInt1Ty(), RO.Features[0]);
- }
+
+ assert(RO.Features[0].starts_with("cpu="));
+ StringRef CPU = RO.Features[0].split("=").second.trim();
+ StringRef Feature = llvm::StringSwitch<StringRef>(CPU)
+ .Cases({"power7","pwr7"}, "arch_2_06")
+ .Cases({"power8","pwr8"}, "arch_2_07")
+ .Cases({"power9","pwr9"}, "arch_3_00")
+ .Cases({"power10","pwr10"}, "arch_3_1")
+ .Cases({"power11","pwr11"}, "arch_3_1")
+ .Default("error");
+
+ llvm::Value *Condition = EmitPPCBuiltinCpu(
+ Builtin::BI__builtin_cpu_supports, Builder.getInt1Ty(), Feature);
+
llvm::BasicBlock *ThenBlock = createBasicBlock("if.version", Resolver);
CurBlock = createBasicBlock("if.else", Resolver);
Builder.CreateCondBr(Condition, ThenBlock, CurBlock);
diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp
index 61d110e3c7ae3..6694725277144 100644
--- a/clang/lib/CodeGen/Targets/PPC.cpp
+++ b/clang/lib/CodeGen/Targets/PPC.cpp
@@ -148,32 +148,16 @@ void AIXABIInfo::appendAttributeMangling(StringRef AttrStr,
return;
}
- Out << '.';
const TargetInfo &TI = CGT.getTarget();
ParsedTargetAttr Info = TI.parseTargetAttr(AttrStr);
- llvm::sort(Info.Features, [&TI](StringRef LHS, StringRef RHS) {
- // Multiversioning doesn't allow "no-${feature}", so we can
- // only have "+" prefixes here.
- assert(LHS.starts_with("+") && RHS.starts_with("+") &&
- "Features should always have a prefix.");
- return TI.getFMVPriority({LHS.substr(1)})
- .ugt(TI.getFMVPriority({RHS.substr(1)}));
- });
-
- bool IsFirst = true;
if (!Info.CPU.empty()) {
- IsFirst = false;
- Out << "cpu_" << Info.CPU;
+ assert(Info.Features.empty() && "cannot have both a CPU and a feature");
+ Out << ".cpu_" << Info.CPU;
+ return;
}
- assert(Info.Features.empty() && "unhandled case");
- for (StringRef Feat : Info.Features) {
- if (!IsFirst)
- Out << '_';
- IsFirst = false;
- Out << Feat.substr(1);
- }
+ assert(false && "specifying target features on an FMV is not supported on AIX");
}
class AIXTargetCodeGenInfo : public TargetCodeGenInfo {
diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp
index 2fd6a3e911fd1..46c9cf172fbc6 100644
--- a/clang/lib/Sema/SemaPPC.cpp
+++ b/clang/lib/Sema/SemaPPC.cpp
@@ -597,16 +597,17 @@ bool SemaPPC::checkTargetClonesAttr(
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << CPU << LHS.drop_front(sizeof("cpu=") - 1)
<< TargetClones;
- } else if (LHS == "default")
+ } else if (LHS == "default") {
HasDefault = true;
- else if (!getASTContext().getTargetInfo().isValidFeatureName(LHS) ||
- getASTContext().getTargetInfo().getFMVPriority(LHS) == 0)
+ } else if (!getASTContext().getTargetInfo().isValidFeatureName(LHS) ||
+ getASTContext().getTargetInfo().getFMVPriority(LHS) == 0) {
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << LHS << TargetClones;
-
- if (llvm::is_contained(NewParams, LHS))
+ }
+ if (llvm::is_contained(NewParams, LHS)) {
Diag(CurLoc, diag::warn_target_clone_duplicate_options);
- // Note: Add even if there are duplicates, since it changes name mangling.
+ continue;
+ }
NewParams.push_back(LHS);
} while (!RHS.empty());
}
>From 0fe0be1a63026ba0cd076fb579d9805e289517ba Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Mon, 9 Mar 2026 18:35:26 +0000
Subject: [PATCH 09/15] Handle case when an FMV function is declared, used,
then defined by: fixing getMangledNameImpl such that it does not need to
special case for FMV declarations because GetOrCreateLLVMFunction already can
return the non-mangled name of declared FMV functions
---
clang/lib/CodeGen/CodeGenModule.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index a2c81ab00b021..0760b92af3dfe 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2156,8 +2156,7 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
}
if (const auto *FD = dyn_cast<FunctionDecl>(ND)) {
- if (FD->isMultiVersion() && !OmitMultiVersionMangling &&
- !IgnoreFMVOnADeclaration(CGM.getTriple(), FD)) {
+ if (FD->isMultiVersion() && !OmitMultiVersionMangling) {
switch (FD->getMultiVersionKind()) {
case MultiVersionKind::CPUDispatch:
case MultiVersionKind::CPUSpecific:
@@ -5188,7 +5187,6 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
NameWithoutMultiVersionMangling = getMangledNameImpl(
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
} else if (IgnoreFMVOnADeclaration(getTriple(), FD)) {
- // TODO this might not be necessary after fix in getMangledNameImpl
NameWithoutMultiVersionMangling = getMangledNameImpl(
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
} else
>From e6f34146aa90f417633ac19386b2ebac64354ead Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 3 Mar 2026 19:04:28 -0500
Subject: [PATCH 10/15] test
test
test
---
clang/lib/CodeGen/CodeGenFunction.cpp | 23 +++-
clang/test/CodeGen/attr-target-clones-ppc.c | 116 ++++++++++++++++++++
2 files changed, 133 insertions(+), 6 deletions(-)
create mode 100644 clang/test/CodeGen/attr-target-clones-ppc.c
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 5897123f4019d..6287095211099 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -3042,7 +3042,20 @@ void CodeGenFunction::EmitMultiVersionResolver(
}
/**
+ * define internal ptr @foo.resolver() {
+ * entry:
+ * %is_version_1 = __builtin_cpu_supports(version_1)
+ * br i1 %is_version_1, label %if.version_1, label %if.else_2
*
+ * if.version_1:
+ * ret ptr @foo.version_1
+ *
+ * if.else_2:
+ * %is_version_2 = __builtin_cpu_supports(version_2)
+ * ...
+ * if.else: ; preds = %entry
+ * ret ptr @foo.default
+ * }
*/
void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {
@@ -3056,9 +3069,8 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
Builder.SetInsertPoint(CurBlock);
// The 'default' or 'generic' case.
if (!RO.Architecture && RO.Features.empty()) {
- // if.default:
- // %fmv.default = call ptr @getEntryPoint(ptr noundef @foo_default)
- // br label %resolver_exit
+ // if.else:
+ // ret ptr @foo.default
assert(&RO == Options.end() - 1 &&
"Default or Generic case must be last");
Builder.CreateRet(RO.Function);
@@ -3066,11 +3078,10 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
}
// if.else_n:
// %is_version_n = __builtin_cpu_supports(version_n)
- // br i1 %is_version_n, label %if.version_n, label %if.default
+ // br i1 %is_version_n, label %if.version_n, label %if.else_n+1
//
// if.version_n:
- // %fmv.version.n = call ptr @getEntryPoint(ptr noundef @foo_version_n)
- // br label %resolver_exit
+ // ret ptr @foo.version_n
assert(RO.Features.size() == 1 &&
"for now one feature requirement per version");
diff --git a/clang/test/CodeGen/attr-target-clones-ppc.c b/clang/test/CodeGen/attr-target-clones-ppc.c
new file mode 100644
index 0000000000000..08d54a391b58f
--- /dev/null
+++ b/clang/test/CodeGen/attr-target-clones-ppc.c
@@ -0,0 +1,116 @@
+// RUN: %clang_cc1 -triple powerpc-ibm-aix-xcoff -target-cpu pwr7 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -target-cpu pwr7 -emit-llvm %s -o - | FileCheck %s
+
+// CHECK: @internal = internal ifunc i32 (), ptr @internal.resolver
+// CHECK: @foo = ifunc i32 (), ptr @foo.resolver
+// CHECK: @foo_dupes = ifunc void (), ptr @foo_dupes.resolver
+// CHECK: @unused = ifunc void (), ptr @unused.resolver
+// CHECK: @foo_inline = linkonce ifunc i32 (), ptr @foo_inline.resolver
+// CHECK: @foo_ref_then_def = ifunc i32 (), ptr @foo_ref_then_def.resolver
+// CHECK: @foo_priority = ifunc i32 (i32), ptr @foo_priority.resolver
+// CHEECK: @isa_level = ifunc i32 (i32), ptr @isa_level.resolver
+
+
+static int __attribute__((target_clones("cpu=power10, default"))) internal(void) { return 0; }
+int use(void) { return internal(); }
+// CHECK: define internal ptr @internal.resolver()
+
+int __attribute__((target_clones("cpu=power10, default"))) foo(void) { return 0; }
+// CHECK: define internal {{.*}}i32 @foo.cpu_pwr10() #[[#ATTR_P10:]]
+// CHECK: define internal {{.*}}i32 @foo.default() #[[#ATTR_P7:]]
+// CHECK: define internal ptr @foo.resolver()
+// CHECK: ret ptr @foo.cpu_pwr10
+// CHECK: ret ptr @foo.default
+
+__attribute__((target_clones("default,default ,cpu=pwr8"))) void foo_dupes(void) {}
+// CHECK: define internal void @foo_dupes.default() #[[#ATTR_P7]]
+// CHECK: define internal void @foo_dupes.cpu_pwr8() #[[#ATTR_P8:]]
+// CHECK: define internal ptr @foo_dupes.resolver()
+// CHECK: ret ptr @foo_dupes.cpu_pwr8
+// CHECK: ret ptr @foo_dupes.default
+
+void bar2(void) {
+ // CHECK: define {{.*}}void @bar2()
+ foo_dupes();
+ // CHECK: call void @foo_dupes()
+}
+
+int bar(void) {
+ // CHECK: define {{.*}}i32 @bar()
+ return foo();
+ // CHECK: call {{.*}}i32 @foo()
+}
+
+void __attribute__((target_clones("default, cpu=pwr9"))) unused(void) {}
+// CHECK: define internal void @unused.default() #[[#ATTR_P7]]
+// CHECK: define internal void @unused.cpu_pwr9() #[[#ATTR_P9:]]
+// CHECK: define internal ptr @unused.resolver()
+// CHECK: ret ptr @unused.cpu_pwr9
+// CHECK: ret ptr @unused.default
+
+int __attribute__((target_clones("cpu=power10, default"))) inherited(void);
+int inherited(void) { return 0; }
+// CHECK: define internal {{.*}}i32 @inherited.cpu_pwr10() #[[#ATTR_P10]]
+// CHECK: define internal {{.*}}i32 @inherited.default() #[[#ATTR_P7]]
+// CHECK: define internal ptr @inherited.resolver()
+// CHECK: ret ptr @inherited.cpu_pwr10
+// CHECK: ret ptr @inherited.default
+
+
+int test_inherited(void) {
+ // CHECK: define {{.*}}i32 @test_inherited()
+ return inherited();
+ // CHECK: call {{.*}}i32 @inherited()
+}
+
+inline int __attribute__((target_clones("default,cpu=pwr8")))
+foo_inline(void) { return 0; }
+int __attribute__((target_clones("cpu=pwr7,default")))
+foo_ref_then_def(void);
+
+int bar3(void) {
+ // CHECK: define {{.*}}i32 @bar3()
+ return foo_inline() + foo_ref_then_def();
+ // CHECK: call {{.*}}i32 @foo_inline()
+ // CHECK: call {{.*}}i32 @foo_ref_then_def()
+}
+
+// CHECK: define internal ptr @foo_inline.resolver()
+// CHECK: ret ptr @foo_inline.cpu_pwr8
+// CHECK: ret ptr @foo_inline.default
+
+int __attribute__((target_clones("cpu=pwr7,default")))
+foo_ref_then_def(void){ return 0; }
+// CHECK: define internal ptr @foo_ref_then_def.resolver()
+// CHECK: ret ptr @foo_ref_then_def.cpu_pwr7
+// CHECK: ret ptr @foo_ref_then_def.default
+
+int __attribute__((target_clones("default", "cpu=pwr8")))
+foo_unused_no_defn(void);
+// CHECK-NOT: foo_unused_no_defn
+
+int __attribute__((target_clones("default", "cpu=pwr9")))
+foo_used_no_defn(void);
+
+int test_foo_used_no_defn(void) {
+ // CHECK: define {{.*}}i32 @test_foo_used_no_defn()
+ return foo_used_no_defn();
+ // CHECK: call {{.*}}i32 @foo_used_no_defn()
+}
+// CHECK: declare {{.*}}i32 @foo_used_no_defn()
+
+// test that the CPU checks are done in most to least restrictive (highest to lowest CPU)
+int __attribute__((target_clones("cpu=pwr7", "cpu=pwr9", "default", "cpu=pwr8")))
+foo_priority(int x) { return x & (x - 1); }
+// CHECK: define internal ptr @foo_priority.resolver()
+// CHECK: ret ptr @foo_priority.cpu_pwr9
+// CHECK: ret ptr @foo_priority.cpu_pwr8
+// CHECK: ret ptr @foo_priority.cpu_pwr7
+// CHECK: ret ptr @foo_priority.default
+
+
+// CHECK: attributes #[[#ATTR_P7]] = {{.*}} "target-cpu"="pwr7"
+// CHECK: attributes #[[#ATTR_P10]] = {{.*}} "target-cpu"="pwr10"
+// CHECK: attributes #[[#ATTR_P8]] = {{.*}} "target-cpu"="pwr8"
+// CHECK: attributes #[[#ATTR_P9]] = {{.*}} "target-cpu"="pwr9"
+
>From 3ef7c2773de0a3a9b6422e4395d5abcc61fb7a62 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 10 Mar 2026 04:02:06 +0000
Subject: [PATCH 11/15] clang-format
---
clang/lib/CodeGen/CodeGenFunction.cpp | 12 ++++++------
clang/lib/CodeGen/Targets/PPC.cpp | 2 +-
clang/lib/Sema/SemaPPC.cpp | 2 +-
3 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 6287095211099..84030cb421a40 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -3088,12 +3088,12 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
assert(RO.Features[0].starts_with("cpu="));
StringRef CPU = RO.Features[0].split("=").second.trim();
StringRef Feature = llvm::StringSwitch<StringRef>(CPU)
- .Cases({"power7","pwr7"}, "arch_2_06")
- .Cases({"power8","pwr8"}, "arch_2_07")
- .Cases({"power9","pwr9"}, "arch_3_00")
- .Cases({"power10","pwr10"}, "arch_3_1")
- .Cases({"power11","pwr11"}, "arch_3_1")
- .Default("error");
+ .Cases({"power7", "pwr7"}, "arch_2_06")
+ .Cases({"power8", "pwr8"}, "arch_2_07")
+ .Cases({"power9", "pwr9"}, "arch_3_00")
+ .Cases({"power10", "pwr10"}, "arch_3_1")
+ .Cases({"power11", "pwr11"}, "arch_3_1")
+ .Default("error");
llvm::Value *Condition = EmitPPCBuiltinCpu(
Builtin::BI__builtin_cpu_supports, Builder.getInt1Ty(), Feature);
diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp
index 6694725277144..ab069bfbd1b51 100644
--- a/clang/lib/CodeGen/Targets/PPC.cpp
+++ b/clang/lib/CodeGen/Targets/PPC.cpp
@@ -157,7 +157,7 @@ void AIXABIInfo::appendAttributeMangling(StringRef AttrStr,
return;
}
- assert(false && "specifying target features on an FMV is not supported on AIX");
+ assert(0 && "specifying target features on an FMV is unsupported on AIX");
}
class AIXTargetCodeGenInfo : public TargetCodeGenInfo {
diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp
index 46c9cf172fbc6..705cfa4c4d739 100644
--- a/clang/lib/Sema/SemaPPC.cpp
+++ b/clang/lib/Sema/SemaPPC.cpp
@@ -600,7 +600,7 @@ bool SemaPPC::checkTargetClonesAttr(
} else if (LHS == "default") {
HasDefault = true;
} else if (!getASTContext().getTargetInfo().isValidFeatureName(LHS) ||
- getASTContext().getTargetInfo().getFMVPriority(LHS) == 0) {
+ getASTContext().getTargetInfo().getFMVPriority(LHS) == 0) {
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << LHS << TargetClones;
}
>From de2f0abe4472c9dc0f5f7325e757d61d63b6f930 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 10 Mar 2026 14:49:11 +0000
Subject: [PATCH 12/15] add IR codegen test
---
clang/test/CodeGen/attr-target-clones-ppc.c | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/clang/test/CodeGen/attr-target-clones-ppc.c b/clang/test/CodeGen/attr-target-clones-ppc.c
index 08d54a391b58f..05a9b788f701a 100644
--- a/clang/test/CodeGen/attr-target-clones-ppc.c
+++ b/clang/test/CodeGen/attr-target-clones-ppc.c
@@ -99,16 +99,31 @@ int test_foo_used_no_defn(void) {
}
// CHECK: declare {{.*}}i32 @foo_used_no_defn()
-// test that the CPU checks are done in most to least restrictive (highest to lowest CPU)
-int __attribute__((target_clones("cpu=pwr7", "cpu=pwr9", "default", "cpu=pwr8")))
+// Test that the CPU conditions are checked from the most to the least
+// restrictive (highest to lowest CPU). Also test the codegen for the
+// conditions
+int __attribute__((target_clones("cpu=pwr10", "cpu=pwr7", "cpu=pwr9", "default", "cpu=pwr8")))
foo_priority(int x) { return x & (x - 1); }
// CHECK: define internal ptr @foo_priority.resolver()
+// CHECK-NEXT: entry
+// if (__builtin_cpu_supports("arch_3_1")) return &foo_priority.cpu_pwr10;
+// CHECK-NEXT: %[[#L1:]] = load i32, {{.*}} ptr @_system_configuration, i32 0, i32 1)
+// CHECK-NEXT: icmp uge i32 %[[#L1]], 262144
+// CHECK: ret ptr @foo_priority.cpu_pwr10
+// if (__builtin_cpu_supports("arch_3_00")) return &foo_priority.cpu_pwr9;
+// CHECK: %[[#L2:]] = load i32, {{.*}} ptr @_system_configuration, i32 0, i32 1)
+// CHECK-NEXT: icmp uge i32 %[[#L2]], 131072
// CHECK: ret ptr @foo_priority.cpu_pwr9
+// if (__builtin_cpu_supports("arch_2_07")) return &foo_priority.cpu_pwr8;
+// CHECK: %[[#L3:]] = load i32, {{.*}} ptr @_system_configuration, i32 0, i32 1)
+// CHECK-NEXT: icmp uge i32 %[[#L3]], 65536
// CHECK: ret ptr @foo_priority.cpu_pwr8
+// if (__builtin_cpu_supports("arch_2_06")) return &foo_priority.cpu_pwr7;
+// CHECK: %[[#L4:]] = load i32, {{.*}} ptr @_system_configuration, i32 0, i32 1)
+// CHECK-NEXT: icmp uge i32 %[[#L4]], 32768
// CHECK: ret ptr @foo_priority.cpu_pwr7
// CHECK: ret ptr @foo_priority.default
-
// CHECK: attributes #[[#ATTR_P7]] = {{.*}} "target-cpu"="pwr7"
// CHECK: attributes #[[#ATTR_P10]] = {{.*}} "target-cpu"="pwr10"
// CHECK: attributes #[[#ATTR_P8]] = {{.*}} "target-cpu"="pwr8"
>From 5c91dfce8b510c80d0195d16a52fcf2ffe812cea Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 10 Mar 2026 15:17:18 +0000
Subject: [PATCH 13/15] code review
---
clang/include/clang/Basic/TargetInfo.h | 2 +-
clang/lib/CodeGen/TargetBuiltins/PPC.cpp | 57 ++++++++++++------------
2 files changed, 30 insertions(+), 29 deletions(-)
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 68160e9bd9b29..b08631baf2532 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1567,7 +1567,7 @@ class TargetInfo : public TransferrableTargetInfo,
/// which requires support for cpu_supports and cpu_is functionality.
bool supportsMultiVersioning() const {
return getTriple().isX86() || getTriple().isAArch64() ||
- getTriple().isRISCV() || getTriple().isOSBinFormatXCOFF();
+ getTriple().isRISCV() || getTriple().isOSAIX();
}
/// Identify whether this target supports IFuncs.
diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
index a730de0fb856d..e87b987eeb340 100644
--- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
@@ -73,6 +73,8 @@ static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
Value *CodeGenFunction::EmitPPCBuiltinCpu(unsigned BuiltinID,
llvm::Type *ReturnType,
StringRef CPUStr) {
+ assert(BuiltinID == Builtin::BI__builtin_cpu_is ||
+ BuiltinID == Builtin::BI__builtin_cpu_supports);
#include "llvm/TargetParser/PPCTargetParser.def"
auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
@@ -160,45 +162,44 @@ Value *CodeGenFunction::EmitPPCBuiltinCpu(unsigned BuiltinID,
Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
return Builder.CreateICmpEQ(TheCall,
llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
- } else if (BuiltinID == Builtin::BI__builtin_cpu_supports) {
- llvm::Triple Triple = getTarget().getTriple();
- if (Triple.isOSAIX()) {
- typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
- unsigned>
- CPUSupportType;
- auto [SupportMethod, FieldIdx, Mask, CompOp, Value] =
- static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)
+ }
+ // else BuiltinID == Builtin::BI__builtin_cpu_supports
+ llvm::Triple Triple = getTarget().getTriple();
+ if (Triple.isOSAIX()) {
+ typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
+ unsigned>
+ CPUSupportType;
+ auto [SupportMethod, FieldIdx, Mask, CompOp, Value] =
+ static_cast<CPUSupportType>(
+ StringSwitch<CPUSupportType>(CPUStr)
#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
VALUE) \
.Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
#include "llvm/TargetParser/PPCTargetParser.def"
- .Default({BUILTIN_PPC_FALSE, 0, 0,
- CmpInst::Predicate(), 0}));
- return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
- Value);
- }
+ .Default({BUILTIN_PPC_FALSE, 0, 0, CmpInst::Predicate(), 0}));
+ return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
+ Value);
+ }
- assert(Triple.isOSLinux() &&
- "__builtin_cpu_supports() is only supported for AIX and Linux.");
- auto [FeatureWord, BitMask] =
- StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
+ assert(Triple.isOSLinux() &&
+ "__builtin_cpu_supports() is only supported for AIX and Linux.");
+ auto [FeatureWord, BitMask] =
+ StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
.Case(Name, {FA_WORD, Bitmask})
#include "llvm/TargetParser/PPCTargetParser.def"
- .Default({0, 0});
- if (!BitMask)
- return Builder.getFalse();
- Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
- Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
- Value *Mask =
- Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
- return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
+ .Default({0, 0});
+ if (!BitMask)
+ return Builder.getFalse();
+ Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
+ llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
+ Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
+ Value *Mask =
+ Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
+ return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
#undef PPC_FAWORD_HWCAP
#undef PPC_FAWORD_HWCAP2
#undef PPC_FAWORD_CPUID
- } else
- assert(0 && "unexpected builtin");
}
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
>From 63082ce93d6b147a9d2ecd3b0f57ffd5315ec683 Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 10 Mar 2026 02:56:08 +0000
Subject: [PATCH 14/15] debugging, CMakelists changes
---
clang/lib/AST/CMakeLists.txt | 4 ++
clang/lib/Basic/CMakeLists.txt | 4 ++
clang/lib/CodeGen/CMakeLists.txt | 74 ++++++++++++++++++++++++++++
clang/lib/Sema/CMakeLists.txt | 1 +
llvm/lib/MC/CMakeLists.txt | 4 ++
llvm/lib/TargetParser/CMakeLists.txt | 4 ++
6 files changed, 91 insertions(+)
diff --git a/clang/lib/AST/CMakeLists.txt b/clang/lib/AST/CMakeLists.txt
index f9a5f4f0e7ecd..c5a6f65849501 100644
--- a/clang/lib/AST/CMakeLists.txt
+++ b/clang/lib/AST/CMakeLists.txt
@@ -149,3 +149,7 @@ add_clang_library(clangAST
# These generated headers are included transitively.
target_parser_gen
)
+set_source_files_properties(
+ ASTContext.cpp
+ PROPERTIES COMPILE_FLAGS "-g -O0")
+
diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt
index adfc6ee326b5a..56fde54ca02ea 100644
--- a/clang/lib/Basic/CMakeLists.txt
+++ b/clang/lib/Basic/CMakeLists.txt
@@ -141,3 +141,7 @@ target_link_libraries(clangBasic
PRIVATE
${LLVM_ATOMIC_LIB}
)
+set_source_files_properties(
+ TargetInfo.cpp Targets/X86.cpp Targets.cpp Targets/PPC.cpp
+ PROPERTIES COMPILE_FLAGS "-g -O0")
+
diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt
index dbbc35b372f42..fede1e72492a5 100644
--- a/clang/lib/CodeGen/CMakeLists.txt
+++ b/clang/lib/CodeGen/CMakeLists.txt
@@ -173,3 +173,77 @@ add_clang_library(clangCodeGen
clangLex
clangSerialization
)
+
+set_source_files_properties(
+ ABIInfo.cpp
+ ABIInfoImpl.cpp
+ BackendUtil.cpp
+ CGAtomic.cpp
+ CGBlocks.cpp
+ CGBuiltin.cpp
+ CGCUDANV.cpp
+ CGCUDARuntime.cpp
+ CGCXX.cpp
+ CGCXXABI.cpp
+ CGCall.cpp
+ CGClass.cpp
+ CGCleanup.cpp
+ CGCoroutine.cpp
+ CGDebugInfo.cpp
+ CGDecl.cpp
+ CGDeclCXX.cpp
+ CGException.cpp
+ CGExpr.cpp
+ CGExprAgg.cpp
+ CGExprCXX.cpp
+ CGExprComplex.cpp
+ CGExprConstant.cpp
+ CGExprScalar.cpp
+ CGGPUBuiltin.cpp
+ CGHLSLRuntime.cpp
+ CGHLSLBuiltins.cpp
+ CGLoopInfo.cpp
+ CGNonTrivialStruct.cpp
+ CGObjC.cpp
+ CGObjCGNU.cpp
+ CGObjCMac.cpp
+ CGObjCRuntime.cpp
+ CGOpenCLRuntime.cpp
+ CGOpenMPRuntime.cpp
+ CGOpenMPRuntimeGPU.cpp
+ CGPointerAuth.cpp
+ CGRecordLayoutBuilder.cpp
+ CGStmt.cpp
+ CGStmtOpenMP.cpp
+ CGVTT.cpp
+ CGVTables.cpp
+ CodeGenABITypes.cpp
+ CodeGenAction.cpp
+ CodeGenFunction.cpp
+ CodeGenModule.cpp
+ CodeGenPGO.cpp
+ CodeGenSYCL.cpp
+ CodeGenTBAA.cpp
+ CodeGenTypes.cpp
+ ConstantInitBuilder.cpp
+ CoverageMappingGen.cpp
+ ItaniumCXXABI.cpp
+ HLSLBufferLayoutBuilder.cpp
+ LinkInModulesPass.cpp
+ MacroPPCallbacks.cpp
+ MicrosoftCXXABI.cpp
+ ModuleBuilder.cpp
+ ObjectFilePCHContainerWriter.cpp
+ PatternInit.cpp
+ SanitizerMetadata.cpp
+ TargetBuiltins/ARM.cpp
+ TargetBuiltins/PPC.cpp
+ TargetBuiltins/RISCV.cpp
+ TargetBuiltins/X86.cpp
+ TargetInfo.cpp
+ Targets/AArch64.cpp
+ Targets/ARM.cpp
+ Targets/PPC.cpp
+ Targets/X86.cpp
+ PROPERTIES COMPILE_FLAGS "-g -O0")
+
diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt
index 0ebf56ecffe69..607ee679cc65d 100644
--- a/clang/lib/Sema/CMakeLists.txt
+++ b/clang/lib/Sema/CMakeLists.txt
@@ -117,3 +117,4 @@ add_clang_library(clangSema
clangLex
clangSupport
)
+set_source_files_properties(SemaPPC.cpp PROPERTIES COMPILE_FLAGS "-g -O0")
diff --git a/llvm/lib/MC/CMakeLists.txt b/llvm/lib/MC/CMakeLists.txt
index 1388f130bb806..c0a524f71f970 100644
--- a/llvm/lib/MC/CMakeLists.txt
+++ b/llvm/lib/MC/CMakeLists.txt
@@ -85,3 +85,7 @@ add_llvm_component_library(LLVMMC
add_subdirectory(MCParser)
add_subdirectory(MCDisassembler)
+set_source_files_properties(
+ MCSubtargetInfo.cpp
+ PROPERTIES COMPILE_FLAGS "-g -O0 -DLLVM_ENABLE_DUMP")
+
diff --git a/llvm/lib/TargetParser/CMakeLists.txt b/llvm/lib/TargetParser/CMakeLists.txt
index e1a30199e1ade..01b9702489617 100644
--- a/llvm/lib/TargetParser/CMakeLists.txt
+++ b/llvm/lib/TargetParser/CMakeLists.txt
@@ -43,3 +43,7 @@ add_llvm_component_library(LLVMTargetParser
DEPENDS
target_parser_gen
)
+set_source_files_properties(
+ PPCTargetParser.cpp
+ TargetParser.cpp
+ PROPERTIES COMPILE_FLAGS "-g -O0 -DLLVM_ENABLE_DUMP")
>From af8d418f06076e04411de18a5aa6244274501c7f Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Tue, 10 Mar 2026 02:56:27 +0000
Subject: [PATCH 15/15] debugging
---
clang/lib/Basic/Targets/PPC.cpp | 1 +
clang/lib/CodeGen/CodeGenFunction.cpp | 4 +++
clang/lib/CodeGen/CodeGenModule.cpp | 47 +++++++++++++++++++++++++--
3 files changed, 49 insertions(+), 3 deletions(-)
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index fbff0af711b13..d8cda91a8c588 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -717,6 +717,7 @@ llvm::APInt PPCTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
if (Features.empty())
return llvm::APInt(32, 0);
assert(Features.size() == 1 && "one feature/cpu per clone on PowerPC");
+ llvm::dbgs() << Features[0] << "\n";
ParsedTargetAttr ParsedAttr = parseTargetAttr(Features[0]);
if (!ParsedAttr.CPU.empty()) {
StringRef CPU = llvm::PPC::normalizeCPUName(ParsedAttr.CPU);
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 84030cb421a40..2e7fa7ce4fe17 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -3095,6 +3095,10 @@ void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
.Cases({"power11", "pwr11"}, "arch_3_1")
.Default("error");
+ if (getLangOpts().EncodeExtendedBlockSig) {
+ llvm::dbgs() << "Feature = \"" << RO.Features[0] << "\", __builtin_cpu_supports(\"" << Feature << "\")\n";
+ }
+
llvm::Value *Condition = EmitPPCBuiltinCpu(
Builtin::BI__builtin_cpu_supports, Builder.getInt1Ty(), Feature);
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 0760b92af3dfe..09276386e843a 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -80,6 +80,24 @@
using namespace clang;
using namespace CodeGen;
+void pdecl(const Decl* D) { D->dump(); }
+void pgdecl(const GlobalDecl &GD) {
+ llvm::dbgs() << "{Value={" << (isa<NamedDecl>(GD.getDecl()) ? cast<NamedDecl>(GD.getDecl())->getName() : StringRef(""))
+ << " " << GD.getDecl() << "," << GD.getAsOpaquePtr() << "}, MVIndex=" << GD.getMultiVersionIndex() << "}";
+}
+void pmapvector(const llvm::MapVector<GlobalDecl, StringRef> &mv) {
+ for (auto p : mv.getArrayRef()) {
+ const GlobalDecl &GD = p.first;
+ StringRef s = p.second;
+ llvm::dbgs() << " "; pgdecl(GD);
+ llvm::dbgs() << " : " << s << "\n";
+ }
+}
+void pm(const CodeGenModule* CGM) { CGM->getModule().dump(); }
+void pmm(const llvm::Module *M) { M->dump(); }
+
+#define W(X) if (getLangOpts().EncodeExtendedBlockSig) { X; }
+
static llvm::cl::opt<bool> LimitedCoverage(
"limited-coverage-experimental", llvm::cl::Hidden,
llvm::cl::desc("Emit limited coverage mapping information (experimental)"));
@@ -2251,6 +2269,9 @@ void CodeGenModule::UpdateMultiVersionNames(GlobalDecl GD,
StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
GlobalDecl CanonicalGD = GD.getCanonicalDecl();
+ W(pmapvector(MangledDeclNames))
+ W(llvm::dbgs() << "getMangledName("; pgdecl(CanonicalGD); llvm::dbgs() << ")\n")
+
// Some ABIs don't have constructor variants. Make sure that base and
// complete constructors get mangled the same.
@@ -2269,8 +2290,10 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
// cached.
if (!LangOpts.CUDAIsDevice || !getContext().mayExternalize(GD.getDecl())) {
auto FoundName = MangledDeclNames.find(CanonicalGD);
- if (FoundName != MangledDeclNames.end())
+ if (FoundName != MangledDeclNames.end()) {
+ W(llvm::dbgs() << " found in MangledDeclNames: " << FoundName->second << "\n")
return FoundName->second;
+ }
}
// Keep the first result in the case of a mangling collision.
@@ -2308,7 +2331,10 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
// "LLVM demangler must demangle clang-generated names");
auto Result = Manglings.insert(std::make_pair(MangledName, GD));
- return MangledDeclNames[CanonicalGD] = Result.first->first();
+ W(llvm::dbgs() << "inserting into MangledDeclNames: GD = "; pgdecl(CanonicalGD); llvm::dbgs() << " String = " << Result.first->first() << "\n")
+ StringRef res = (MangledDeclNames[CanonicalGD] = Result.first->first());
+ W(pmapvector(MangledDeclNames))
+ return res;
}
StringRef CodeGenModule::getBlockMangledName(GlobalDecl GD,
@@ -4837,7 +4863,14 @@ void CodeGenModule::emitMultiVersionFunctions() {
const CodeGenFunction::FMVResolverOption &RHS) {
return getFMVPriority(TI, LHS).ugt(getFMVPriority(TI, RHS));
});
-
+ if (getLangOpts().EncodeExtendedBlockSig) {
+ for (const CodeGenFunction::FMVResolverOption &RO : Options) {
+ llvm::dbgs() << "RO = {Architecture = " << (RO.Architecture ? *RO.Architecture : StringRef{"null"}) << ", Features = {";
+ for(StringRef S : RO.Features)
+ llvm::dbgs() << S << ", ";
+ llvm::dbgs() << ", Priority = " << getFMVPriority(TI, RO) << "}\n";
+ }
+ }
// Diagnose unreachable function versions.
if (getTarget().getTriple().isAArch64()) {
for (auto I = Options.begin() + 1, E = Options.end(); I != E; ++I) {
@@ -5378,9 +5411,17 @@ CodeGenModule::GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty, bool ForVTable,
}
StringRef MangledName = getMangledName(GD);
+ W(llvm::dbgs() << "GetAddrOfFunction(" \
+ << (isa<NamedDecl>(GD.getDecl()) ? cast<NamedDecl>(GD.getDecl())->getName() : StringRef("")) \
+ << ", IsForDefinition = " << (IsForDefinition == ForDefinition) << ")\n"; \
+ llvm::dbgs() << "getMangledName(GD) = " << MangledName << "\n"; )
+
auto *F = GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer,
/*IsThunk=*/false, llvm::AttributeList(),
IsForDefinition);
+ W(llvm::dbgs() << "GetOrCreateLLVMFunction(" << MangledName << ", GD = " \
+ << (isa<NamedDecl>(GD.getDecl()) ? cast<NamedDecl>(GD.getDecl())->getName() : StringRef("")) \
+ << ", returns "; F->dump())
// Returns kernel handle for HIP kernel stub function.
if (LangOpts.CUDA && !LangOpts.CUDAIsDevice &&
cast<FunctionDecl>(GD.getDecl())->hasAttr<CUDAGlobalAttr>()) {
More information about the llvm-commits
mailing list