[clang] [llvm] [FMV][AArch64] Allow user to override version priority. (PR #150267)
Alexandros Lamprineas via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 13 05:57:00 PDT 2025
https://github.com/labrinea updated https://github.com/llvm/llvm-project/pull/150267
>From 0575957f6c7f9524778f85220c426be6e56f9714 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Thu, 26 Jun 2025 15:52:11 +0100
Subject: [PATCH 1/3] [FMV][AArch64] Allow user to override version priority.
Implements https://github.com/ARM-software/acle/pull/404
This allows the user to specify "priority=[1-255];featA+featB"
where priority=255 means highest priority. If the explicit
priority string is omitted then the priority of "featA+featB"
is implied, which is lower than priority=1.
Internally this gets expanded using special FMV features P0 ... P7
which can encode up to 256-1 priority levels (excluding all zeros).
Those do not have a corresponding detection bit at position FEAT_#enum,
so I made this field optional in FMVInfo. They also do not affect
the codegen or name mangling of versioned functions.
---
.../clang/Basic/DiagnosticSemaKinds.td | 6 +
clang/include/clang/Sema/SemaARM.h | 3 +-
clang/include/clang/Sema/SemaRISCV.h | 3 +-
clang/lib/CodeGen/Targets/AArch64.cpp | 7 +-
clang/lib/Sema/SemaARM.cpp | 63 +++++-
clang/lib/Sema/SemaDeclAttr.cpp | 7 +-
clang/lib/Sema/SemaRISCV.cpp | 4 +-
clang/test/AST/attr-target-version.c | 30 ++-
.../AArch64/fmv-duplicate-mangled-name.c | 16 ++
.../CodeGen/AArch64/fmv-explicit-priority.c | 193 ++++++++++++++++++
clang/test/Sema/attr-target-clones-aarch64.c | 10 +
clang/test/Sema/attr-target-version.c | 9 +
.../llvm/Analysis/TargetTransformInfo.h | 6 +-
.../llvm/Analysis/TargetTransformInfoImpl.h | 4 +
.../TargetParser/AArch64FeatPriorities.inc | 11 +-
.../llvm/TargetParser/AArch64TargetParser.h | 11 +-
llvm/lib/Analysis/TargetTransformInfo.cpp | 4 +
llvm/lib/Target/AArch64/AArch64FMV.td | 11 +
.../AArch64/AArch64TargetTransformInfo.cpp | 17 +-
.../AArch64/AArch64TargetTransformInfo.h | 1 +
llvm/lib/TargetParser/AArch64TargetParser.cpp | 31 ++-
llvm/lib/Transforms/IPO/GlobalOpt.cpp | 22 +-
.../TableGen/Basic/ARMTargetDefEmitter.cpp | 9 +-
23 files changed, 427 insertions(+), 51 deletions(-)
create mode 100644 clang/test/CodeGen/AArch64/fmv-explicit-priority.c
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 116341f4b66d5..c002608cb6ecd 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12781,6 +12781,12 @@ def warn_target_clone_duplicate_options
def warn_target_clone_no_impact_options
: Warning<"version list contains entries that don't impact code generation">,
InGroup<FunctionMultiVersioning>;
+def warn_version_priority_out_of_range
+ : Warning<"version priority '%0' is outside the allowed range [1-255]; ignoring priority">,
+ InGroup<FunctionMultiVersioning>;
+def warn_invalid_default_version_priority
+ : Warning<"priority of default version cannot be overridden; ignoring priority">,
+ InGroup<FunctionMultiVersioning>;
// three-way comparison operator diagnostics
def err_implied_comparison_category_type_not_found : Error<
diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index 104992e8826c3..66eb87c568c8f 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -92,7 +92,8 @@ class SemaARM : public SemaBase {
/// false otherwise.
bool areLaxCompatibleSveTypes(QualType FirstType, QualType SecondType);
- bool checkTargetVersionAttr(const StringRef Str, const SourceLocation Loc);
+ bool checkTargetVersionAttr(const StringRef Param, const SourceLocation Loc,
+ SmallString<64> &NewParam);
bool checkTargetClonesAttr(SmallVectorImpl<StringRef> &Params,
SmallVectorImpl<SourceLocation> &Locs,
SmallVectorImpl<SmallString<64>> &NewParams);
diff --git a/clang/include/clang/Sema/SemaRISCV.h b/clang/include/clang/Sema/SemaRISCV.h
index 844cc3ce4a440..863b8a143f48a 100644
--- a/clang/include/clang/Sema/SemaRISCV.h
+++ b/clang/include/clang/Sema/SemaRISCV.h
@@ -56,7 +56,8 @@ class SemaRISCV : public SemaBase {
std::unique_ptr<sema::RISCVIntrinsicManager> IntrinsicManager;
- bool checkTargetVersionAttr(const StringRef Param, const SourceLocation Loc);
+ bool checkTargetVersionAttr(const StringRef Param, const SourceLocation Loc,
+ SmallString<64> &NewParam);
bool checkTargetClonesAttr(SmallVectorImpl<StringRef> &Params,
SmallVectorImpl<SourceLocation> &Locs,
SmallVectorImpl<SmallString<64>> &NewParams);
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 289f8a9dcf211..89d4c83d727ed 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -1338,9 +1338,10 @@ void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr,
llvm::SmallDenseSet<StringRef, 8> UniqueFeats;
for (auto &Feat : Features)
- if (auto Ext = llvm::AArch64::parseFMVExtension(Feat))
- if (UniqueFeats.insert(Ext->Name).second)
- Out << 'M' << Ext->Name;
+ if (getTarget().doesFeatureAffectCodeGen(Feat))
+ if (auto Ext = llvm::AArch64::parseFMVExtension(Feat))
+ if (UniqueFeats.insert(Ext->Name).second)
+ Out << 'M' << Ext->Name;
}
std::unique_ptr<TargetCodeGenInfo>
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index e09c35296ef3b..167790b77d791 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1579,19 +1579,53 @@ bool SemaARM::areLaxCompatibleSveTypes(QualType FirstType,
IsLaxCompatible(SecondType, FirstType);
}
+static void appendFeature(StringRef Feat, SmallString<64> &Buffer) {
+ if (!Buffer.empty())
+ Buffer.append("+");
+ Buffer.append(Feat);
+}
+
+static void convertPriorityString(unsigned Priority,
+ SmallString<64> &NewParam) {
+ StringRef PriorityString[8] = {"P0", "P1", "P2", "P3",
+ "P4", "P5", "P6", "P7"};
+
+ assert(Priority > 0 && Priority < 256 && "priority out of range");
+ // Convert priority=[1-255] -> P0 + ... + P7
+ for (unsigned BitPos = 0; BitPos < 8; ++BitPos)
+ if (Priority & (1U << BitPos))
+ appendFeature(PriorityString[BitPos], NewParam);
+}
+
bool SemaARM::checkTargetVersionAttr(const StringRef Param,
- const SourceLocation Loc) {
+ const SourceLocation Loc,
+ SmallString<64> &NewParam) {
using namespace DiagAttrParams;
+ auto [LHS, RHS] = Param.split(';');
+ bool IsDefault = false;
llvm::SmallVector<StringRef, 8> Features;
- Param.split(Features, '+');
+ LHS.split(Features, '+');
for (StringRef Feat : Features) {
Feat = Feat.trim();
if (Feat == "default")
- continue;
- if (!getASTContext().getTargetInfo().validateCpuSupports(Feat))
+ IsDefault = true;
+ else if (!getASTContext().getTargetInfo().validateCpuSupports(Feat))
return Diag(Loc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << Feat << TargetVersion;
+ appendFeature(Feat, NewParam);
+ }
+
+ if (!RHS.empty() && RHS.consume_front("priority=")) {
+ if (IsDefault)
+ Diag(Loc, diag::warn_invalid_default_version_priority);
+ else {
+ unsigned Digit;
+ if (RHS.getAsInteger(0, Digit) || Digit < 1 || Digit > 255)
+ Diag(Loc, diag::warn_version_priority_out_of_range) << RHS;
+ else
+ convertPriorityString(Digit, NewParam);
+ }
}
return false;
}
@@ -1613,15 +1647,20 @@ bool SemaARM::checkTargetClonesAttr(
const StringRef Param = Params[I].trim();
const SourceLocation &Loc = Locs[I];
- if (Param.empty())
+ auto [LHS, RHS] = Param.split(';');
+ bool HasPriority = !RHS.empty() && RHS.consume_front("priority=");
+
+ if (LHS.empty())
return Diag(Loc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << "" << TargetClones;
- if (Param == "default") {
+ if (LHS == "default") {
if (HasDefault)
Diag(Loc, diag::warn_target_clone_duplicate_options);
else {
- NewParams.push_back(Param);
+ if (HasPriority)
+ Diag(Loc, diag::warn_invalid_default_version_priority);
+ NewParams.push_back(LHS);
HasDefault = true;
}
continue;
@@ -1630,7 +1669,7 @@ bool SemaARM::checkTargetClonesAttr(
bool HasCodeGenImpact = false;
llvm::SmallVector<StringRef, 8> Features;
llvm::SmallVector<StringRef, 8> ValidFeatures;
- Param.split(Features, '+');
+ LHS.split(Features, '+');
for (StringRef Feat : Features) {
Feat = Feat.trim();
if (!getASTContext().getTargetInfo().validateCpuSupports(Feat)) {
@@ -1660,6 +1699,14 @@ bool SemaARM::checkTargetClonesAttr(
continue;
}
+ if (HasPriority) {
+ unsigned Digit;
+ if (RHS.getAsInteger(0, Digit) || Digit < 1 || Digit > 255)
+ Diag(Loc, diag::warn_version_priority_out_of_range) << RHS;
+ else
+ convertPriorityString(Digit, NewParam);
+ }
+
// Valid non-default argument.
NewParams.push_back(NewParam);
HasNonDefault = true;
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 7a185106e4c6e..7726639ed987c 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -3343,19 +3343,20 @@ bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) {
static void handleTargetVersionAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
StringRef Param;
SourceLocation Loc;
+ SmallString<64> NewParam;
if (!S.checkStringLiteralArgumentAttr(AL, 0, Param, &Loc))
return;
if (S.Context.getTargetInfo().getTriple().isAArch64()) {
- if (S.ARM().checkTargetVersionAttr(Param, Loc))
+ if (S.ARM().checkTargetVersionAttr(Param, Loc, NewParam))
return;
} else if (S.Context.getTargetInfo().getTriple().isRISCV()) {
- if (S.RISCV().checkTargetVersionAttr(Param, Loc))
+ if (S.RISCV().checkTargetVersionAttr(Param, Loc, NewParam))
return;
}
TargetVersionAttr *NewAttr =
- ::new (S.Context) TargetVersionAttr(S.Context, AL, Param);
+ ::new (S.Context) TargetVersionAttr(S.Context, AL, NewParam);
D->addAttr(NewAttr);
}
diff --git a/clang/lib/Sema/SemaRISCV.cpp b/clang/lib/Sema/SemaRISCV.cpp
index 7b16d080603bf..9bbcd4680f895 100644
--- a/clang/lib/Sema/SemaRISCV.cpp
+++ b/clang/lib/Sema/SemaRISCV.cpp
@@ -1646,7 +1646,8 @@ bool SemaRISCV::isValidFMVExtension(StringRef Ext) {
}
bool SemaRISCV::checkTargetVersionAttr(const StringRef Param,
- const SourceLocation Loc) {
+ const SourceLocation Loc,
+ SmallString<64> &NewParam) {
using namespace DiagAttrParams;
llvm::SmallVector<StringRef, 8> AttrStrs;
@@ -1692,6 +1693,7 @@ bool SemaRISCV::checkTargetVersionAttr(const StringRef Param,
return Diag(Loc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << Param << TargetVersion;
+ NewParam = Param;
return false;
}
diff --git a/clang/test/AST/attr-target-version.c b/clang/test/AST/attr-target-version.c
index b537f5e685a31..c216cd6d1a28a 100644
--- a/clang/test/AST/attr-target-version.c
+++ b/clang/test/AST/attr-target-version.c
@@ -2,7 +2,29 @@
int __attribute__((target_version("sve2-bitperm + sha2"))) foov(void) { return 1; }
int __attribute__((target_clones(" lse + fp + sha3 ", "default"))) fooc(void) { return 2; }
-// CHECK: TargetVersionAttr
-// CHECK: sve2-bitperm + sha2
-// CHECK: TargetClonesAttr
-// CHECK: fp+lse+sha3 default
+
+int __attribute__((target_version("aes;priority=1"))) explicit_priority(void) { return 1; }
+int __attribute__((target_version("bf16;priority=2"))) explicit_priority(void) { return 2; }
+int __attribute__((target_version("crc;priority=4"))) explicit_priority(void) { return 4; }
+int __attribute__((target_version("dpb2;priority=8"))) explicit_priority(void) { return 8; }
+int __attribute__((target_version("fp16fml;priority=16"))) explicit_priority(void) { return 16; }
+int __attribute__((target_version("dotprod;priority=32"))) explicit_priority(void) { return 32; }
+int __attribute__((target_version("sve;priority=64"))) explicit_priority(void) { return 64; }
+int __attribute__((target_version("mops;priority=128"))) explicit_priority(void) { return 128; }
+
+int __attribute__((target_clones("simd;priority=255", "default"))) explicit_priority(void) {
+ return 0;
+}
+
+// CHECK: TargetVersionAttr {{.*}} "sve2-bitperm+sha2"
+// CHECK: TargetClonesAttr {{.*}} fp+lse+sha3 default
+
+// CHECK: TargetVersionAttr {{.*}} "aes+P0"
+// CHECK: TargetVersionAttr {{.*}} "bf16+P1"
+// CHECK: TargetVersionAttr {{.*}} "crc+P2"
+// CHECK: TargetVersionAttr {{.*}} "dpb2+P3"
+// CHECK: TargetVersionAttr {{.*}} "fp16fml+P4"
+// CHECK: TargetVersionAttr {{.*}} "dotprod+P5"
+// CHECK: TargetVersionAttr {{.*}} "sve+P6"
+// CHECK: TargetVersionAttr {{.*}} "mops+P7"
+// CHECK: TargetClonesAttr {{.*}} simd+P0+P1+P2+P3+P4+P5+P6+P7 default
diff --git a/clang/test/CodeGen/AArch64/fmv-duplicate-mangled-name.c b/clang/test/CodeGen/AArch64/fmv-duplicate-mangled-name.c
index e7e611e09542e..ebe5b75cf7946 100644
--- a/clang/test/CodeGen/AArch64/fmv-duplicate-mangled-name.c
+++ b/clang/test/CodeGen/AArch64/fmv-duplicate-mangled-name.c
@@ -1,5 +1,7 @@
// RUN: %clang_cc1 -triple aarch64-linux-gnu -verify -emit-llvm-only %s -DCHECK_IMPLICIT_DEFAULT
// RUN: %clang_cc1 -triple aarch64-linux-gnu -verify -emit-llvm-only %s -DCHECK_EXPLICIT_DEFAULT
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -verify -emit-llvm-only %s -DCHECK_EXPLICIT_VERSION_PRIORITY
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -verify -emit-llvm-only %s -DCHECK_EXPLICIT_CLONES_PRIORITY
#if defined(CHECK_IMPLICIT_DEFAULT)
@@ -21,4 +23,18 @@ __attribute__((target_version("default"))) int explicit_default_bad(void) { retu
// expected-note@-2 {{previous definition is here}}
__attribute__((target_clones("aes", "lse", "default"))) int explicit_default_bad(void) { return 1; }
+#elif defined(CHECK_EXPLICIT_VERSION_PRIORITY)
+
+__attribute__((target_version("aes"))) int explicit_version_priority(void) { return 0; }
+// expected-error@+2 {{definition with same mangled name 'explicit_version_priority._Maes' as another definition}}
+// expected-note@-2 {{previous definition is here}}
+__attribute__((target_version("aes;priority=10"))) int explicit_version_priority(void) { return 1; }
+
+#elif defined(CHECK_EXPLICIT_CLONES_PRIORITY)
+
+__attribute__((target_version("aes;priority=20"))) int explicit_clones_priority(void) { return 0; }
+// expected-error@+2 {{definition with same mangled name 'explicit_clones_priority._Maes' as another definition}}
+// expected-note@-2 {{previous definition is here}}
+__attribute__((target_clones("aes;priority=5", "lse"))) int explicit_clones_priority(void) { return 1; }
+
#endif
diff --git a/clang/test/CodeGen/AArch64/fmv-explicit-priority.c b/clang/test/CodeGen/AArch64/fmv-explicit-priority.c
new file mode 100644
index 0000000000000..437221c95542b
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fmv-explicit-priority.c
@@ -0,0 +1,193 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -O3 -fno-inline -emit-llvm -o - %s | FileCheck %s
+
+__attribute__((target_version("lse;priority=30"))) int foo(void) { return 1; }
+__attribute__((target_version("sve2;priority=20"))) int foo(void) { return 2; }
+__attribute__((target_version("sve;priority=10"))) int foo(void) { return 3; }
+__attribute__((target_version( "default"))) int foo(void) { return 0; }
+
+__attribute__((target_clones("lse+sve2;priority=3", "lse;priority=2", "sve;priority=1", "default")))
+int fmv_caller(void) { return foo(); }
+
+
+__attribute__((target_version("aes"))) int bar(void) { return 1; }
+__attribute__((target_version("sm4;priority=5"))) int bar(void) { return 2; }
+__attribute__((target_version("default"))) int bar(void) { return 0; }
+
+__attribute__((target("aes"))) int regular_caller_aes() { return bar(); }
+__attribute__((target("sm4"))) int regular_caller_sm4() { return bar(); }
+//.
+// CHECK: @__aarch64_cpu_features = external dso_local local_unnamed_addr global { i64 }
+// CHECK: @foo = weak_odr ifunc i32 (), ptr @foo.resolver
+// CHECK: @fmv_caller = weak_odr ifunc i32 (), ptr @fmv_caller.resolver
+// CHECK: @bar = weak_odr ifunc i32 (), ptr @bar.resolver
+//.
+// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none)
+// CHECK-LABEL: define {{[^@]+}}@foo._Mlse
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 1
+//
+//
+// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16)
+// CHECK-LABEL: define {{[^@]+}}@foo._Msve2
+// CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 2
+//
+//
+// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16)
+// CHECK-LABEL: define {{[^@]+}}@foo._Msve
+// CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 3
+//
+//
+// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none)
+// CHECK-LABEL: define {{[^@]+}}@foo.default
+// CHECK-SAME: () #[[ATTR3:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 0
+//
+//
+// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16)
+// CHECK-LABEL: define {{[^@]+}}@fmv_caller._MlseMsve2
+// CHECK-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo._Mlse()
+// CHECK-NEXT: ret i32 [[CALL]]
+//
+//
+// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16)
+// CHECK-LABEL: define {{[^@]+}}@fmv_caller._Mlse
+// CHECK-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo._Mlse()
+// CHECK-NEXT: ret i32 [[CALL]]
+//
+//
+// CHECK: Function Attrs: noinline nounwind vscale_range(1,16)
+// CHECK-LABEL: define {{[^@]+}}@fmv_caller._Msve
+// CHECK-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo() #[[ATTR12:[0-9]+]]
+// CHECK-NEXT: ret i32 [[CALL]]
+//
+//
+// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16)
+// CHECK-LABEL: define {{[^@]+}}@fmv_caller.default
+// CHECK-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo.default()
+// CHECK-NEXT: ret i32 [[CALL]]
+//
+//
+// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none)
+// CHECK-LABEL: define {{[^@]+}}@bar._Maes
+// CHECK-SAME: () #[[ATTR8:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 1
+//
+//
+// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none)
+// CHECK-LABEL: define {{[^@]+}}@bar._Msm4
+// CHECK-SAME: () #[[ATTR9:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 2
+//
+//
+// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none)
+// CHECK-LABEL: define {{[^@]+}}@bar.default
+// CHECK-SAME: () #[[ATTR3]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 0
+//
+//
+// CHECK: Function Attrs: noinline nounwind
+// CHECK-LABEL: define {{[^@]+}}@regular_caller_aes
+// CHECK-SAME: () local_unnamed_addr #[[ATTR10:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @bar() #[[ATTR12]]
+// CHECK-NEXT: ret i32 [[CALL]]
+//
+//
+// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none)
+// CHECK-LABEL: define {{[^@]+}}@regular_caller_sm4
+// CHECK-SAME: () local_unnamed_addr #[[ATTR11:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @bar._Msm4()
+// CHECK-NEXT: ret i32 [[CALL]]
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@foo.resolver() comdat {
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: tail call void @__init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 128
+// CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP1]], 0
+// CHECK-NEXT: br i1 [[DOTNOT]], label [[RESOLVER_ELSE:%.*]], label [[COMMON_RET:%.*]]
+// CHECK: common.ret:
+// CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi ptr [ @foo._Mlse, [[RESOLVER_ENTRY:%.*]] ], [ @foo._Msve2, [[RESOLVER_ELSE]] ], [ [[FOO__MSVE_FOO_DEFAULT:%.*]], [[RESOLVER_ELSE2:%.*]] ]
+// CHECK-NEXT: ret ptr [[COMMON_RET_OP]]
+// CHECK: resolver_else:
+// CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0]], 69793284352
+// CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 69793284352
+// CHECK-NEXT: br i1 [[TMP3]], label [[COMMON_RET]], label [[RESOLVER_ELSE2]]
+// CHECK: resolver_else2:
+// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP0]], 1073807616
+// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 1073807616
+// CHECK-NEXT: [[FOO__MSVE_FOO_DEFAULT]] = select i1 [[TMP5]], ptr @foo._Msve, ptr @foo.default
+// CHECK-NEXT: br label [[COMMON_RET]]
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@fmv_caller.resolver() comdat {
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: tail call void @__init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 69793284480
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 69793284480
+// CHECK-NEXT: br i1 [[TMP2]], label [[COMMON_RET:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: common.ret:
+// CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi ptr [ @fmv_caller._MlseMsve2, [[RESOLVER_ENTRY:%.*]] ], [ @fmv_caller._Mlse, [[RESOLVER_ELSE]] ], [ [[FMV_CALLER__MSVE_FMV_CALLER_DEFAULT:%.*]], [[RESOLVER_ELSE2:%.*]] ]
+// CHECK-NEXT: ret ptr [[COMMON_RET_OP]]
+// CHECK: resolver_else:
+// CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP0]], 128
+// CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP3]], 0
+// CHECK-NEXT: br i1 [[DOTNOT]], label [[RESOLVER_ELSE2]], label [[COMMON_RET]]
+// CHECK: resolver_else2:
+// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP0]], 1073807616
+// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 1073807616
+// CHECK-NEXT: [[FMV_CALLER__MSVE_FMV_CALLER_DEFAULT]] = select i1 [[TMP5]], ptr @fmv_caller._Msve, ptr @fmv_caller.default
+// CHECK-NEXT: br label [[COMMON_RET]]
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@bar.resolver() comdat {
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: tail call void @__init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 800
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 800
+// CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP0]], 33536
+// CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP3]], 33536
+// CHECK-NEXT: [[BAR__MAES_BAR_DEFAULT:%.*]] = select i1 [[TMP4]], ptr @bar._Maes, ptr @bar.default
+// CHECK-NEXT: [[COMMON_RET_OP:%.*]] = select i1 [[TMP2]], ptr @bar._Msm4, ptr [[BAR__MAES_BAR_DEFAULT]]
+// CHECK-NEXT: ret ptr [[COMMON_RET_OP]]
+//
+//.
+// CHECK: attributes #[[ATTR0]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "fmv-features"="P1,P2,P3,P4,lse" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse" }
+// CHECK: attributes #[[ATTR1]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features"="P2,P4,sve2" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve,+sve2" }
+// CHECK: attributes #[[ATTR2]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features"="P1,P3,sve" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" }
+// CHECK: attributes #[[ATTR3]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "fmv-features" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #[[ATTR4]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features"="P0,P1,lse,sve2" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+lse,+sve,+sve2" }
+// CHECK: attributes #[[ATTR5]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features"="P1,lse" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse" }
+// CHECK: attributes #[[ATTR6]] = { noinline nounwind vscale_range(1,16) "fmv-features"="P0,sve" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" }
+// CHECK: attributes #[[ATTR7]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #[[ATTR8]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "fmv-features"="aes" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+aes,+fp-armv8,+neon" }
+// CHECK: attributes #[[ATTR9]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "fmv-features"="P0,P2,sm4" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+sm4" }
+// CHECK: attributes #[[ATTR10]] = { noinline nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+aes,+fp-armv8,+neon" }
+// CHECK: attributes #[[ATTR11]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+sm4" }
+// CHECK: attributes #[[ATTR12]] = { nounwind }
+//.
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
diff --git a/clang/test/Sema/attr-target-clones-aarch64.c b/clang/test/Sema/attr-target-clones-aarch64.c
index 93d87cef54569..d3354915737c2 100644
--- a/clang/test/Sema/attr-target-clones-aarch64.c
+++ b/clang/test/Sema/attr-target-clones-aarch64.c
@@ -80,3 +80,13 @@ int useage(void) {
int __attribute__((target_clones("sve2-sha3+ssbs", "sm4"))) mv_after_use(void) { return 1; }
// expected-error@+1 {{'main' cannot be a multiversioned function}}
int __attribute__((target_clones("i8mm"))) main() { return 1; }
+
+//expected-warning@+2 {{unsupported 'priority=10' in the 'target_clones' attribute string; 'target_clones' attribute ignored}}
+//expected-warning@+1 {{version list contains entries that don't impact code generation}}
+int __attribute__((target_clones("priority=10;aes", "default"))) priority_before_features(void) { return 0; }
+
+//expected-warning@+1 {{version priority '0' is outside the allowed range [1-255]; ignoring priority}}
+int __attribute__((target_clones("aes;priority=0", "default"))) priority_out_of_range(void) { return 0; }
+
+//expected-warning@+1 {{priority of default version cannot be overridden; ignoring priority}}
+int __attribute__((target_clones("aes", "default;priority=10"))) priority_default_version(void) { return 0; }
diff --git a/clang/test/Sema/attr-target-version.c b/clang/test/Sema/attr-target-version.c
index d062212848daf..22e15aae890ba 100644
--- a/clang/test/Sema/attr-target-version.c
+++ b/clang/test/Sema/attr-target-version.c
@@ -117,3 +117,12 @@ int unspec_args_implicit_default_first();
int __attribute__((target_version("aes"))) unspec_args_implicit_default_first() { return -1; }
// expected-note@+1 {{function multiversioning caused by this declaration}}
int __attribute__((target_version("default"))) unspec_args_implicit_default_first() { return 0; }
+
+//expected-warning@+1 {{unsupported 'priority=10' in the 'target_version' attribute string; 'target_version' attribute ignored}}
+int __attribute__((target_version("priority=10;aes"))) priority_before_features(void) { return 0; }
+
+//expected-warning@+1 {{version priority '256' is outside the allowed range [1-255]; ignoring priority}}
+int __attribute__((target_version("aes;priority=256"))) priority_out_of_range(void) { return 0; }
+
+//expected-warning@+1 {{priority of default version cannot be overridden; ignoring priority}}
+int __attribute__((target_version("default;priority=10"))) priority_default_version(void) { return 0; }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 53c91bfe16804..51602f32d5102 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1930,9 +1930,13 @@ class TargetTransformInfo {
LLVM_ABI bool hasArmWideBranch(bool Thumb) const;
/// Returns a bitmask constructed from the target-features or fmv-features
- /// metadata of a function.
+ /// metadata of a function corresponding to its Arch Extensions.
LLVM_ABI APInt getFeatureMask(const Function &F) const;
+ /// Returns a bitmask constructed from the target-features or fmv-features
+ /// metadata of a function corresponding to its FMV priority.
+ LLVM_ABI APInt getPriorityMask(const Function &F) const;
+
/// Returns true if this is an instance of a function with multiple versions.
LLVM_ABI bool isMultiversionedFunction(const Function &F) const;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index e879712121b59..5a8952117c0d7 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1135,6 +1135,10 @@ class TargetTransformInfoImplBase {
return APInt::getZero(32);
}
+ virtual APInt getPriorityMask(const Function &F) const {
+ return APInt::getZero(32);
+ }
+
virtual bool isMultiversionedFunction(const Function &F) const {
return false;
}
diff --git a/llvm/include/llvm/TargetParser/AArch64FeatPriorities.inc b/llvm/include/llvm/TargetParser/AArch64FeatPriorities.inc
index f2bad28ada93e..f0291926bbe75 100644
--- a/llvm/include/llvm/TargetParser/AArch64FeatPriorities.inc
+++ b/llvm/include/llvm/TargetParser/AArch64FeatPriorities.inc
@@ -59,7 +59,16 @@ enum FeatPriorities {
PRIOR_SME_I64,
PRIOR_SME2,
PRIOR_MOPS,
- PRIOR_CSSC
+ PRIOR_CSSC,
+ PRIOR_MAX,
+ PRIOR_P0 = 120,
+ PRIOR_P1,
+ PRIOR_P2,
+ PRIOR_P3,
+ PRIOR_P4,
+ PRIOR_P5,
+ PRIOR_P6,
+ PRIOR_P7
};
#endif
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 8e83b04681f58..4c9fb17104c6a 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -42,6 +42,8 @@ struct CpuInfo;
static_assert(FEAT_MAX < 62,
"Number of features in CPUFeatures are limited to 62 entries");
+static_assert(PRIOR_MAX < 120, "FeatPriorities is limited to 120 entries");
+
// Each ArchExtKind correponds directly to a possible -target-feature.
#define EMIT_ARCHEXTKIND_ENUM
#include "llvm/TargetParser/AArch64TargetParserDef.inc"
@@ -72,12 +74,13 @@ struct ExtensionInfo {
struct FMVInfo {
StringRef Name; // The target_version/target_clones spelling.
- CPUFeatures FeatureBit; // Index of the bit in the FMV feature bitset.
+ std::optional<CPUFeatures>
+ FeatureBit; // Index of the bit in the FMV feature bitset.
FeatPriorities PriorityBit; // Index of the bit in the FMV priority bitset.
std::optional<ArchExtKind> ID; // The architecture extension to enable.
- FMVInfo(StringRef Name, CPUFeatures FeatureBit, FeatPriorities PriorityBit,
- std::optional<ArchExtKind> ID)
- : Name(Name), FeatureBit(FeatureBit), PriorityBit(PriorityBit), ID(ID) {};
+ FMVInfo(StringRef Name, std::optional<CPUFeatures> FeatureBit,
+ FeatPriorities PriorityBit, std::optional<ArchExtKind> ID)
+ : Name(Name), FeatureBit(FeatureBit), PriorityBit(PriorityBit), ID(ID){};
};
LLVM_ABI const std::vector<FMVInfo> &getFMVInfo();
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 4f04209cf4cfc..885e8a38e61d6 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1427,6 +1427,10 @@ APInt TargetTransformInfo::getFeatureMask(const Function &F) const {
return TTIImpl->getFeatureMask(F);
}
+APInt TargetTransformInfo::getPriorityMask(const Function &F) const {
+ return TTIImpl->getPriorityMask(F);
+}
+
bool TargetTransformInfo::isMultiversionedFunction(const Function &F) const {
return TTIImpl->isMultiversionedFunction(F);
}
diff --git a/llvm/lib/Target/AArch64/AArch64FMV.td b/llvm/lib/Target/AArch64/AArch64FMV.td
index b0f76ec6a6480..12939997401ac 100644
--- a/llvm/lib/Target/AArch64/AArch64FMV.td
+++ b/llvm/lib/Target/AArch64/AArch64FMV.td
@@ -83,3 +83,14 @@ def : FMVExtension<"sve2-sha3", "SVE_SHA3">;
def : FMVExtension<"sve2-sm4", "SVE_SM4">;
def : FMVExtension<"wfxt", "WFXT">;
def : FMVExtension<"cssc", "CSSC">;
+
+// Extensions which allow the user to override version priority.
+// 8-bits allow 256-1 priority levels (excluding all zeros).
+def : FMVExtension<"P0", "P0">;
+def : FMVExtension<"P1", "P1">;
+def : FMVExtension<"P2", "P2">;
+def : FMVExtension<"P3", "P3">;
+def : FMVExtension<"P4", "P4">;
+def : FMVExtension<"P5", "P5">;
+def : FMVExtension<"P6", "P6">;
+def : FMVExtension<"P7", "P7">;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 24ef92f5835fe..16fcc7891b8b2 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -245,12 +245,23 @@ static bool hasPossibleIncompatibleOps(const Function *F,
return false;
}
-APInt AArch64TTIImpl::getFeatureMask(const Function &F) const {
+static void extractAttrFeatures(const Function &F, const AArch64TTIImpl *TTI,
+ SmallVectorImpl<StringRef> &Features) {
StringRef AttributeStr =
- isMultiversionedFunction(F) ? "fmv-features" : "target-features";
+ TTI->isMultiversionedFunction(F) ? "fmv-features" : "target-features";
StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();
- SmallVector<StringRef, 8> Features;
FeatureStr.split(Features, ",");
+}
+
+APInt AArch64TTIImpl::getFeatureMask(const Function &F) const {
+ SmallVector<StringRef, 8> Features;
+ extractAttrFeatures(F, this, Features);
+ return AArch64::getCpuSupportsMask(Features);
+}
+
+APInt AArch64TTIImpl::getPriorityMask(const Function &F) const {
+ SmallVector<StringRef, 8> Features;
+ extractAttrFeatures(F, this, Features);
return AArch64::getFMVPriority(Features);
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 647b242d74fb3..f1e0002f602b2 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -90,6 +90,7 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
unsigned DefaultCallPenalty) const override;
APInt getFeatureMask(const Function &F) const override;
+ APInt getPriorityMask(const Function &F) const override;
bool isMultiversionedFunction(const Function &F) const override;
diff --git a/llvm/lib/TargetParser/AArch64TargetParser.cpp b/llvm/lib/TargetParser/AArch64TargetParser.cpp
index 7e3583275a734..2c0211b3a2919 100644
--- a/llvm/lib/TargetParser/AArch64TargetParser.cpp
+++ b/llvm/lib/TargetParser/AArch64TargetParser.cpp
@@ -55,21 +55,30 @@ std::optional<AArch64::FMVInfo> lookupFMVByID(AArch64::ArchExtKind ExtID) {
return {};
}
+std::optional<AArch64::FMVInfo> getFMVInfoFrom(StringRef Feature) {
+ std::optional<AArch64::FMVInfo> FMV = AArch64::parseFMVExtension(Feature);
+ if (!FMV && Feature.starts_with('+'))
+ if (std::optional<AArch64::ExtensionInfo> Ext =
+ AArch64::targetFeatureToExtension(Feature))
+ FMV = lookupFMVByID(Ext->ID);
+ return FMV;
+}
+
APInt AArch64::getFMVPriority(ArrayRef<StringRef> Features) {
// Transitively enable the Arch Extensions which correspond to each feature.
ExtensionSet FeatureBits;
+ APInt PriorityMask = APInt::getZero(128);
for (const StringRef Feature : Features) {
- std::optional<FMVInfo> FMV = parseFMVExtension(Feature);
- if (!FMV && Feature.starts_with('+')) {
- if (std::optional<ExtensionInfo> Info = targetFeatureToExtension(Feature))
- FMV = lookupFMVByID(Info->ID);
+ if (std::optional<FMVInfo> FMV = getFMVInfoFrom(Feature)) {
+ // FMV feature without a corresponding Arch Extension may affect priority
+ if (FMV->ID)
+ FeatureBits.enable(*FMV->ID);
+ else
+ PriorityMask.setBit(FMV->PriorityBit);
}
- if (FMV && FMV->ID)
- FeatureBits.enable(*FMV->ID);
}
// Construct a bitmask for all the transitively enabled Arch Extensions.
- APInt PriorityMask = APInt::getZero(128);
for (const FMVInfo &Info : getFMVInfo())
if (Info.ID && FeatureBits.Enabled.test(*Info.ID))
PriorityMask.setBit(Info.PriorityBit);
@@ -81,15 +90,15 @@ APInt AArch64::getCpuSupportsMask(ArrayRef<StringRef> Features) {
// Transitively enable the Arch Extensions which correspond to each feature.
ExtensionSet FeatureBits;
for (const StringRef Feature : Features)
- if (std::optional<FMVInfo> Info = parseFMVExtension(Feature))
- if (Info->ID)
- FeatureBits.enable(*Info->ID);
+ if (std::optional<FMVInfo> FMV = getFMVInfoFrom(Feature))
+ if (FMV->ID)
+ FeatureBits.enable(*FMV->ID);
// Construct a bitmask for all the transitively enabled Arch Extensions.
APInt FeaturesMask = APInt::getZero(128);
for (const FMVInfo &Info : getFMVInfo())
if (Info.ID && FeatureBits.Enabled.test(*Info.ID))
- FeaturesMask.setBit(Info.FeatureBit);
+ FeaturesMask.setBit(*Info.FeatureBit);
return FeaturesMask;
}
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index bdda4980c1005..689ab2cb99db4 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2528,8 +2528,10 @@ static bool OptimizeNonTrivialIFuncs(
Module &M, function_ref<TargetTransformInfo &(Function &)> GetTTI) {
bool Changed = false;
- // Cache containing the mask constructed from a function's target features.
+ // Cache containing the feature mask constructed from a function's metadata.
DenseMap<Function *, APInt> FeatureMask;
+ // Cache containing the priority mask constructed from a function's metadata.
+ DenseMap<Function *, APInt> PriorityMask;
for (GlobalIFunc &IF : M.ifuncs()) {
if (IF.isInterposable())
@@ -2559,16 +2561,19 @@ static bool OptimizeNonTrivialIFuncs(
LLVM_DEBUG(dbgs() << "Statically resolving calls to function "
<< Resolver->getName() << "\n");
- // Cache the feature mask for each callee.
+ // Cache the masks for each callee.
for (Function *Callee : Callees) {
- auto [It, Inserted] = FeatureMask.try_emplace(Callee);
- if (Inserted)
- It->second = TTI.getFeatureMask(*Callee);
+ auto [FeatIt, FeatInserted] = FeatureMask.try_emplace(Callee);
+ if (FeatInserted)
+ FeatIt->second = TTI.getFeatureMask(*Callee);
+ auto [PriorIt, PriorInserted] = PriorityMask.try_emplace(Callee);
+ if (PriorInserted)
+ PriorIt->second = TTI.getPriorityMask(*Callee);
}
// Sort the callee versions in decreasing priority order.
sort(Callees, [&](auto *LHS, auto *RHS) {
- return FeatureMask[LHS].ugt(FeatureMask[RHS]);
+ return PriorityMask[LHS].ugt(PriorityMask[RHS]);
});
// Find the callsites and cache the feature mask for each caller.
@@ -2581,6 +2586,9 @@ static bool OptimizeNonTrivialIFuncs(
auto [FeatIt, FeatInserted] = FeatureMask.try_emplace(Caller);
if (FeatInserted)
FeatIt->second = TTI.getFeatureMask(*Caller);
+ auto [PriorIt, PriorInserted] = PriorityMask.try_emplace(Caller);
+ if (PriorInserted)
+ PriorIt->second = TTI.getPriorityMask(*Caller);
auto [CallIt, CallInserted] = CallSites.try_emplace(Caller);
if (CallInserted)
Callers.push_back(Caller);
@@ -2591,7 +2599,7 @@ static bool OptimizeNonTrivialIFuncs(
// Sort the caller versions in decreasing priority order.
sort(Callers, [&](auto *LHS, auto *RHS) {
- return FeatureMask[LHS].ugt(FeatureMask[RHS]);
+ return PriorityMask[LHS].ugt(PriorityMask[RHS]);
});
auto implies = [](APInt A, APInt B) { return B.isSubsetOf(A); };
diff --git a/llvm/utils/TableGen/Basic/ARMTargetDefEmitter.cpp b/llvm/utils/TableGen/Basic/ARMTargetDefEmitter.cpp
index 3f284ee1b1032..4368551676939 100644
--- a/llvm/utils/TableGen/Basic/ARMTargetDefEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/ARMTargetDefEmitter.cpp
@@ -159,12 +159,15 @@ static void emitARMTargetDef(const RecordKeeper &RK, raw_ostream &OS) {
<< " if(I.size()) return I;\n"
<< " I.reserve(" << FMVExts.size() << ");\n";
for (const Record *Rec : FMVExts) {
+ auto FeatName = Rec->getValueAsString("BackendFeature");
+ const Record *FeatRec = ExtensionMap[FeatName];
OS << " I.emplace_back(";
OS << "\"" << Rec->getValueAsString("Name") << "\"";
- OS << ", " << Rec->getValueAsString("FeatureBit");
+ if (FeatRec)
+ OS << ", " << Rec->getValueAsString("FeatureBit");
+ else
+ OS << ", std::nullopt";
OS << ", " << Rec->getValueAsString("PriorityBit");
- auto FeatName = Rec->getValueAsString("BackendFeature");
- const Record *FeatRec = ExtensionMap[FeatName];
if (FeatRec)
OS << ", " << FeatRec->getValueAsString("ArchExtKindSpelling").upper();
else
>From b932c05e25522e815eb4e2d30844042543f15286 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Wed, 13 Aug 2025 12:07:15 +0300
Subject: [PATCH 2/3] Update AArch64TargetParser.h
clang format
---
llvm/include/llvm/TargetParser/AArch64TargetParser.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 4c9fb17104c6a..b4b291b402197 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -80,7 +80,7 @@ struct FMVInfo {
std::optional<ArchExtKind> ID; // The architecture extension to enable.
FMVInfo(StringRef Name, std::optional<CPUFeatures> FeatureBit,
FeatPriorities PriorityBit, std::optional<ArchExtKind> ID)
- : Name(Name), FeatureBit(FeatureBit), PriorityBit(PriorityBit), ID(ID){};
+ : Name(Name), FeatureBit(FeatureBit), PriorityBit(PriorityBit), ID(ID) {};
};
LLVM_ABI const std::vector<FMVInfo> &getFMVInfo();
>From 79243c408b964cd9087927e3cd388b32c1937097 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Wed, 13 Aug 2025 13:39:10 +0300
Subject: [PATCH 3/3] Update SemaARM.cpp
update comment
---
clang/lib/Sema/SemaARM.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 167790b77d791..5742d59f52636 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1591,7 +1591,7 @@ static void convertPriorityString(unsigned Priority,
"P4", "P5", "P6", "P7"};
assert(Priority > 0 && Priority < 256 && "priority out of range");
- // Convert priority=[1-31] -> P0 + ... + P4
+ // Convert priority=[1-255] -> P0 + ... + P7
for (unsigned BitPos = 0; BitPos < 8; ++BitPos)
if (Priority & (1U << BitPos))
appendFeature(PriorityString[BitPos], NewParam);
More information about the llvm-commits mailing list