[clang] [clang][FMV] Direct-call FMV callees from FMV callers (PR #80093)
Jon Roelofs via cfe-commits
cfe-commits at lists.llvm.org
Wed Jan 31 09:19:28 PST 2024
https://github.com/jroelofs updated https://github.com/llvm/llvm-project/pull/80093
>From ed52ee4424459ebc046a625341ad8dbbd38bcbe3 Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Tue, 30 Jan 2024 19:13:42 -0800
Subject: [PATCH 1/4] [clang][FMV] Direct-call multi-versioned callees from
multi-versioned callers
... when there is a callee with a matching feature set, and no other
higher-priority callee. This optimization helps the inliner see past the
ifunc+resolver to the callee that we know the call will always land on.
This is a conservative implementation of: https://github.com/llvm/llvm-project/issues/71714
---
clang/lib/CodeGen/CGCall.cpp | 72 +++++
clang/lib/CodeGen/CodeGenModule.cpp | 2 +-
.../test/CodeGen/attr-target-mv-direct-call.c | 245 ++++++++++++++++++
3 files changed, 318 insertions(+), 1 deletion(-)
create mode 100644 clang/test/CodeGen/attr-target-mv-direct-call.c
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 28c211aa631e4..84a04e3ccddd8 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -4966,6 +4966,11 @@ static unsigned getMaxVectorWidth(const llvm::Type *Ty) {
return MaxVectorWidth;
}
+// FIXME: put this somewhere nicer to share
+unsigned
+TargetMVPriority(const TargetInfo &TI,
+ const CodeGenFunction::MultiVersionResolverOption &RO);
+
RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
const CGCallee &Callee,
ReturnValueSlot ReturnValue,
@@ -5437,6 +5442,73 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
const CGCallee &ConcreteCallee = Callee.prepareConcreteCallee(*this);
llvm::Value *CalleePtr = ConcreteCallee.getFunctionPointer();
+ // If a multi-versioned caller calls a multi-versioned callee, skip the
+ // resolver when there is a precise match on the feature sets, and no
+ // possibility of a better match at runtime.
+ if (const auto *CallerFD = dyn_cast_or_null<FunctionDecl>(CurGD.getDecl()))
+ if (const auto *CallerTVA = CallerFD->getAttr<TargetVersionAttr>())
+ if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl))
+ // FIXME: do the same where either the caller or callee are
+ // target_clones.
+ if (FD->isTargetMultiVersion()) {
+ llvm::SmallVector<StringRef, 8> CallerFeats;
+ CallerTVA->getFeatures(CallerFeats);
+ MultiVersionResolverOption CallerMVRO(nullptr, "", CallerFeats);
+
+ bool HasHigherPriorityCallee = false;
+ llvm::Constant *FoundMatchingCallee = nullptr;
+ getContext().forEachMultiversionedFunctionVersion(
+ FD, [this, FD, &CallerMVRO, &HasHigherPriorityCallee,
+ &FoundMatchingCallee](const FunctionDecl *CurFD) {
+ const auto *CalleeTVA = CurFD->getAttr<TargetVersionAttr>();
+
+ GlobalDecl CurGD{
+ (CurFD->isDefined() ? CurFD->getDefinition() : CurFD)};
+ StringRef MangledName = CGM.getMangledName(CurFD);
+
+ llvm::SmallVector<StringRef, 8> CalleeFeats;
+ CalleeTVA->getFeatures(CalleeFeats);
+ MultiVersionResolverOption CalleeMVRO(nullptr, "", CalleeFeats);
+
+ const TargetInfo &TI = getTarget();
+
+ // If there is a higher priority callee, we can't do the
+ // optimization at all, as it would be a valid choice at
+ // runtime.
+ if (TargetMVPriority(TI, CalleeMVRO) >
+ TargetMVPriority(TI, CallerMVRO)) {
+ HasHigherPriorityCallee = true;
+ return;
+ }
+
+ // FIXME: we could allow a lower-priority match when the
+ // features are a proper subset. But for now, to keep things
+ // simpler, we only care about a precise match.
+ if (TargetMVPriority(TI, CalleeMVRO) <
+ TargetMVPriority(TI, CallerMVRO))
+ return;
+
+ if (llvm::Constant *Func = CGM.GetGlobalValue(MangledName)) {
+ FoundMatchingCallee = Func;
+ return;
+ }
+
+ if (CurFD->isDefined()) {
+ // FIXME: not sure how to get the address
+ } else {
+ const CGFunctionInfo &FI =
+ getTypes().arrangeGlobalDeclaration(FD);
+ llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
+ FoundMatchingCallee =
+ CGM.GetAddrOfFunction(CurGD, Ty, /*ForVTable=*/false,
+ /*DontDefer=*/false, ForDefinition);
+ }
+ });
+
+ if (FoundMatchingCallee && !HasHigherPriorityCallee)
+ CalleePtr = FoundMatchingCallee;
+ }
+
// If we're using inalloca, set up that argument.
if (ArgMemory.isValid()) {
llvm::Value *Arg = ArgMemory.getPointer();
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 6ec54cc01c923..c334e4a3a40f3 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -4092,7 +4092,7 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old,
llvm::Function *NewFn);
-static unsigned
+unsigned
TargetMVPriority(const TargetInfo &TI,
const CodeGenFunction::MultiVersionResolverOption &RO) {
unsigned Priority = 0;
diff --git a/clang/test/CodeGen/attr-target-mv-direct-call.c b/clang/test/CodeGen/attr-target-mv-direct-call.c
new file mode 100644
index 0000000000000..687fdd1ca3c24
--- /dev/null
+++ b/clang/test/CodeGen/attr-target-mv-direct-call.c
@@ -0,0 +1,245 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -emit-llvm -o - %s | FileCheck %s
+
+// Check that we make a direct call from direct_caller._Msimd to
+// direct_callee._Msimd when there is no better option.
+__attribute__((target_version("simd"))) void direct_callee(void) {}
+__attribute__((target_version("default"))) void direct_callee(void) {}
+__attribute__((target_version("simd"))) void direct_caller(void) { direct_callee(); }
+__attribute__((target_version("default"))) void direct_caller(void) { direct_callee(); }
+
+// ... and that we go through the ifunc+resolver when there is a better option
+// that might be chosen at runtime.
+__attribute__((target_version("simd"))) void resolved_callee1(void) {}
+__attribute__((target_version("fcma"))) void resolved_callee1(void) {}
+__attribute__((target_version("default"))) void resolved_callee1(void) {}
+__attribute__((target_version("simd"))) void resolved_caller1(void) { resolved_callee1(); }
+__attribute__((target_version("default"))) void resolved_caller1(void) { resolved_callee1(); }
+
+// FIXME: we could direct call in cases like this:
+__attribute__((target_version("fp"))) void resolved_callee2(void) {}
+__attribute__((target_version("default"))) void resolved_callee2(void) {}
+__attribute__((target_version("simd+fp"))) void resolved_caller2(void) { resolved_callee2(); }
+__attribute__((target_version("default"))) void resolved_caller2(void) { resolved_callee2(); }
+
+void source() {
+ direct_caller();
+ resolved_caller1();
+ resolved_caller2();
+}
+
+//.
+// CHECK: @__aarch64_cpu_features = external dso_local global { i64 }
+// CHECK: @direct_callee.ifunc = weak_odr ifunc void (), ptr @direct_callee.resolver
+// CHECK: @direct_caller.ifunc = weak_odr ifunc void (), ptr @direct_caller.resolver
+// CHECK: @resolved_callee1.ifunc = weak_odr ifunc void (), ptr @resolved_callee1.resolver
+// CHECK: @resolved_caller1.ifunc = weak_odr ifunc void (), ptr @resolved_caller1.resolver
+// CHECK: @resolved_callee2.ifunc = weak_odr ifunc void (), ptr @resolved_callee2.resolver
+// CHECK: @resolved_caller2.ifunc = weak_odr ifunc void (), ptr @resolved_caller2.resolver
+//.
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@direct_callee._Msimd
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@direct_callee.resolver() comdat {
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @__init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 512
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 512
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @direct_callee._Msimd
+// CHECK: resolver_else:
+// CHECK-NEXT: ret ptr @direct_callee.default
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@direct_caller._Msimd
+// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @direct_callee._Msimd()
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@direct_caller.resolver() comdat {
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @__init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 512
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 512
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @direct_caller._Msimd
+// CHECK: resolver_else:
+// CHECK-NEXT: ret ptr @direct_caller.default
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@resolved_callee1._Msimd
+// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@resolved_callee1.resolver() comdat {
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @__init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 2097152
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 2097152
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @resolved_callee1._Mfcma
+// CHECK: resolver_else:
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 512
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 512
+// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
+// CHECK: resolver_return1:
+// CHECK-NEXT: ret ptr @resolved_callee1._Msimd
+// CHECK: resolver_else2:
+// CHECK-NEXT: ret ptr @resolved_callee1.default
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@resolved_caller1._Msimd
+// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @resolved_callee1.ifunc()
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@resolved_caller1.resolver() comdat {
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @__init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 512
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 512
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @resolved_caller1._Msimd
+// CHECK: resolver_else:
+// CHECK-NEXT: ret ptr @resolved_caller1.default
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@resolved_callee2._Mfp
+// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@resolved_callee2.resolver() comdat {
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @__init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 256
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 256
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @resolved_callee2._Mfp
+// CHECK: resolver_else:
+// CHECK-NEXT: ret ptr @resolved_callee2.default
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@resolved_caller2._MfpMsimd
+// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @resolved_callee2.ifunc()
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@resolved_caller2.resolver() comdat {
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @__init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 768
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 768
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @resolved_caller2._MfpMsimd
+// CHECK: resolver_else:
+// CHECK-NEXT: ret ptr @resolved_caller2.default
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@source
+// CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @direct_caller.ifunc()
+// CHECK-NEXT: call void @resolved_caller1.ifunc()
+// CHECK-NEXT: call void @resolved_caller2.ifunc()
+// CHECK-NEXT: ret void
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@direct_callee.default
+// CHECK-SAME: () #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@direct_caller.default
+// CHECK-SAME: () #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @direct_callee.ifunc()
+// CHECK-NEXT: ret void
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@resolved_callee1._Mfcma
+// CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@resolved_callee1.default
+// CHECK-SAME: () #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@resolved_caller1.default
+// CHECK-SAME: () #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @resolved_callee1.ifunc()
+// CHECK-NEXT: ret void
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@resolved_callee2.default
+// CHECK-SAME: () #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@resolved_caller2.default
+// CHECK-SAME: () #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @resolved_callee2.ifunc()
+// CHECK-NEXT: ret void
+//
+//.
+// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon" }
+// CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+complxnum,+fp-armv8,+neon" }
+//.
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
>From 712643868509f64ee820688579b0278b8783456c Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Wed, 31 Jan 2024 08:53:36 -0800
Subject: [PATCH 2/4] don't do it at -O0
---
clang/lib/CodeGen/CGCall.cpp | 127 ++---
.../test/CodeGen/attr-target-mv-direct-call.c | 453 +++++++++---------
2 files changed, 284 insertions(+), 296 deletions(-)
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 84a04e3ccddd8..fe69b0ab133ac 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5446,68 +5446,71 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// resolver when there is a precise match on the feature sets, and no
// possibility of a better match at runtime.
if (const auto *CallerFD = dyn_cast_or_null<FunctionDecl>(CurGD.getDecl()))
- if (const auto *CallerTVA = CallerFD->getAttr<TargetVersionAttr>())
- if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl))
- // FIXME: do the same where either the caller or callee are
- // target_clones.
- if (FD->isTargetMultiVersion()) {
- llvm::SmallVector<StringRef, 8> CallerFeats;
- CallerTVA->getFeatures(CallerFeats);
- MultiVersionResolverOption CallerMVRO(nullptr, "", CallerFeats);
-
- bool HasHigherPriorityCallee = false;
- llvm::Constant *FoundMatchingCallee = nullptr;
- getContext().forEachMultiversionedFunctionVersion(
- FD, [this, FD, &CallerMVRO, &HasHigherPriorityCallee,
- &FoundMatchingCallee](const FunctionDecl *CurFD) {
- const auto *CalleeTVA = CurFD->getAttr<TargetVersionAttr>();
-
- GlobalDecl CurGD{
- (CurFD->isDefined() ? CurFD->getDefinition() : CurFD)};
- StringRef MangledName = CGM.getMangledName(CurFD);
-
- llvm::SmallVector<StringRef, 8> CalleeFeats;
- CalleeTVA->getFeatures(CalleeFeats);
- MultiVersionResolverOption CalleeMVRO(nullptr, "", CalleeFeats);
-
- const TargetInfo &TI = getTarget();
-
- // If there is a higher priority callee, we can't do the
- // optimization at all, as it would be a valid choice at
- // runtime.
- if (TargetMVPriority(TI, CalleeMVRO) >
- TargetMVPriority(TI, CallerMVRO)) {
- HasHigherPriorityCallee = true;
- return;
- }
-
- // FIXME: we could allow a lower-priority match when the
- // features are a proper subset. But for now, to keep things
- // simpler, we only care about a precise match.
- if (TargetMVPriority(TI, CalleeMVRO) <
- TargetMVPriority(TI, CallerMVRO))
- return;
-
- if (llvm::Constant *Func = CGM.GetGlobalValue(MangledName)) {
- FoundMatchingCallee = Func;
- return;
- }
-
- if (CurFD->isDefined()) {
- // FIXME: not sure how to get the address
- } else {
- const CGFunctionInfo &FI =
- getTypes().arrangeGlobalDeclaration(FD);
- llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
- FoundMatchingCallee =
- CGM.GetAddrOfFunction(CurGD, Ty, /*ForVTable=*/false,
- /*DontDefer=*/false, ForDefinition);
- }
- });
-
- if (FoundMatchingCallee && !HasHigherPriorityCallee)
- CalleePtr = FoundMatchingCallee;
- }
+ if (CGM.getCodeGenOpts().OptimizationLevel > 0 &&
+ !CallerFD->hasAttr<OptimizeNoneAttr>())
+ if (const auto *CallerTVA = CallerFD->getAttr<TargetVersionAttr>())
+ if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl))
+ // FIXME: do the same where either the caller or callee are
+ // target_clones.
+ if (FD->isTargetMultiVersion()) {
+ llvm::SmallVector<StringRef, 8> CallerFeats;
+ CallerTVA->getFeatures(CallerFeats);
+ MultiVersionResolverOption CallerMVRO(nullptr, "", CallerFeats);
+
+ bool HasHigherPriorityCallee = false;
+ llvm::Constant *FoundMatchingCallee = nullptr;
+ getContext().forEachMultiversionedFunctionVersion(
+ FD, [this, FD, &CallerMVRO, &HasHigherPriorityCallee,
+ &FoundMatchingCallee](const FunctionDecl *CurFD) {
+ const auto *CalleeTVA = CurFD->getAttr<TargetVersionAttr>();
+
+ GlobalDecl CurGD{
+ (CurFD->isDefined() ? CurFD->getDefinition() : CurFD)};
+ StringRef MangledName = CGM.getMangledName(CurFD);
+
+ llvm::SmallVector<StringRef, 8> CalleeFeats;
+ CalleeTVA->getFeatures(CalleeFeats);
+ MultiVersionResolverOption CalleeMVRO(nullptr, "",
+ CalleeFeats);
+
+ const TargetInfo &TI = getTarget();
+
+ // If there is a higher priority callee, we can't do the
+ // optimization at all, as it would be a valid choice at
+ // runtime.
+ if (TargetMVPriority(TI, CalleeMVRO) >
+ TargetMVPriority(TI, CallerMVRO)) {
+ HasHigherPriorityCallee = true;
+ return;
+ }
+
+ // FIXME: we could allow a lower-priority match when the
+ // features are a proper subset. But for now, to keep things
+ // simpler, we only care about a precise match.
+ if (TargetMVPriority(TI, CalleeMVRO) <
+ TargetMVPriority(TI, CallerMVRO))
+ return;
+
+ if (llvm::Constant *Func = CGM.GetGlobalValue(MangledName)) {
+ FoundMatchingCallee = Func;
+ return;
+ }
+
+ if (CurFD->isDefined()) {
+ // FIXME: not sure how to get the address
+ } else {
+ const CGFunctionInfo &FI =
+ getTypes().arrangeGlobalDeclaration(FD);
+ llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
+ FoundMatchingCallee = CGM.GetAddrOfFunction(
+ CurGD, Ty, /*ForVTable=*/false,
+ /*DontDefer=*/false, ForDefinition);
+ }
+ });
+
+ if (FoundMatchingCallee && !HasHigherPriorityCallee)
+ CalleePtr = FoundMatchingCallee;
+ }
// If we're using inalloca, set up that argument.
if (ArgMemory.isValid()) {
diff --git a/clang/test/CodeGen/attr-target-mv-direct-call.c b/clang/test/CodeGen/attr-target-mv-direct-call.c
index 687fdd1ca3c24..9a465dba52d25 100644
--- a/clang/test/CodeGen/attr-target-mv-direct-call.c
+++ b/clang/test/CodeGen/attr-target-mv-direct-call.c
@@ -1,245 +1,230 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -emit-llvm -o - %s | FileCheck %s
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "call i32" --include-generated-funcs
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -O0 -S -emit-llvm -disable-llvm-optzns -o - %s | FileCheck %s --check-prefixes=CHECK,O0
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -O2 -S -emit-llvm -disable-llvm-optzns -o - %s | FileCheck %s --check-prefixes=CHECK,O2
// Check that we make a direct call from direct_caller._Msimd to
// direct_callee._Msimd when there is no better option.
-__attribute__((target_version("simd"))) void direct_callee(void) {}
-__attribute__((target_version("default"))) void direct_callee(void) {}
-__attribute__((target_version("simd"))) void direct_caller(void) { direct_callee(); }
-__attribute__((target_version("default"))) void direct_caller(void) { direct_callee(); }
+__attribute__((target_version("simd"))) int direct_callee(void) { return 1; }
+__attribute__((target_version("default"))) int direct_callee(void) { return 2; }
+__attribute__((target_version("simd"))) int direct_caller(void) { return direct_callee(); }
+__attribute__((target_version("default"))) int direct_caller(void) { return direct_callee(); }
+
+__attribute__((target_version("simd"), optnone)) int optnone_caller(void) { return direct_callee(); }
+__attribute__((target_version("default"), optnone)) int optnone_caller(void) { return direct_callee(); }
// ... and that we go through the ifunc+resolver when there is a better option
// that might be chosen at runtime.
-__attribute__((target_version("simd"))) void resolved_callee1(void) {}
-__attribute__((target_version("fcma"))) void resolved_callee1(void) {}
-__attribute__((target_version("default"))) void resolved_callee1(void) {}
-__attribute__((target_version("simd"))) void resolved_caller1(void) { resolved_callee1(); }
-__attribute__((target_version("default"))) void resolved_caller1(void) { resolved_callee1(); }
+__attribute__((target_version("simd"))) int resolved_callee1(void) { return 3; }
+__attribute__((target_version("fcma"))) int resolved_callee1(void) { return 4; }
+__attribute__((target_version("default"))) int resolved_callee1(void) { return 5; }
+__attribute__((target_version("simd"))) int resolved_caller1(void) { return resolved_callee1(); }
+__attribute__((target_version("default"))) int resolved_caller1(void) { return resolved_callee1(); }
// FIXME: we could direct call in cases like this:
-__attribute__((target_version("fp"))) void resolved_callee2(void) {}
-__attribute__((target_version("default"))) void resolved_callee2(void) {}
-__attribute__((target_version("simd+fp"))) void resolved_caller2(void) { resolved_callee2(); }
-__attribute__((target_version("default"))) void resolved_caller2(void) { resolved_callee2(); }
+__attribute__((target_version("fp"))) int resolved_callee2(void) { return 6; }
+__attribute__((target_version("default"))) int resolved_callee2(void) { return 7; }
+__attribute__((target_version("simd+fp"))) int resolved_caller2(void) { return resolved_callee2(); }
+__attribute__((target_version("default"))) int resolved_caller2(void) { return resolved_callee2(); }
-void source() {
- direct_caller();
- resolved_caller1();
- resolved_caller2();
+int source() {
+ return direct_caller() + optnone_caller() + resolved_caller1() + resolved_caller2();
}
-//.
-// CHECK: @__aarch64_cpu_features = external dso_local global { i64 }
-// CHECK: @direct_callee.ifunc = weak_odr ifunc void (), ptr @direct_callee.resolver
-// CHECK: @direct_caller.ifunc = weak_odr ifunc void (), ptr @direct_caller.resolver
-// CHECK: @resolved_callee1.ifunc = weak_odr ifunc void (), ptr @resolved_callee1.resolver
-// CHECK: @resolved_caller1.ifunc = weak_odr ifunc void (), ptr @resolved_caller1.resolver
-// CHECK: @resolved_callee2.ifunc = weak_odr ifunc void (), ptr @resolved_callee2.resolver
-// CHECK: @resolved_caller2.ifunc = weak_odr ifunc void (), ptr @resolved_caller2.resolver
-//.
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@direct_callee._Msimd
-// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret void
-//
-//
-// CHECK-LABEL: define {{[^@]+}}@direct_callee.resolver() comdat {
-// CHECK-NEXT: resolver_entry:
-// CHECK-NEXT: call void @__init_cpu_features_resolver()
-// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 512
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 512
-// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
-// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
-// CHECK: resolver_return:
-// CHECK-NEXT: ret ptr @direct_callee._Msimd
-// CHECK: resolver_else:
-// CHECK-NEXT: ret ptr @direct_callee.default
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@direct_caller._Msimd
-// CHECK-SAME: () #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: call void @direct_callee._Msimd()
-// CHECK-NEXT: ret void
-//
-//
-// CHECK-LABEL: define {{[^@]+}}@direct_caller.resolver() comdat {
-// CHECK-NEXT: resolver_entry:
-// CHECK-NEXT: call void @__init_cpu_features_resolver()
-// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 512
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 512
-// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
-// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
-// CHECK: resolver_return:
-// CHECK-NEXT: ret ptr @direct_caller._Msimd
-// CHECK: resolver_else:
-// CHECK-NEXT: ret ptr @direct_caller.default
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@resolved_callee1._Msimd
-// CHECK-SAME: () #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret void
-//
-//
-// CHECK-LABEL: define {{[^@]+}}@resolved_callee1.resolver() comdat {
-// CHECK-NEXT: resolver_entry:
-// CHECK-NEXT: call void @__init_cpu_features_resolver()
-// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 2097152
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 2097152
-// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
-// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
-// CHECK: resolver_return:
-// CHECK-NEXT: ret ptr @resolved_callee1._Mfcma
-// CHECK: resolver_else:
-// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 512
-// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 512
-// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
-// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
-// CHECK: resolver_return1:
-// CHECK-NEXT: ret ptr @resolved_callee1._Msimd
-// CHECK: resolver_else2:
-// CHECK-NEXT: ret ptr @resolved_callee1.default
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@resolved_caller1._Msimd
-// CHECK-SAME: () #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: call void @resolved_callee1.ifunc()
-// CHECK-NEXT: ret void
-//
-//
-// CHECK-LABEL: define {{[^@]+}}@resolved_caller1.resolver() comdat {
-// CHECK-NEXT: resolver_entry:
-// CHECK-NEXT: call void @__init_cpu_features_resolver()
-// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 512
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 512
-// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
-// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
-// CHECK: resolver_return:
-// CHECK-NEXT: ret ptr @resolved_caller1._Msimd
-// CHECK: resolver_else:
-// CHECK-NEXT: ret ptr @resolved_caller1.default
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@resolved_callee2._Mfp
-// CHECK-SAME: () #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret void
-//
-//
-// CHECK-LABEL: define {{[^@]+}}@resolved_callee2.resolver() comdat {
-// CHECK-NEXT: resolver_entry:
-// CHECK-NEXT: call void @__init_cpu_features_resolver()
-// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 256
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 256
-// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
-// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
-// CHECK: resolver_return:
-// CHECK-NEXT: ret ptr @resolved_callee2._Mfp
-// CHECK: resolver_else:
-// CHECK-NEXT: ret ptr @resolved_callee2.default
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@resolved_caller2._MfpMsimd
-// CHECK-SAME: () #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: call void @resolved_callee2.ifunc()
-// CHECK-NEXT: ret void
-//
-//
-// CHECK-LABEL: define {{[^@]+}}@resolved_caller2.resolver() comdat {
-// CHECK-NEXT: resolver_entry:
-// CHECK-NEXT: call void @__init_cpu_features_resolver()
-// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 768
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 768
-// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
-// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
-// CHECK: resolver_return:
-// CHECK-NEXT: ret ptr @resolved_caller2._MfpMsimd
-// CHECK: resolver_else:
-// CHECK-NEXT: ret ptr @resolved_caller2.default
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@source
-// CHECK-SAME: () #[[ATTR1:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: call void @direct_caller.ifunc()
-// CHECK-NEXT: call void @resolved_caller1.ifunc()
-// CHECK-NEXT: call void @resolved_caller2.ifunc()
-// CHECK-NEXT: ret void
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@direct_callee.default
-// CHECK-SAME: () #[[ATTR1]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret void
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@direct_caller.default
-// CHECK-SAME: () #[[ATTR1]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: call void @direct_callee.ifunc()
-// CHECK-NEXT: ret void
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@resolved_callee1._Mfcma
-// CHECK-SAME: () #[[ATTR2:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret void
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@resolved_callee1.default
-// CHECK-SAME: () #[[ATTR1]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret void
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@resolved_caller1.default
-// CHECK-SAME: () #[[ATTR1]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: call void @resolved_callee1.ifunc()
-// CHECK-NEXT: ret void
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@resolved_callee2.default
-// CHECK-SAME: () #[[ATTR1]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret void
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@resolved_caller2.default
-// CHECK-SAME: () #[[ATTR1]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: call void @resolved_callee2.ifunc()
-// CHECK-NEXT: ret void
-//
-//.
-// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon" }
-// CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+complxnum,+fp-armv8,+neon" }
-//.
-// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-//.
+// CHECK-LABEL: @direct_callee._Msimd(
+//
+// CHECK-LABEL: @direct_callee.resolver(
+//
+//
+// CHECK-LABEL: @direct_caller.resolver(
+//
+// CHECK-LABEL: @optnone_caller._Msimd(
+// CHECK: [[CALL:%.*]] = call i32 @direct_callee.ifunc()
+//
+//
+// CHECK-LABEL: @optnone_caller.resolver(
+//
+// CHECK-LABEL: @resolved_callee1._Msimd(
+//
+// CHECK-LABEL: @resolved_callee1.resolver(
+//
+// CHECK-LABEL: @resolved_caller1._Msimd(
+// CHECK: [[CALL:%.*]] = call i32 @resolved_callee1.ifunc()
+//
+//
+// CHECK-LABEL: @resolved_caller1.resolver(
+//
+// CHECK-LABEL: @resolved_callee2._Mfp(
+//
+// CHECK-LABEL: @resolved_callee2.resolver(
+//
+// CHECK-LABEL: @resolved_caller2._MfpMsimd(
+// CHECK: [[CALL:%.*]] = call i32 @resolved_callee2.ifunc()
+//
+//
+// CHECK-LABEL: @resolved_caller2.resolver(
+//
+// CHECK-LABEL: @source(
+// CHECK: [[CALL:%.*]] = call i32 @direct_caller.ifunc()
+// CHECK: [[CALL1:%.*]] = call i32 @optnone_caller.ifunc()
+// CHECK: [[CALL2:%.*]] = call i32 @resolved_caller1.ifunc()
+// CHECK: [[CALL4:%.*]] = call i32 @resolved_caller2.ifunc()
+//
+//
+// CHECK-LABEL: @direct_callee.default(
+//
+// CHECK-LABEL: @direct_caller.default(
+// CHECK: [[CALL:%.*]] = call i32 @direct_callee.ifunc()
+//
+//
+// CHECK-LABEL: @optnone_caller.default(
+// CHECK: [[CALL:%.*]] = call i32 @direct_callee.ifunc()
+//
+//
+// CHECK-LABEL: @resolved_callee1._Mfcma(
+//
+// CHECK-LABEL: @resolved_callee1.default(
+//
+// CHECK-LABEL: @resolved_caller1.default(
+// CHECK: [[CALL:%.*]] = call i32 @resolved_callee1.ifunc()
+//
+//
+// CHECK-LABEL: @resolved_callee2.default(
+//
+// CHECK-LABEL: @resolved_caller2.default(
+// CHECK: [[CALL:%.*]] = call i32 @resolved_callee2.ifunc()
+//
+//
+// O0-LABEL: @direct_callee._Msimd(
+//
+// O0-LABEL: @direct_callee.resolver(
+//
+// O0-LABEL: @direct_caller._Msimd(
+// O0: [[CALL:%.*]] = call i32 @direct_callee.ifunc()
+//
+//
+// O0-LABEL: @direct_caller.resolver(
+//
+// O0-LABEL: @optnone_caller._Msimd(
+// O0: [[CALL:%.*]] = call i32 @direct_callee.ifunc()
+//
+//
+// O0-LABEL: @optnone_caller.resolver(
+//
+// O0-LABEL: @resolved_callee1._Msimd(
+//
+// O0-LABEL: @resolved_callee1.resolver(
+//
+// O0-LABEL: @resolved_caller1._Msimd(
+// O0: [[CALL:%.*]] = call i32 @resolved_callee1.ifunc()
+//
+//
+// O0-LABEL: @resolved_caller1.resolver(
+//
+// O0-LABEL: @resolved_callee2._Mfp(
+//
+// O0-LABEL: @resolved_callee2.resolver(
+//
+// O0-LABEL: @resolved_caller2._MfpMsimd(
+// O0: [[CALL:%.*]] = call i32 @resolved_callee2.ifunc()
+//
+//
+// O0-LABEL: @resolved_caller2.resolver(
+//
+// O0-LABEL: @source(
+// O0: [[CALL:%.*]] = call i32 @direct_caller.ifunc()
+// O0: [[CALL1:%.*]] = call i32 @optnone_caller.ifunc()
+// O0: [[CALL2:%.*]] = call i32 @resolved_caller1.ifunc()
+// O0: [[CALL4:%.*]] = call i32 @resolved_caller2.ifunc()
+//
+//
+// O0-LABEL: @direct_callee.default(
+//
+// O0-LABEL: @direct_caller.default(
+// O0: [[CALL:%.*]] = call i32 @direct_callee.ifunc()
+//
+//
+// O0-LABEL: @optnone_caller.default(
+// O0: [[CALL:%.*]] = call i32 @direct_callee.ifunc()
+//
+//
+// O0-LABEL: @resolved_callee1._Mfcma(
+//
+// O0-LABEL: @resolved_callee1.default(
+//
+// O0-LABEL: @resolved_caller1.default(
+// O0: [[CALL:%.*]] = call i32 @resolved_callee1.ifunc()
+//
+//
+// O0-LABEL: @resolved_callee2.default(
+//
+// O0-LABEL: @resolved_caller2.default(
+// O0: [[CALL:%.*]] = call i32 @resolved_callee2.ifunc()
+//
+//
+//
+// O2-LABEL: @direct_callee._Msimd(
+//
+// O2-LABEL: @direct_callee.resolver(
+//
+// O2-LABEL: @direct_caller._Msimd(
+// O2: [[CALL:%.*]] = call i32 @direct_callee._Msimd()
+//
+//
+// O2-LABEL: @direct_caller.resolver(
+//
+// O2-LABEL: @optnone_caller._Msimd(
+// O2: [[CALL:%.*]] = call i32 @direct_callee.ifunc()
+//
+//
+// O2-LABEL: @optnone_caller.resolver(
+//
+// O2-LABEL: @resolved_callee1._Msimd(
+//
+// O2-LABEL: @resolved_callee1.resolver(
+//
+// O2-LABEL: @resolved_caller1._Msimd(
+// O2: [[CALL:%.*]] = call i32 @resolved_callee1.ifunc()
+//
+//
+// O2-LABEL: @resolved_caller1.resolver(
+//
+// O2-LABEL: @resolved_callee2._Mfp(
+//
+// O2-LABEL: @resolved_callee2.resolver(
+//
+// O2-LABEL: @resolved_caller2._MfpMsimd(
+// O2: [[CALL:%.*]] = call i32 @resolved_callee2.ifunc()
+//
+//
+// O2-LABEL: @resolved_caller2.resolver(
+//
+// O2-LABEL: @source(
+// O2: [[CALL:%.*]] = call i32 @direct_caller.ifunc()
+// O2: [[CALL1:%.*]] = call i32 @optnone_caller.ifunc()
+// O2: [[CALL2:%.*]] = call i32 @resolved_caller1.ifunc()
+// O2: [[CALL4:%.*]] = call i32 @resolved_caller2.ifunc()
+//
+//
+// O2-LABEL: @direct_callee.default(
+//
+// O2-LABEL: @direct_caller.default(
+// O2: [[CALL:%.*]] = call i32 @direct_callee.ifunc()
+//
+//
+// O2-LABEL: @optnone_caller.default(
+// O2: [[CALL:%.*]] = call i32 @direct_callee.ifunc()
+//
+//
+// O2-LABEL: @resolved_callee1._Mfcma(
+//
+// O2-LABEL: @resolved_callee1.default(
+//
+// O2-LABEL: @resolved_caller1.default(
+// O2: [[CALL:%.*]] = call i32 @resolved_callee1.ifunc()
+//
+//
+// O2-LABEL: @resolved_callee2.default(
+//
+// O2-LABEL: @resolved_caller2.default(
+// O2: [[CALL:%.*]] = call i32 @resolved_callee2.ifunc()
+//
>From cadeb37acd2786f29984645420e8a715e8ccfe8b Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Wed, 31 Jan 2024 08:55:43 -0800
Subject: [PATCH 3/4] only care about caller => callee calls in the test
---
clang/test/CodeGen/attr-target-mv-direct-call.c | 17 +----------------
1 file changed, 1 insertion(+), 16 deletions(-)
diff --git a/clang/test/CodeGen/attr-target-mv-direct-call.c b/clang/test/CodeGen/attr-target-mv-direct-call.c
index 9a465dba52d25..88b4568d0c084 100644
--- a/clang/test/CodeGen/attr-target-mv-direct-call.c
+++ b/clang/test/CodeGen/attr-target-mv-direct-call.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "call i32" --include-generated-funcs
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "call i32.*callee" --include-generated-funcs
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -O0 -S -emit-llvm -disable-llvm-optzns -o - %s | FileCheck %s --check-prefixes=CHECK,O0
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -O2 -S -emit-llvm -disable-llvm-optzns -o - %s | FileCheck %s --check-prefixes=CHECK,O2
@@ -64,11 +64,6 @@ int source() {
// CHECK-LABEL: @resolved_caller2.resolver(
//
// CHECK-LABEL: @source(
-// CHECK: [[CALL:%.*]] = call i32 @direct_caller.ifunc()
-// CHECK: [[CALL1:%.*]] = call i32 @optnone_caller.ifunc()
-// CHECK: [[CALL2:%.*]] = call i32 @resolved_caller1.ifunc()
-// CHECK: [[CALL4:%.*]] = call i32 @resolved_caller2.ifunc()
-//
//
// CHECK-LABEL: @direct_callee.default(
//
@@ -131,11 +126,6 @@ int source() {
// O0-LABEL: @resolved_caller2.resolver(
//
// O0-LABEL: @source(
-// O0: [[CALL:%.*]] = call i32 @direct_caller.ifunc()
-// O0: [[CALL1:%.*]] = call i32 @optnone_caller.ifunc()
-// O0: [[CALL2:%.*]] = call i32 @resolved_caller1.ifunc()
-// O0: [[CALL4:%.*]] = call i32 @resolved_caller2.ifunc()
-//
//
// O0-LABEL: @direct_callee.default(
//
@@ -199,11 +189,6 @@ int source() {
// O2-LABEL: @resolved_caller2.resolver(
//
// O2-LABEL: @source(
-// O2: [[CALL:%.*]] = call i32 @direct_caller.ifunc()
-// O2: [[CALL1:%.*]] = call i32 @optnone_caller.ifunc()
-// O2: [[CALL2:%.*]] = call i32 @resolved_caller1.ifunc()
-// O2: [[CALL4:%.*]] = call i32 @resolved_caller2.ifunc()
-//
//
// O2-LABEL: @direct_callee.default(
//
>From 4129daf8de38949970bb00b226a2d8df1662a0db Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Wed, 31 Jan 2024 09:18:50 -0800
Subject: [PATCH 4/4] address a fixme: move priority calculation to a member
function
---
clang/lib/CodeGen/CGCall.cpp | 11 ++---------
clang/lib/CodeGen/CodeGenFunction.cpp | 18 ++++++++++++++++++
clang/lib/CodeGen/CodeGenFunction.h | 2 ++
clang/lib/CodeGen/CodeGenModule.cpp | 21 +--------------------
4 files changed, 23 insertions(+), 29 deletions(-)
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index fe69b0ab133ac..3f6e171c67345 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -4966,11 +4966,6 @@ static unsigned getMaxVectorWidth(const llvm::Type *Ty) {
return MaxVectorWidth;
}
-// FIXME: put this somewhere nicer to share
-unsigned
-TargetMVPriority(const TargetInfo &TI,
- const CodeGenFunction::MultiVersionResolverOption &RO);
-
RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
const CGCallee &Callee,
ReturnValueSlot ReturnValue,
@@ -5478,8 +5473,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// If there is a higher priority callee, we can't do the
// optimization at all, as it would be a valid choice at
// runtime.
- if (TargetMVPriority(TI, CalleeMVRO) >
- TargetMVPriority(TI, CallerMVRO)) {
+ if (CalleeMVRO.priority(TI) > CallerMVRO.priority(TI)) {
HasHigherPriorityCallee = true;
return;
}
@@ -5487,8 +5481,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// FIXME: we could allow a lower-priority match when the
// features are a proper subset. But for now, to keep things
// simpler, we only care about a precise match.
- if (TargetMVPriority(TI, CalleeMVRO) <
- TargetMVPriority(TI, CallerMVRO))
+ if (CalleeMVRO.priority(TI) < CallerMVRO.priority(TI))
return;
if (llvm::Constant *Func = CGM.GetGlobalValue(MangledName)) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 1ad905078d349..f2c93b5e5398b 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -2697,6 +2697,24 @@ void CodeGenFunction::EmitSanitizerStatReport(llvm::SanitizerStatKind SSK) {
CGM.getSanStats().create(IRB, SSK);
}
+unsigned CodeGenFunction::MultiVersionResolverOption::priority(
+ const TargetInfo &TI) const {
+ unsigned Priority = 0;
+ unsigned NumFeatures = 0;
+ for (StringRef Feat : Conditions.Features) {
+ Priority = std::max(Priority, TI.multiVersionSortPriority(Feat));
+ NumFeatures++;
+ }
+
+ if (!Conditions.Architecture.empty())
+ Priority = std::max(Priority,
+ TI.multiVersionSortPriority(Conditions.Architecture));
+
+ Priority += TI.multiVersionFeatureCost() * NumFeatures;
+
+ return Priority;
+}
+
void CodeGenFunction::EmitKCFIOperandBundle(
const CGCallee &Callee, SmallVectorImpl<llvm::OperandBundleDef> &Bundles) {
const FunctionProtoType *FP =
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 143ad64e8816b..525852437dbb8 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4965,6 +4965,8 @@ class CodeGenFunction : public CodeGenTypeCache {
MultiVersionResolverOption(llvm::Function *F, StringRef Arch,
ArrayRef<StringRef> Feats)
: Function(F), Conditions(Arch, Feats) {}
+
+ unsigned priority(const TargetInfo &TI) const;
};
// Emits the body of a multiversion function's resolver. Assumes that the
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c334e4a3a40f3..d6abd4cc9454d 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -4092,25 +4092,6 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old,
llvm::Function *NewFn);
-unsigned
-TargetMVPriority(const TargetInfo &TI,
- const CodeGenFunction::MultiVersionResolverOption &RO) {
- unsigned Priority = 0;
- unsigned NumFeatures = 0;
- for (StringRef Feat : RO.Conditions.Features) {
- Priority = std::max(Priority, TI.multiVersionSortPriority(Feat));
- NumFeatures++;
- }
-
- if (!RO.Conditions.Architecture.empty())
- Priority = std::max(
- Priority, TI.multiVersionSortPriority(RO.Conditions.Architecture));
-
- Priority += TI.multiVersionFeatureCost() * NumFeatures;
-
- return Priority;
-}
-
// Multiversion functions should be at most 'WeakODRLinkage' so that a different
// TU can forward declare the function without causing problems. Particularly
// in the cases of CPUDispatch, this causes issues. This also makes sure we
@@ -4244,7 +4225,7 @@ void CodeGenModule::emitMultiVersionFunctions() {
llvm::stable_sort(
Options, [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS,
const CodeGenFunction::MultiVersionResolverOption &RHS) {
- return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS);
+ return LHS.priority(TI) > RHS.priority(TI);
});
CodeGenFunction CGF(*this);
CGF.EmitMultiVersionResolver(ResolverFunc, Options);
More information about the cfe-commits mailing list