[clang] [FMV][Clang][CodeGen] Resolves corresponding callee for multi-versioning callers (PR #107822)

Yingwei Zheng via cfe-commits cfe-commits at lists.llvm.org
Mon Sep 9 01:28:17 PDT 2024


https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/107822

Closes #94949.


>From 110eea45aaaca6508f41032641a083df1c43092f Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 9 Sep 2024 15:53:05 +0800
Subject: [PATCH 1/2] [FMV][Clang][CodeGen] Add pre-commit tests. NFC.

---
 .../test/CodeGen/attr-target-clones-inline.c  | 295 ++++++++++++++++++
 1 file changed, 295 insertions(+)
 create mode 100644 clang/test/CodeGen/attr-target-clones-inline.c

diff --git a/clang/test/CodeGen/attr-target-clones-inline.c b/clang/test/CodeGen/attr-target-clones-inline.c
new file mode 100644
index 00000000000000..cc9e8dc762e483
--- /dev/null
+++ b/clang/test/CodeGen/attr-target-clones-inline.c
@@ -0,0 +1,295 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --include-generated-funcs --version 5
+// RUN: %clang_cc1 -O1 -disable-llvm-passes -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+
+__attribute__((target_clones("default,sse4.2,avx2")))
+int callee(void) { return 1; }
+
+__attribute__((target_clones("default,avx2,sse4.2")))
+int caller(void) { return callee(); }
+
+__attribute__((target_clones("default,sse4.2,avx2")))
+int callee_decl(void);
+
+__attribute__((target_clones("default,avx2,sse4.2")))
+int caller_decl(void) { return callee_decl(); }
+
+__attribute__((target_clones("default,sse4.2,avx2")))
+int callee_deferred_def(void);
+
+__attribute__((target_clones("default,avx2,sse4.2")))
+int caller_deferred_def(void) { return callee_deferred_def(); }
+
+__attribute__((target_clones("default,sse4.2,avx2")))
+int callee_deferred_def(void) { return 1; }
+//.
+// CHECK: @__cpu_model = external dso_local global { i32, i32, i32, [1 x i32] }
+// CHECK: @__cpu_features2 = external dso_local global [3 x i32]
+// CHECK: @callee.ifunc = weak_odr alias i32 (), ptr @callee
+// CHECK: @caller.ifunc = weak_odr alias i32 (), ptr @caller
+// CHECK: @callee_decl.ifunc = weak_odr alias i32 (), ptr @callee_decl
+// CHECK: @caller_decl.ifunc = weak_odr alias i32 (), ptr @caller_decl
+// CHECK: @callee_deferred_def.ifunc = weak_odr alias i32 (), ptr @callee_deferred_def
+// CHECK: @caller_deferred_def.ifunc = weak_odr alias i32 (), ptr @caller_deferred_def
+// CHECK: @callee = weak_odr ifunc i32 (), ptr @callee.resolver
+// CHECK: @caller = weak_odr ifunc i32 (), ptr @caller.resolver
+// CHECK: @callee_decl = weak_odr ifunc i32 (), ptr @callee_decl.resolver
+// CHECK: @caller_decl = weak_odr ifunc i32 (), ptr @caller_decl.resolver
+// CHECK: @callee_deferred_def = weak_odr ifunc i32 (), ptr @callee_deferred_def.resolver
+// CHECK: @caller_deferred_def = weak_odr ifunc i32 (), ptr @caller_deferred_def.resolver
+//.
+// CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @callee.default.2(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i32 1
+//
+//
+// CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @callee.sse4.2.0(
+// CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i32 1
+//
+//
+// CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @callee.avx2.1(
+// CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i32 1
+//
+//
+// CHECK-LABEL: define weak_odr ptr @callee.resolver() comdat {
+// CHECK-NEXT:  [[RESOLVER_ENTRY:.*:]]
+// CHECK-NEXT:    call void @__cpu_indicator_init()
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 1024
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024
+// CHECK-NEXT:    [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT:    br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]]
+// CHECK:       [[RESOLVER_RETURN]]:
+// CHECK-NEXT:    ret ptr @callee.avx2.1
+// CHECK:       [[RESOLVER_ELSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 256
+// CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256
+// CHECK-NEXT:    [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT:    br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]]
+// CHECK:       [[RESOLVER_RETURN1]]:
+// CHECK-NEXT:    ret ptr @callee.sse4.2.0
+// CHECK:       [[RESOLVER_ELSE2]]:
+// CHECK-NEXT:    ret ptr @callee.default.2
+//
+//
+// CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @caller.default.2(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee()
+// CHECK-NEXT:    ret i32 [[CALL]]
+//
+//
+// CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @caller.avx2.0(
+// CHECK-SAME: ) #[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee()
+// CHECK-NEXT:    ret i32 [[CALL]]
+//
+//
+// CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @caller.sse4.2.1(
+// CHECK-SAME: ) #[[ATTR1]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee()
+// CHECK-NEXT:    ret i32 [[CALL]]
+//
+//
+// CHECK-LABEL: define weak_odr ptr @caller.resolver() comdat {
+// CHECK-NEXT:  [[RESOLVER_ENTRY:.*:]]
+// CHECK-NEXT:    call void @__cpu_indicator_init()
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 1024
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024
+// CHECK-NEXT:    [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT:    br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]]
+// CHECK:       [[RESOLVER_RETURN]]:
+// CHECK-NEXT:    ret ptr @caller.avx2.0
+// CHECK:       [[RESOLVER_ELSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 256
+// CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256
+// CHECK-NEXT:    [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT:    br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]]
+// CHECK:       [[RESOLVER_RETURN1]]:
+// CHECK-NEXT:    ret ptr @caller.sse4.2.1
+// CHECK:       [[RESOLVER_ELSE2]]:
+// CHECK-NEXT:    ret ptr @caller.default.2
+//
+//
+// CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @caller_decl.default.2(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_decl()
+// CHECK-NEXT:    ret i32 [[CALL]]
+//
+//
+// CHECK-LABEL: define weak_odr ptr @callee_decl.resolver() comdat {
+// CHECK-NEXT:  [[RESOLVER_ENTRY:.*:]]
+// CHECK-NEXT:    call void @__cpu_indicator_init()
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 1024
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024
+// CHECK-NEXT:    [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT:    br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]]
+// CHECK:       [[RESOLVER_RETURN]]:
+// CHECK-NEXT:    ret ptr @callee_decl.avx2.1
+// CHECK:       [[RESOLVER_ELSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 256
+// CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256
+// CHECK-NEXT:    [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT:    br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]]
+// CHECK:       [[RESOLVER_RETURN1]]:
+// CHECK-NEXT:    ret ptr @callee_decl.sse4.2.0
+// CHECK:       [[RESOLVER_ELSE2]]:
+// CHECK-NEXT:    ret ptr @callee_decl.default.2
+//
+//
+// CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @caller_decl.avx2.0(
+// CHECK-SAME: ) #[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_decl()
+// CHECK-NEXT:    ret i32 [[CALL]]
+//
+//
+// CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @caller_decl.sse4.2.1(
+// CHECK-SAME: ) #[[ATTR1]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_decl()
+// CHECK-NEXT:    ret i32 [[CALL]]
+//
+//
+// CHECK-LABEL: define weak_odr ptr @caller_decl.resolver() comdat {
+// CHECK-NEXT:  [[RESOLVER_ENTRY:.*:]]
+// CHECK-NEXT:    call void @__cpu_indicator_init()
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 1024
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024
+// CHECK-NEXT:    [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT:    br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]]
+// CHECK:       [[RESOLVER_RETURN]]:
+// CHECK-NEXT:    ret ptr @caller_decl.avx2.0
+// CHECK:       [[RESOLVER_ELSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 256
+// CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256
+// CHECK-NEXT:    [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT:    br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]]
+// CHECK:       [[RESOLVER_RETURN1]]:
+// CHECK-NEXT:    ret ptr @caller_decl.sse4.2.1
+// CHECK:       [[RESOLVER_ELSE2]]:
+// CHECK-NEXT:    ret ptr @caller_decl.default.2
+//
+//
+// CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @caller_deferred_def.default.2(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_deferred_def()
+// CHECK-NEXT:    ret i32 [[CALL]]
+//
+//
+// CHECK-LABEL: define weak_odr ptr @callee_deferred_def.resolver() comdat {
+// CHECK-NEXT:  [[RESOLVER_ENTRY:.*:]]
+// CHECK-NEXT:    call void @__cpu_indicator_init()
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 1024
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024
+// CHECK-NEXT:    [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT:    br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]]
+// CHECK:       [[RESOLVER_RETURN]]:
+// CHECK-NEXT:    ret ptr @callee_deferred_def.avx2.1
+// CHECK:       [[RESOLVER_ELSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 256
+// CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256
+// CHECK-NEXT:    [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT:    br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]]
+// CHECK:       [[RESOLVER_RETURN1]]:
+// CHECK-NEXT:    ret ptr @callee_deferred_def.sse4.2.0
+// CHECK:       [[RESOLVER_ELSE2]]:
+// CHECK-NEXT:    ret ptr @callee_deferred_def.default.2
+//
+//
+// CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @caller_deferred_def.avx2.0(
+// CHECK-SAME: ) #[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_deferred_def()
+// CHECK-NEXT:    ret i32 [[CALL]]
+//
+//
+// CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @caller_deferred_def.sse4.2.1(
+// CHECK-SAME: ) #[[ATTR1]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_deferred_def()
+// CHECK-NEXT:    ret i32 [[CALL]]
+//
+//
+// CHECK-LABEL: define weak_odr ptr @caller_deferred_def.resolver() comdat {
+// CHECK-NEXT:  [[RESOLVER_ENTRY:.*:]]
+// CHECK-NEXT:    call void @__cpu_indicator_init()
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 1024
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024
+// CHECK-NEXT:    [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT:    br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]]
+// CHECK:       [[RESOLVER_RETURN]]:
+// CHECK-NEXT:    ret ptr @caller_deferred_def.avx2.0
+// CHECK:       [[RESOLVER_ELSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 256
+// CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256
+// CHECK-NEXT:    [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT:    br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]]
+// CHECK:       [[RESOLVER_RETURN1]]:
+// CHECK-NEXT:    ret ptr @caller_deferred_def.sse4.2.1
+// CHECK:       [[RESOLVER_ELSE2]]:
+// CHECK-NEXT:    ret ptr @caller_deferred_def.default.2
+//
+//
+// CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @callee_deferred_def.default.2(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i32 1
+//
+//
+// CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @callee_deferred_def.sse4.2.0(
+// CHECK-SAME: ) #[[ATTR1]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i32 1
+//
+//
+// CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @callee_deferred_def.avx2.1(
+// CHECK-SAME: ) #[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i32 1
+//
+//.
+// CHECK: attributes #[[ATTR0]] = { nounwind "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+// CHECK: attributes #[[ATTR1]] = { nounwind "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" }
+// CHECK: attributes #[[ATTR2]] = { nounwind "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }
+// CHECK: attributes #[[ATTR3:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+// CHECK: attributes #[[ATTR4:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" }
+// CHECK: attributes #[[ATTR5:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }
+//.
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.

>From e2edb0cb73a348959399987584837b1c3a6b4eae Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 9 Sep 2024 16:26:42 +0800
Subject: [PATCH 2/2] [FMV][Clang][CodeGen] Resolves corresponding callee for
 multi-versioning callers

---
 clang/lib/CodeGen/CGExpr.cpp                  |  29 ++++
 .../test/CodeGen/attr-target-clones-inline.c  | 128 +++++++-----------
 2 files changed, 81 insertions(+), 76 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 99cd61b9e78953..714c4b8922094b 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -5637,6 +5637,35 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) {
       return CGCallee::forBuiltin(builtinID, FD);
   }
 
+  // Resolve the callee version that matches the current version of a
+  // multi-versioned caller when both share the same feature string.
+  if (CGF.CGM.getCodeGenOpts().OptimizationLevel > 0 &&
+      FD->isTargetClonesMultiVersion()) {
+    if (auto *TC = CGF.CurFuncDecl->getAttr<TargetClonesAttr>()) {
+      llvm::Constant *CalleePtr = nullptr;
+      CGF.getContext().forEachMultiversionedFunctionVersion(
+          FD, [&](const FunctionDecl *CurFD) {
+            if (const auto *CalleeTC = FD->getAttr<TargetClonesAttr>()) {
+              StringRef FeatStr =
+                  TC->getFeatureStr(CGF.CurGD.getMultiVersionIndex());
+              auto It = llvm::find(CalleeTC->featuresStrs(), FeatStr);
+              if (It != CalleeTC->featuresStrs_end()) {
+                GD = GlobalDecl(CurFD, It - CalleeTC->featuresStrs_begin());
+                const CGFunctionInfo &FI =
+                    CGF.CGM.getTypes().arrangeGlobalDeclaration(GD);
+                llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(FI);
+                CalleePtr = CGF.CGM.GetAddrOfFunction(
+                    GD, Ty, /*ForVTable=*/false,
+                    /*DontDefer=*/false, ForDefinition);
+              }
+            }
+          });
+
+      if (CalleePtr)
+        return CGCallee::forDirect(CalleePtr, GD);
+    }
+  }
+
   llvm::Constant *CalleePtr = CGF.CGM.getRawFunctionPointer(GD);
   if (CGF.CGM.getLangOpts().CUDA && !CGF.CGM.getLangOpts().CUDAIsDevice &&
       FD->hasAttr<CUDAGlobalAttr>())
diff --git a/clang/test/CodeGen/attr-target-clones-inline.c b/clang/test/CodeGen/attr-target-clones-inline.c
index cc9e8dc762e483..7dc3819b81e188 100644
--- a/clang/test/CodeGen/attr-target-clones-inline.c
+++ b/clang/test/CodeGen/attr-target-clones-inline.c
@@ -26,16 +26,14 @@ int callee_deferred_def(void) { return 1; }
 // CHECK: @__cpu_features2 = external dso_local global [3 x i32]
 // CHECK: @callee.ifunc = weak_odr alias i32 (), ptr @callee
 // CHECK: @caller.ifunc = weak_odr alias i32 (), ptr @caller
-// CHECK: @callee_decl.ifunc = weak_odr alias i32 (), ptr @callee_decl
 // CHECK: @caller_decl.ifunc = weak_odr alias i32 (), ptr @caller_decl
-// CHECK: @callee_deferred_def.ifunc = weak_odr alias i32 (), ptr @callee_deferred_def
 // CHECK: @caller_deferred_def.ifunc = weak_odr alias i32 (), ptr @caller_deferred_def
+// CHECK: @callee_deferred_def.ifunc = weak_odr alias i32 (), ptr @callee_deferred_def
 // CHECK: @callee = weak_odr ifunc i32 (), ptr @callee.resolver
 // CHECK: @caller = weak_odr ifunc i32 (), ptr @caller.resolver
-// CHECK: @callee_decl = weak_odr ifunc i32 (), ptr @callee_decl.resolver
 // CHECK: @caller_decl = weak_odr ifunc i32 (), ptr @caller_decl.resolver
-// CHECK: @callee_deferred_def = weak_odr ifunc i32 (), ptr @callee_deferred_def.resolver
 // CHECK: @caller_deferred_def = weak_odr ifunc i32 (), ptr @caller_deferred_def.resolver
+// CHECK: @callee_deferred_def = weak_odr ifunc i32 (), ptr @callee_deferred_def.resolver
 //.
 // CHECK: Function Attrs: nounwind
 // CHECK-LABEL: define dso_local i32 @callee.default.2(
@@ -84,7 +82,7 @@ int callee_deferred_def(void) { return 1; }
 // CHECK-LABEL: define dso_local i32 @caller.default.2(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee()
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee.default.2()
 // CHECK-NEXT:    ret i32 [[CALL]]
 //
 //
@@ -92,7 +90,7 @@ int callee_deferred_def(void) { return 1; }
 // CHECK-LABEL: define dso_local i32 @caller.avx2.0(
 // CHECK-SAME: ) #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee()
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee.avx2.1()
 // CHECK-NEXT:    ret i32 [[CALL]]
 //
 //
@@ -100,7 +98,7 @@ int callee_deferred_def(void) { return 1; }
 // CHECK-LABEL: define dso_local i32 @caller.sse4.2.1(
 // CHECK-SAME: ) #[[ATTR1]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee()
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee.sse4.2.0()
 // CHECK-NEXT:    ret i32 [[CALL]]
 //
 //
@@ -130,37 +128,15 @@ int callee_deferred_def(void) { return 1; }
 // CHECK-LABEL: define dso_local i32 @caller_decl.default.2(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_decl()
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_decl.default.2()
 // CHECK-NEXT:    ret i32 [[CALL]]
 //
 //
-// CHECK-LABEL: define weak_odr ptr @callee_decl.resolver() comdat {
-// CHECK-NEXT:  [[RESOLVER_ENTRY:.*:]]
-// CHECK-NEXT:    call void @__cpu_indicator_init()
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 1024
-// CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024
-// CHECK-NEXT:    [[TMP3:%.*]] = and i1 true, [[TMP2]]
-// CHECK-NEXT:    br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]]
-// CHECK:       [[RESOLVER_RETURN]]:
-// CHECK-NEXT:    ret ptr @callee_decl.avx2.1
-// CHECK:       [[RESOLVER_ELSE]]:
-// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
-// CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 256
-// CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256
-// CHECK-NEXT:    [[TMP7:%.*]] = and i1 true, [[TMP6]]
-// CHECK-NEXT:    br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]]
-// CHECK:       [[RESOLVER_RETURN1]]:
-// CHECK-NEXT:    ret ptr @callee_decl.sse4.2.0
-// CHECK:       [[RESOLVER_ELSE2]]:
-// CHECK-NEXT:    ret ptr @callee_decl.default.2
-//
-//
 // CHECK: Function Attrs: nounwind
 // CHECK-LABEL: define dso_local i32 @caller_decl.avx2.0(
 // CHECK-SAME: ) #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_decl()
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_decl.avx2.1()
 // CHECK-NEXT:    ret i32 [[CALL]]
 //
 //
@@ -168,7 +144,7 @@ int callee_deferred_def(void) { return 1; }
 // CHECK-LABEL: define dso_local i32 @caller_decl.sse4.2.1(
 // CHECK-SAME: ) #[[ATTR1]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_decl()
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_decl.sse4.2.0()
 // CHECK-NEXT:    ret i32 [[CALL]]
 //
 //
@@ -198,48 +174,47 @@ int callee_deferred_def(void) { return 1; }
 // CHECK-LABEL: define dso_local i32 @caller_deferred_def.default.2(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_deferred_def()
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_deferred_def.default.2()
 // CHECK-NEXT:    ret i32 [[CALL]]
 //
 //
-// CHECK-LABEL: define weak_odr ptr @callee_deferred_def.resolver() comdat {
-// CHECK-NEXT:  [[RESOLVER_ENTRY:.*:]]
-// CHECK-NEXT:    call void @__cpu_indicator_init()
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 1024
-// CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024
-// CHECK-NEXT:    [[TMP3:%.*]] = and i1 true, [[TMP2]]
-// CHECK-NEXT:    br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]]
-// CHECK:       [[RESOLVER_RETURN]]:
-// CHECK-NEXT:    ret ptr @callee_deferred_def.avx2.1
-// CHECK:       [[RESOLVER_ELSE]]:
-// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
-// CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 256
-// CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256
-// CHECK-NEXT:    [[TMP7:%.*]] = and i1 true, [[TMP6]]
-// CHECK-NEXT:    br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]]
-// CHECK:       [[RESOLVER_RETURN1]]:
-// CHECK-NEXT:    ret ptr @callee_deferred_def.sse4.2.0
-// CHECK:       [[RESOLVER_ELSE2]]:
-// CHECK-NEXT:    ret ptr @callee_deferred_def.default.2
+// CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @callee_deferred_def.default.2(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i32 1
 //
 //
 // CHECK: Function Attrs: nounwind
 // CHECK-LABEL: define dso_local i32 @caller_deferred_def.avx2.0(
 // CHECK-SAME: ) #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_deferred_def()
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_deferred_def.avx2.1()
 // CHECK-NEXT:    ret i32 [[CALL]]
 //
 //
 // CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @callee_deferred_def.avx2.1(
+// CHECK-SAME: ) #[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i32 1
+//
+//
+// CHECK: Function Attrs: nounwind
 // CHECK-LABEL: define dso_local i32 @caller_deferred_def.sse4.2.1(
 // CHECK-SAME: ) #[[ATTR1]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_deferred_def()
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @callee_deferred_def.sse4.2.0()
 // CHECK-NEXT:    ret i32 [[CALL]]
 //
 //
+// CHECK: Function Attrs: nounwind
+// CHECK-LABEL: define dso_local i32 @callee_deferred_def.sse4.2.0(
+// CHECK-SAME: ) #[[ATTR1]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i32 1
+//
+//
 // CHECK-LABEL: define weak_odr ptr @caller_deferred_def.resolver() comdat {
 // CHECK-NEXT:  [[RESOLVER_ENTRY:.*:]]
 // CHECK-NEXT:    call void @__cpu_indicator_init()
@@ -262,33 +237,34 @@ int callee_deferred_def(void) { return 1; }
 // CHECK-NEXT:    ret ptr @caller_deferred_def.default.2
 //
 //
-// CHECK: Function Attrs: nounwind
-// CHECK-LABEL: define dso_local i32 @callee_deferred_def.default.2(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    ret i32 1
-//
-//
-// CHECK: Function Attrs: nounwind
-// CHECK-LABEL: define dso_local i32 @callee_deferred_def.sse4.2.0(
-// CHECK-SAME: ) #[[ATTR1]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    ret i32 1
-//
-//
-// CHECK: Function Attrs: nounwind
-// CHECK-LABEL: define dso_local i32 @callee_deferred_def.avx2.1(
-// CHECK-SAME: ) #[[ATTR2]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    ret i32 1
+// CHECK-LABEL: define weak_odr ptr @callee_deferred_def.resolver() comdat {
+// CHECK-NEXT:  [[RESOLVER_ENTRY:.*:]]
+// CHECK-NEXT:    call void @__cpu_indicator_init()
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 1024
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1024
+// CHECK-NEXT:    [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT:    br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]]
+// CHECK:       [[RESOLVER_RETURN]]:
+// CHECK-NEXT:    ret ptr @callee_deferred_def.avx2.1
+// CHECK:       [[RESOLVER_ELSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 256
+// CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 256
+// CHECK-NEXT:    [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT:    br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]]
+// CHECK:       [[RESOLVER_RETURN1]]:
+// CHECK-NEXT:    ret ptr @callee_deferred_def.sse4.2.0
+// CHECK:       [[RESOLVER_ELSE2]]:
+// CHECK-NEXT:    ret ptr @callee_deferred_def.default.2
 //
 //.
 // CHECK: attributes #[[ATTR0]] = { nounwind "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
 // CHECK: attributes #[[ATTR1]] = { nounwind "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" }
 // CHECK: attributes #[[ATTR2]] = { nounwind "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }
 // CHECK: attributes #[[ATTR3:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
-// CHECK: attributes #[[ATTR4:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" }
-// CHECK: attributes #[[ATTR5:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }
+// CHECK: attributes #[[ATTR4:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }
+// CHECK: attributes #[[ATTR5:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" }
 //.
 // CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}



More information about the cfe-commits mailing list