[clang] b46b1a9 - recommit c77a4078e01033aa2206c31a579d217c8a07569b
Yaxun Liu via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 24 13:53:44 PDT 2020
Author: Yaxun (Sam) Liu
Date: 2020-04-24T16:53:18-04:00
New Revision: b46b1a916d44216f0c70de55ae2123eb9de69027
URL: https://github.com/llvm/llvm-project/commit/b46b1a916d44216f0c70de55ae2123eb9de69027
DIFF: https://github.com/llvm/llvm-project/commit/b46b1a916d44216f0c70de55ae2123eb9de69027.diff
LOG: recommit c77a4078e01033aa2206c31a579d217c8a07569b
Added:
Modified:
clang/lib/Sema/SemaOverload.cpp
clang/test/SemaCUDA/function-overload.cu
Removed:
################################################################################
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index a32bc0c84c70..ecc4e7ee19fb 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -9374,16 +9374,22 @@ static Comparison compareEnableIfAttrs(const Sema &S, const FunctionDecl *Cand1,
return Comparison::Equal;
}
-static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
- const OverloadCandidate &Cand2) {
+static Comparison
+isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
+ const OverloadCandidate &Cand2) {
if (!Cand1.Function || !Cand1.Function->isMultiVersion() || !Cand2.Function ||
!Cand2.Function->isMultiVersion())
- return false;
+ return Comparison::Equal;
- // If Cand1 is invalid, it cannot be a better match, if Cand2 is invalid, this
- // is obviously better.
- if (Cand1.Function->isInvalidDecl()) return false;
- if (Cand2.Function->isInvalidDecl()) return true;
+ // If both are invalid, they are equal. If one of them is invalid, the other
+ // is better.
+ if (Cand1.Function->isInvalidDecl()) {
+ if (Cand2.Function->isInvalidDecl())
+ return Comparison::Equal;
+ return Comparison::Worse;
+ }
+ if (Cand2.Function->isInvalidDecl())
+ return Comparison::Better;
// If this is a cpu_dispatch/cpu_specific multiversion situation, prefer
// cpu_dispatch, else arbitrarily based on the identifiers.
@@ -9393,16 +9399,18 @@ static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
const auto *Cand2CPUSpec = Cand2.Function->getAttr<CPUSpecificAttr>();
if (!Cand1CPUDisp && !Cand2CPUDisp && !Cand1CPUSpec && !Cand2CPUSpec)
- return false;
+ return Comparison::Equal;
if (Cand1CPUDisp && !Cand2CPUDisp)
- return true;
+ return Comparison::Better;
if (Cand2CPUDisp && !Cand1CPUDisp)
- return false;
+ return Comparison::Worse;
if (Cand1CPUSpec && Cand2CPUSpec) {
if (Cand1CPUSpec->cpus_size() != Cand2CPUSpec->cpus_size())
- return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size();
+ return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size()
+ ? Comparison::Better
+ : Comparison::Worse;
std::pair<CPUSpecificAttr::cpus_iterator, CPUSpecificAttr::cpus_iterator>
FirstDiff = std::mismatch(
@@ -9415,7 +9423,9 @@ static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
assert(FirstDiff.first != Cand1CPUSpec->cpus_end() &&
"Two
diff erent cpu-specific versions should not have the same "
"identifier list, otherwise they'd be the same decl!");
- return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName();
+ return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName()
+ ? Comparison::Better
+ : Comparison::Worse;
}
llvm_unreachable("No way to get here unless both had cpu_dispatch");
}
@@ -9475,6 +9485,50 @@ bool clang::isBetterOverloadCandidate(
else if (!Cand1.Viable)
return false;
+ // [CUDA] A function with 'never' preference is marked not viable, therefore
+ // is never shown up here. The worst preference shown up here is 'wrong side',
+ // e.g. a host function called by a device host function in device
+ // compilation. This is valid AST as long as the host device function is not
+ // emitted, e.g. it is an inline function which is called only by a host
+ // function. A deferred diagnostic will be triggered if it is emitted.
+ // However a wrong-sided function is still a viable candidate here.
+ //
+ // If Cand1 can be emitted and Cand2 cannot be emitted in the current
+ // context, Cand1 is better than Cand2. If Cand1 can not be emitted and Cand2
+ // can be emitted, Cand1 is not better than Cand2. This rule should have
+ // precedence over other rules.
+ //
+ // If both Cand1 and Cand2 can be emitted, or neither can be emitted, then
+ // other rules should be used to determine which is better. This is because
+ // host/device based overloading resolution is mostly for determining
+ // viability of a function. If two functions are both viable, other factors
+ // should take precedence in preference, e.g. the standard-defined preferences
+ // like argument conversion ranks or enable_if partial-ordering. The
+ // preference for pass-object-size parameters is probably most similar to a
+ // type-based-overloading decision and so should take priority.
+ //
+ // If other rules cannot determine which is better, CUDA preference will be
+ // used again to determine which is better.
+ //
+ // TODO: Currently IdentifyCUDAPreference does not return correct values
+ // for functions called in global variable initializers due to missing
+ // correct context about device/host. Therefore we can only enforce this
+ // rule when there is a caller. We should enforce this rule for functions
+ // in global variable initializers once proper context is added.
+ if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
+ if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) {
+ auto P1 = S.IdentifyCUDAPreference(Caller, Cand1.Function);
+ auto P2 = S.IdentifyCUDAPreference(Caller, Cand2.Function);
+ assert(P1 != Sema::CFP_Never && P2 != Sema::CFP_Never);
+ auto Cand1Emittable = P1 > Sema::CFP_WrongSide;
+ auto Cand2Emittable = P2 > Sema::CFP_WrongSide;
+ if (Cand1Emittable && !Cand2Emittable)
+ return true;
+ if (!Cand1Emittable && Cand2Emittable)
+ return false;
+ }
+ }
+
// C++ [over.match.best]p1:
//
// -- if F is a static member function, ICS1(F) is defined such
@@ -9709,12 +9763,6 @@ bool clang::isBetterOverloadCandidate(
return Cmp == Comparison::Better;
}
- if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
- FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
- return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
- S.IdentifyCUDAPreference(Caller, Cand2.Function);
- }
-
bool HasPS1 = Cand1.Function != nullptr &&
functionHasPassObjectSizeParams(Cand1.Function);
bool HasPS2 = Cand2.Function != nullptr &&
@@ -9722,7 +9770,22 @@ bool clang::isBetterOverloadCandidate(
if (HasPS1 != HasPS2 && HasPS1)
return true;
- return isBetterMultiversionCandidate(Cand1, Cand2);
+ auto MV = isBetterMultiversionCandidate(Cand1, Cand2);
+ if (MV == Comparison::Better)
+ return true;
+ if (MV == Comparison::Worse)
+ return false;
+
+ // If other rules cannot determine which is better, CUDA preference is used
+ // to determine which is better.
+ if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
+ if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) {
+ return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
+ S.IdentifyCUDAPreference(Caller, Cand2.Function);
+ }
+ }
+
+ return false;
}
/// Determine whether two declarations are "equivalent" for the purposes of
@@ -9808,33 +9871,6 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
std::transform(begin(), end(), std::back_inserter(Candidates),
[](OverloadCandidate &Cand) { return &Cand; });
- // [CUDA] HD->H or HD->D calls are technically not allowed by CUDA but
- // are accepted by both clang and NVCC. However, during a particular
- // compilation mode only one call variant is viable. We need to
- // exclude non-viable overload candidates from consideration based
- // only on their host/device attributes. Specifically, if one
- // candidate call is WrongSide and the other is SameSide, we ignore
- // the WrongSide candidate.
- if (S.getLangOpts().CUDA) {
- const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
- bool ContainsSameSideCandidate =
- llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
- // Check viable function only.
- return Cand->Viable && Cand->Function &&
- S.IdentifyCUDAPreference(Caller, Cand->Function) ==
- Sema::CFP_SameSide;
- });
- if (ContainsSameSideCandidate) {
- auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
- // Check viable function only to avoid unnecessary data copying/moving.
- return Cand->Viable && Cand->Function &&
- S.IdentifyCUDAPreference(Caller, Cand->Function) ==
- Sema::CFP_WrongSide;
- };
- llvm::erase_if(Candidates, IsWrongSideCandidate);
- }
- }
-
// Find the best viable function.
Best = end();
for (auto *Cand : Candidates) {
diff --git a/clang/test/SemaCUDA/function-overload.cu b/clang/test/SemaCUDA/function-overload.cu
index b9efd1c09e69..b0e2852a12a7 100644
--- a/clang/test/SemaCUDA/function-overload.cu
+++ b/clang/test/SemaCUDA/function-overload.cu
@@ -331,9 +331,6 @@ __device__ void test_device_calls_template_fn() {
// If we have a mix of HD and H-only or D-only candidates in the overload set,
// normal C++ overload resolution rules apply first.
template <typename T> TemplateReturnTy template_vs_hd_function(T arg)
-#ifdef __CUDA_ARCH__
-//expected-note at -2 {{declared here}}
-#endif
{
return TemplateReturnTy();
}
@@ -342,11 +339,13 @@ __host__ __device__ HostDeviceReturnTy template_vs_hd_function(float arg) {
}
__host__ __device__ void test_host_device_calls_hd_template() {
- HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
- TemplateReturnTy ret2 = template_vs_hd_function(1);
#ifdef __CUDA_ARCH__
- // expected-error at -2 {{reference to __host__ function 'template_vs_hd_function<int>' in __host__ __device__ function}}
+ typedef HostDeviceReturnTy ExpectedReturnTy;
+#else
+ typedef TemplateReturnTy ExpectedReturnTy;
#endif
+ HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
+ ExpectedReturnTy ret2 = template_vs_hd_function(1);
}
__host__ void test_host_calls_hd_template() {
@@ -367,14 +366,14 @@ __device__ void test_device_calls_hd_template() {
__device__ DeviceReturnTy device_only_function(int arg) { return DeviceReturnTy(); }
__device__ DeviceReturnTy2 device_only_function(float arg) { return DeviceReturnTy2(); }
#ifndef __CUDA_ARCH__
- // expected-note at -3 {{'device_only_function' declared here}}
- // expected-note at -3 {{'device_only_function' declared here}}
+ // expected-note at -3 2{{'device_only_function' declared here}}
+ // expected-note at -3 2{{'device_only_function' declared here}}
#endif
__host__ HostReturnTy host_only_function(int arg) { return HostReturnTy(); }
__host__ HostReturnTy2 host_only_function(float arg) { return HostReturnTy2(); }
#ifdef __CUDA_ARCH__
- // expected-note at -3 {{'host_only_function' declared here}}
- // expected-note at -3 {{'host_only_function' declared here}}
+ // expected-note at -3 2{{'host_only_function' declared here}}
+ // expected-note at -3 2{{'host_only_function' declared here}}
#endif
__host__ __device__ void test_host_device_single_side_overloading() {
@@ -392,6 +391,37 @@ __host__ __device__ void test_host_device_single_side_overloading() {
#endif
}
+// wrong-sided overloading should not cause diagnostic unless it is emitted.
+// This inline function is not emitted.
+inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_no_diag() {
+ DeviceReturnTy ret1 = device_only_function(1);
+ DeviceReturnTy2 ret2 = device_only_function(1.0f);
+ HostReturnTy ret3 = host_only_function(1);
+ HostReturnTy2 ret4 = host_only_function(1.0f);
+}
+
+// wrong-sided overloading should cause diagnostic if it is emitted.
+// This inline function is emitted since it is called by an emitted function.
+inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_diag() {
+ DeviceReturnTy ret1 = device_only_function(1);
+ DeviceReturnTy2 ret2 = device_only_function(1.0f);
+#ifndef __CUDA_ARCH__
+ // expected-error at -3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
+ // expected-error at -3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
+#endif
+ HostReturnTy ret3 = host_only_function(1);
+ HostReturnTy2 ret4 = host_only_function(1.0f);
+#ifdef __CUDA_ARCH__
+ // expected-error at -3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
+ // expected-error at -3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
+#endif
+}
+
+__host__ __device__ void test_host_device_wrong_side_overloading_inline_diag_caller() {
+ test_host_device_wrong_side_overloading_inline_diag();
+ // expected-note at -1 {{called by 'test_host_device_wrong_side_overloading_inline_diag_caller'}}
+}
+
// Verify that we allow overloading function templates.
template <typename T> __host__ T template_overload(const T &a) { return a; };
template <typename T> __device__ T template_overload(const T &a) { return a; };
More information about the cfe-commits
mailing list