[clang] dfc0d94 - Revert D80450 "[CUDA][HIP] Fix implicit HD function resolution"
Fangrui Song via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 10 17:42:39 PDT 2020
Author: Fangrui Song
Date: 2020-06-10T17:42:28-07:00
New Revision: dfc0d9475556cb04f443f728e68cf8c7afa904eb
URL: https://github.com/llvm/llvm-project/commit/dfc0d9475556cb04f443f728e68cf8c7afa904eb
DIFF: https://github.com/llvm/llvm-project/commit/dfc0d9475556cb04f443f728e68cf8c7afa904eb.diff
LOG: Revert D80450 "[CUDA][HIP] Fix implicit HD function resolution"
This reverts commit 263390d4f5f23967a31af09eb6e0c12e633d6104.
This can still cause bogus errors:
eigen3/Eigen/src/Core/CoreEvaluators.h:94:38: error: call to implicitly-deleted copy constructor of 'unary_evaluator<Eigen::Inverse<Eigen::Matrix<double, 4, 4, 0, 4, 4>>>'
thrust/system/detail/generic/for_each.h:49:3: error: implicit instantiation of undefined template
'thrust::detail::STATIC_ASSERTION_FAILURE<false>'
Added:
Modified:
clang/include/clang/Sema/Sema.h
clang/lib/Sema/SemaCUDA.cpp
clang/lib/Sema/SemaOverload.cpp
clang/test/SemaCUDA/function-overload.cu
Removed:
################################################################################
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 33be6c68b11a..e7e6dc4f3adc 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11698,8 +11698,6 @@ class Sema final {
return IdentifyCUDATarget(dyn_cast<FunctionDecl>(CurContext));
}
- static bool IsCUDAImplicitHostDeviceFunction(const FunctionDecl *D);
-
// CUDA function call preference. Must be ordered numerically from
// worst to best.
enum CUDAFunctionPreference {
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 1106cef0eca2..5d6c15196750 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -211,20 +211,6 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
llvm_unreachable("All cases should've been handled by now.");
}
-template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
- if (!D)
- return false;
- if (auto *A = D->getAttr<AttrT>())
- return A->isImplicit();
- return D->isImplicit();
-}
-
-bool Sema::IsCUDAImplicitHostDeviceFunction(const FunctionDecl *D) {
- bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
- bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
- return IsImplicitDevAttr && IsImplicitHostAttr;
-}
-
void Sema::EraseUnwantedCUDAMatches(
const FunctionDecl *Caller,
SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 5e5f53991a68..319a4b534eba 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -9397,22 +9397,16 @@ static Comparison compareEnableIfAttrs(const Sema &S, const FunctionDecl *Cand1,
return Comparison::Equal;
}
-static Comparison
-isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
- const OverloadCandidate &Cand2) {
+static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
+ const OverloadCandidate &Cand2) {
if (!Cand1.Function || !Cand1.Function->isMultiVersion() || !Cand2.Function ||
!Cand2.Function->isMultiVersion())
- return Comparison::Equal;
+ return false;
- // If both are invalid, they are equal. If one of them is invalid, the other
- // is better.
- if (Cand1.Function->isInvalidDecl()) {
- if (Cand2.Function->isInvalidDecl())
- return Comparison::Equal;
- return Comparison::Worse;
- }
- if (Cand2.Function->isInvalidDecl())
- return Comparison::Better;
+ // If Cand1 is invalid, it cannot be a better match, if Cand2 is invalid, this
+ // is obviously better.
+ if (Cand1.Function->isInvalidDecl()) return false;
+ if (Cand2.Function->isInvalidDecl()) return true;
// If this is a cpu_dispatch/cpu_specific multiversion situation, prefer
// cpu_dispatch, else arbitrarily based on the identifiers.
@@ -9422,18 +9416,16 @@ isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
const auto *Cand2CPUSpec = Cand2.Function->getAttr<CPUSpecificAttr>();
if (!Cand1CPUDisp && !Cand2CPUDisp && !Cand1CPUSpec && !Cand2CPUSpec)
- return Comparison::Equal;
+ return false;
if (Cand1CPUDisp && !Cand2CPUDisp)
- return Comparison::Better;
+ return true;
if (Cand2CPUDisp && !Cand1CPUDisp)
- return Comparison::Worse;
+ return false;
if (Cand1CPUSpec && Cand2CPUSpec) {
if (Cand1CPUSpec->cpus_size() != Cand2CPUSpec->cpus_size())
- return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size()
- ? Comparison::Better
- : Comparison::Worse;
+ return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size();
std::pair<CPUSpecificAttr::cpus_iterator, CPUSpecificAttr::cpus_iterator>
FirstDiff = std::mismatch(
@@ -9446,9 +9438,7 @@ isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
assert(FirstDiff.first != Cand1CPUSpec->cpus_end() &&
"Two
diff erent cpu-specific versions should not have the same "
"identifier list, otherwise they'd be the same decl!");
- return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName()
- ? Comparison::Better
- : Comparison::Worse;
+ return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName();
}
llvm_unreachable("No way to get here unless both had cpu_dispatch");
}
@@ -9508,66 +9498,6 @@ bool clang::isBetterOverloadCandidate(
else if (!Cand1.Viable)
return false;
- // [CUDA] A function with 'never' preference is marked not viable, therefore
- // is never shown up here. The worst preference shown up here is 'wrong side',
- // e.g. a host function called by a device host function in device
- // compilation. This is valid AST as long as the host device function is not
- // emitted, e.g. it is an inline function which is called only by a host
- // function. A deferred diagnostic will be triggered if it is emitted.
- // However a wrong-sided function is still a viable candidate here.
- //
- // If Cand1 can be emitted and Cand2 cannot be emitted in the current
- // context, Cand1 is better than Cand2. If Cand1 can not be emitted and Cand2
- // can be emitted, Cand1 is not better than Cand2. This rule should have
- // precedence over other rules.
- //
- // If both Cand1 and Cand2 can be emitted, or neither can be emitted, then
- // other rules should be used to determine which is better. This is because
- // host/device based overloading resolution is mostly for determining
- // viability of a function. If two functions are both viable, other factors
- // should take precedence in preference, e.g. the standard-defined preferences
- // like argument conversion ranks or enable_if partial-ordering. The
- // preference for pass-object-size parameters is probably most similar to a
- // type-based-overloading decision and so should take priority.
- //
- // If other rules cannot determine which is better, CUDA preference will be
- // used again to determine which is better.
- //
- // TODO: Currently IdentifyCUDAPreference does not return correct values
- // for functions called in global variable initializers due to missing
- // correct context about device/host. Therefore we can only enforce this
- // rule when there is a caller. We should enforce this rule for functions
- // in global variable initializers once proper context is added.
- if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
- if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) {
- bool IsCallerImplicitHD = Sema::IsCUDAImplicitHostDeviceFunction(Caller);
- bool IsCand1ImplicitHD =
- Sema::IsCUDAImplicitHostDeviceFunction(Cand1.Function);
- bool IsCand2ImplicitHD =
- Sema::IsCUDAImplicitHostDeviceFunction(Cand2.Function);
- auto P1 = S.IdentifyCUDAPreference(Caller, Cand1.Function);
- auto P2 = S.IdentifyCUDAPreference(Caller, Cand2.Function);
- assert(P1 != Sema::CFP_Never && P2 != Sema::CFP_Never);
- // The implicit HD function may be a function in a system header which
- // is forced by pragma. In device compilation, if we prefer HD candidates
- // over wrong-sided candidates, overloading resolution may change, which
- // may result in non-deferrable diagnostics. As a workaround, we let
- // implicit HD candidates take equal preference as wrong-sided candidates.
- // This will preserve the overloading resolution.
- auto EmitThreshold =
- (S.getLangOpts().CUDAIsDevice && IsCallerImplicitHD &&
- (IsCand1ImplicitHD || IsCand2ImplicitHD))
- ? Sema::CFP_Never
- : Sema::CFP_WrongSide;
- auto Cand1Emittable = P1 > EmitThreshold;
- auto Cand2Emittable = P2 > EmitThreshold;
- if (Cand1Emittable && !Cand2Emittable)
- return true;
- if (!Cand1Emittable && Cand2Emittable)
- return false;
- }
- }
-
// C++ [over.match.best]p1:
//
// -- if F is a static member function, ICS1(F) is defined such
@@ -9802,6 +9732,12 @@ bool clang::isBetterOverloadCandidate(
return Cmp == Comparison::Better;
}
+ if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
+ FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
+ return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
+ S.IdentifyCUDAPreference(Caller, Cand2.Function);
+ }
+
bool HasPS1 = Cand1.Function != nullptr &&
functionHasPassObjectSizeParams(Cand1.Function);
bool HasPS2 = Cand2.Function != nullptr &&
@@ -9809,21 +9745,7 @@ bool clang::isBetterOverloadCandidate(
if (HasPS1 != HasPS2 && HasPS1)
return true;
- auto MV = isBetterMultiversionCandidate(Cand1, Cand2);
- if (MV == Comparison::Better)
- return true;
- if (MV == Comparison::Worse)
- return false;
-
- // If other rules cannot determine which is better, CUDA preference is used
- // to determine which is better.
- if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
- FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
- return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
- S.IdentifyCUDAPreference(Caller, Cand2.Function);
- }
-
- return false;
+ return isBetterMultiversionCandidate(Cand1, Cand2);
}
/// Determine whether two declarations are "equivalent" for the purposes of
@@ -9909,6 +9831,33 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
std::transform(begin(), end(), std::back_inserter(Candidates),
[](OverloadCandidate &Cand) { return &Cand; });
+ // [CUDA] HD->H or HD->D calls are technically not allowed by CUDA but
+ // are accepted by both clang and NVCC. However, during a particular
+ // compilation mode only one call variant is viable. We need to
+ // exclude non-viable overload candidates from consideration based
+ // only on their host/device attributes. Specifically, if one
+ // candidate call is WrongSide and the other is SameSide, we ignore
+ // the WrongSide candidate.
+ if (S.getLangOpts().CUDA) {
+ const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
+ bool ContainsSameSideCandidate =
+ llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
+ // Check viable function only.
+ return Cand->Viable && Cand->Function &&
+ S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+ Sema::CFP_SameSide;
+ });
+ if (ContainsSameSideCandidate) {
+ auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
+ // Check viable function only to avoid unnecessary data copying/moving.
+ return Cand->Viable && Cand->Function &&
+ S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+ Sema::CFP_WrongSide;
+ };
+ llvm::erase_if(Candidates, IsWrongSideCandidate);
+ }
+ }
+
// Find the best viable function.
Best = end();
for (auto *Cand : Candidates) {
diff --git a/clang/test/SemaCUDA/function-overload.cu b/clang/test/SemaCUDA/function-overload.cu
index da4c4911c4f5..b9efd1c09e69 100644
--- a/clang/test/SemaCUDA/function-overload.cu
+++ b/clang/test/SemaCUDA/function-overload.cu
@@ -1,8 +1,8 @@
// REQUIRES: x86-registered-target
// REQUIRES: nvptx-registered-target
-// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
-// RUN: %clang_cc1 -std=c++14 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
#include "Inputs/cuda.h"
@@ -14,13 +14,6 @@ struct DeviceReturnTy2 {};
struct HostDeviceReturnTy {};
struct TemplateReturnTy {};
-struct CorrectOverloadRetTy{};
-#if __CUDA_ARCH__
-// expected-note at -2 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'IncorrectOverloadRetTy' to 'const CorrectOverloadRetTy &' for 1st argument}}
-// expected-note at -3 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'IncorrectOverloadRetTy' to 'CorrectOverloadRetTy &&' for 1st argument}}
-#endif
-struct IncorrectOverloadRetTy{};
-
typedef HostReturnTy (*HostFnPtr)();
typedef DeviceReturnTy (*DeviceFnPtr)();
typedef HostDeviceReturnTy (*HostDeviceFnPtr)();
@@ -338,6 +331,9 @@ __device__ void test_device_calls_template_fn() {
// If we have a mix of HD and H-only or D-only candidates in the overload set,
// normal C++ overload resolution rules apply first.
template <typename T> TemplateReturnTy template_vs_hd_function(T arg)
+#ifdef __CUDA_ARCH__
+//expected-note at -2 {{declared here}}
+#endif
{
return TemplateReturnTy();
}
@@ -346,13 +342,11 @@ __host__ __device__ HostDeviceReturnTy template_vs_hd_function(float arg) {
}
__host__ __device__ void test_host_device_calls_hd_template() {
+ HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
+ TemplateReturnTy ret2 = template_vs_hd_function(1);
#ifdef __CUDA_ARCH__
- typedef HostDeviceReturnTy ExpectedReturnTy;
-#else
- typedef TemplateReturnTy ExpectedReturnTy;
+ // expected-error at -2 {{reference to __host__ function 'template_vs_hd_function<int>' in __host__ __device__ function}}
#endif
- HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
- ExpectedReturnTy ret2 = template_vs_hd_function(1);
}
__host__ void test_host_calls_hd_template() {
@@ -373,14 +367,14 @@ __device__ void test_device_calls_hd_template() {
__device__ DeviceReturnTy device_only_function(int arg) { return DeviceReturnTy(); }
__device__ DeviceReturnTy2 device_only_function(float arg) { return DeviceReturnTy2(); }
#ifndef __CUDA_ARCH__
- // expected-note at -3 2{{'device_only_function' declared here}}
- // expected-note at -3 2{{'device_only_function' declared here}}
+ // expected-note at -3 {{'device_only_function' declared here}}
+ // expected-note at -3 {{'device_only_function' declared here}}
#endif
__host__ HostReturnTy host_only_function(int arg) { return HostReturnTy(); }
__host__ HostReturnTy2 host_only_function(float arg) { return HostReturnTy2(); }
#ifdef __CUDA_ARCH__
- // expected-note at -3 2{{'host_only_function' declared here}}
- // expected-note at -3 2{{'host_only_function' declared here}}
+ // expected-note at -3 {{'host_only_function' declared here}}
+ // expected-note at -3 {{'host_only_function' declared here}}
#endif
__host__ __device__ void test_host_device_single_side_overloading() {
@@ -398,37 +392,6 @@ __host__ __device__ void test_host_device_single_side_overloading() {
#endif
}
-// wrong-sided overloading should not cause diagnostic unless it is emitted.
-// This inline function is not emitted.
-inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_no_diag() {
- DeviceReturnTy ret1 = device_only_function(1);
- DeviceReturnTy2 ret2 = device_only_function(1.0f);
- HostReturnTy ret3 = host_only_function(1);
- HostReturnTy2 ret4 = host_only_function(1.0f);
-}
-
-// wrong-sided overloading should cause diagnostic if it is emitted.
-// This inline function is emitted since it is called by an emitted function.
-inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_diag() {
- DeviceReturnTy ret1 = device_only_function(1);
- DeviceReturnTy2 ret2 = device_only_function(1.0f);
-#ifndef __CUDA_ARCH__
- // expected-error at -3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
- // expected-error at -3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
-#endif
- HostReturnTy ret3 = host_only_function(1);
- HostReturnTy2 ret4 = host_only_function(1.0f);
-#ifdef __CUDA_ARCH__
- // expected-error at -3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
- // expected-error at -3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
-#endif
-}
-
-__host__ __device__ void test_host_device_wrong_side_overloading_inline_diag_caller() {
- test_host_device_wrong_side_overloading_inline_diag();
- // expected-note at -1 {{called by 'test_host_device_wrong_side_overloading_inline_diag_caller'}}
-}
-
// Verify that we allow overloading function templates.
template <typename T> __host__ T template_overload(const T &a) { return a; };
template <typename T> __device__ T template_overload(const T &a) { return a; };
@@ -456,136 +419,3 @@ __host__ __device__ int constexpr_overload(const T &x, const T &y) {
int test_constexpr_overload(C2 &x, C2 &y) {
return constexpr_overload(x, y);
}
-
-// Verify no ambiguity for new operator.
-void *a = new int;
-__device__ void *b = new int;
-// expected-error at -1{{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}}
-
-// Verify no ambiguity for new operator.
-template<typename _Tp> _Tp&& f();
-template<typename _Tp, typename = decltype(new _Tp(f<_Tp>()))>
-void __test();
-
-void foo() {
- __test<int>();
-}
-
-// Test resolving implicit host device candidate vs wrong-sided candidate.
-// In device compilation, implicit host device caller choose implicit host
-// device candidate and wrong-sided candidate with equal preference.
-// Resolution result should not change with/without pragma.
-namespace ImplicitHostDeviceVsWrongSided {
-CorrectOverloadRetTy callee(double x);
-#pragma clang force_cuda_host_device begin
-IncorrectOverloadRetTy callee(int x);
-inline CorrectOverloadRetTy implicit_hd_caller() {
- return callee(1.0);
-}
-#pragma clang force_cuda_host_device end
-}
-
-// Test resolving implicit host device candidate vs same-sided candidate.
-// In host compilation, implicit host device caller choose implicit host
-// device candidate and same-sided candidate with equal preference.
-// Resolution result should not change with/without pragma.
-namespace ImplicitHostDeviceVsSameSide {
-IncorrectOverloadRetTy callee(int x);
-#pragma clang force_cuda_host_device begin
-CorrectOverloadRetTy callee(double x);
-inline CorrectOverloadRetTy implicit_hd_caller() {
- return callee(1.0);
-}
-#pragma clang force_cuda_host_device end
-}
-
-// Test resolving explicit host device candidate vs. wrong-sided candidate.
-// Explicit host device caller favors host device candidate against wrong-sided
-// candidate.
-namespace ExplicitHostDeviceVsWrongSided {
-CorrectOverloadRetTy callee(double x);
-__host__ __device__ IncorrectOverloadRetTy callee(int x);
-inline __host__ __device__ CorrectOverloadRetTy explicit_hd_caller() {
- return callee(1.0);
-#if __CUDA_ARCH__
- // expected-error at -2 {{no viable conversion from returned value of type 'IncorrectOverloadRetTy' to function return type 'CorrectOverloadRetTy'}}
-#endif
-}
-}
-
-// In the implicit host device function 'caller', the second 'callee' should be
-// chosen since it has better match, even though it is an implicit host device
-// function whereas the first 'callee' is a host function. A diagnostic will be
-// emitted if the first 'callee' is chosen since deduced return type cannot be
-// used before it is defined.
-namespace ImplicitHostDeviceByConstExpr {
-template <class a> a b;
-auto callee(...);
-template <class d> constexpr auto callee(d) -> decltype(0);
-struct e {
- template <class ad, class... f> static auto g(ad, f...) {
- return h<e, decltype(b<f>)...>;
- }
- struct i {
- template <class, class... f> static constexpr auto caller(f... k) {
- return callee(k...);
- }
- };
- template <class, class... f> static auto h() {
- return i::caller<int, f...>;
- }
-};
-class l {
- l() {
- e::g([] {}, this);
- }
-};
-}
-
-// Implicit HD candidate competes with device candidate.
-// a and b have implicit HD copy ctor. In copy ctor of b, ctor of a is resolved.
-// copy ctor of a should win over a(short), otherwise there will be ambiguity
-// due to conversion operator.
-namespace TestImplicitHDWithD {
- struct a {
- __device__ a(short);
- __device__ operator unsigned() const;
- __device__ operator int() const;
- };
- struct b {
- a d;
- };
- void f(b g) { b e = g; }
-}
-
-// Implicit HD candidate competes with host candidate.
-// a and b have implicit HD copy ctor. In copy ctor of b, ctor of a is resolved.
-// copy ctor of a should win over a(short), otherwise there will be ambiguity
-// due to conversion operator.
-namespace TestImplicitHDWithH {
- struct a {
- a(short);
- __device__ operator unsigned() const;
- __device__ operator int() const;
- };
- struct b {
- a d;
- };
- void f(b g) { b e = g; }
-}
-
-// Implicit HD candidate comptes with HD candidate.
-// a and b have implicit HD copy ctor. In copy ctor of b, ctor of a is resolved.
-// copy ctor of a should win over a(short), otherwise there will be ambiguity
-// due to conversion operator.
-namespace TestImplicitHDWithHD {
- struct a {
- __host__ __device__ a(short);
- __device__ operator unsigned() const;
- __device__ operator int() const;
- };
- struct b {
- a d;
- };
- void f(b g) { b e = g; }
-}
More information about the cfe-commits
mailing list