r289091 - [CUDA] Ignore implicit target attributes during function template instantiation.

Artem Belevich via cfe-commits cfe-commits at lists.llvm.org
Thu Dec 8 11:38:14 PST 2016


Author: tra
Date: Thu Dec  8 13:38:13 2016
New Revision: 289091

URL: http://llvm.org/viewvc/llvm-project?rev=289091&view=rev
Log:
[CUDA] Ignore implicit target attributes during function template instantiation.

Some functions and templates are treated as __host__ __device__ even
when they don't have explicitly specified target attributes.
What's worse, this treatment may change depending on command line
options (-fno-cuda-host-device-constexpr) or
#pragma clang force_cuda_host_device.

Combined with strict checking for matching function target that comes
with D25809(r288962), it makes it hard to write code which would
explicitly instantiate or specialize some functions regardless of
pragmas or command line options in effect.

This patch changes the way we match target attributes of base template
vs attributes used in explicit instantiation or specialization so that
only explicitly specified attributes are considered. This makes base
template selection behave consistently regardless of pragma of command
line options that may affect CUDA target.

Differential Revision: https://reviews.llvm.org/D25845

Modified:
    cfe/trunk/include/clang/Sema/Sema.h
    cfe/trunk/lib/Sema/SemaCUDA.cpp
    cfe/trunk/lib/Sema/SemaDecl.cpp
    cfe/trunk/lib/Sema/SemaTemplate.cpp
    cfe/trunk/test/SemaCUDA/function-template-overload.cu

Modified: cfe/trunk/include/clang/Sema/Sema.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/Sema.h?rev=289091&r1=289090&r2=289091&view=diff
==============================================================================
--- cfe/trunk/include/clang/Sema/Sema.h (original)
+++ cfe/trunk/include/clang/Sema/Sema.h Thu Dec  8 13:38:13 2016
@@ -9420,7 +9420,8 @@ public:
   ///
   /// Use this rather than examining the function's attributes yourself -- you
   /// will get it wrong.  Returns CFT_Host if D is null.
-  CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D);
+  CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D,
+                                        bool IgnoreImplicitHDAttr = false);
   CUDAFunctionTarget IdentifyCUDATarget(const AttributeList *Attr);
 
   /// Gets the CUDA target for the current context.
@@ -9522,7 +9523,10 @@ public:
 
   /// Check whether NewFD is a valid overload for CUDA. Emits
   /// diagnostics and invalidates NewFD if not.
-  void checkCUDATargetOverload(FunctionDecl *NewFD, LookupResult &Previous);
+  void checkCUDATargetOverload(FunctionDecl *NewFD,
+                               const LookupResult &Previous);
+  /// Copies target attributes from the template TD to the function FD.
+  void inheritCUDATargetAttrs(FunctionDecl *FD, const FunctionTemplateDecl &TD);
 
   /// \name Code completion
   //@{

Modified: cfe/trunk/lib/Sema/SemaCUDA.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaCUDA.cpp?rev=289091&r1=289090&r2=289091&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaCUDA.cpp (original)
+++ cfe/trunk/lib/Sema/SemaCUDA.cpp Thu Dec  8 13:38:13 2016
@@ -93,8 +93,17 @@ Sema::CUDAFunctionTarget Sema::IdentifyC
   return CFT_Host;
 }
 
+template <typename A>
+static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
+  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
+           return isa<A>(Attribute) &&
+                  !(IgnoreImplicitAttr && Attribute->isImplicit());
+         });
+}
+
 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function
-Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
+Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
+                                                  bool IgnoreImplicitHDAttr) {
   // Code that lives outside a function is run on the host.
   if (D == nullptr)
     return CFT_Host;
@@ -105,13 +114,13 @@ Sema::CUDAFunctionTarget Sema::IdentifyC
   if (D->hasAttr<CUDAGlobalAttr>())
     return CFT_Global;
 
-  if (D->hasAttr<CUDADeviceAttr>()) {
-    if (D->hasAttr<CUDAHostAttr>())
+  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
+    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
       return CFT_HostDevice;
     return CFT_Device;
-  } else if (D->hasAttr<CUDAHostAttr>()) {
+  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
     return CFT_Host;
-  } else if (D->isImplicit()) {
+  } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
     // Some implicit declarations (like intrinsic functions) are not marked.
     // Set the most lenient target on them for maximal flexibility.
     return CFT_HostDevice;
@@ -856,7 +865,7 @@ void Sema::CUDASetLambdaAttrs(CXXMethodD
 }
 
 void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
-                                   LookupResult &Previous) {
+                                   const LookupResult &Previous) {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
   CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
   for (NamedDecl *OldND : Previous) {
@@ -883,3 +892,21 @@ void Sema::checkCUDATargetOverload(Funct
     }
   }
 }
+
+template <typename AttrTy>
+static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
+                              const FunctionDecl &TemplateFD) {
+  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
+    AttrTy *Clone = Attribute->clone(S.Context);
+    Clone->setInherited(true);
+    FD->addAttr(Clone);
+  }
+}
+
+void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
+                                  const FunctionTemplateDecl &TD) {
+  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
+  copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
+  copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
+  copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
+}

Modified: cfe/trunk/lib/Sema/SemaDecl.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDecl.cpp?rev=289091&r1=289090&r2=289091&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaDecl.cpp (original)
+++ cfe/trunk/lib/Sema/SemaDecl.cpp Thu Dec  8 13:38:13 2016
@@ -8305,9 +8305,6 @@ Sema::ActOnFunctionDeclarator(Scope *S,
   // Handle attributes.
   ProcessDeclAttributes(S, NewFD, D);
 
-  if (getLangOpts().CUDA)
-    maybeAddCUDAHostDeviceAttrs(NewFD, Previous);
-
   if (getLangOpts().OpenCL) {
     // OpenCL v1.1 s6.5: Using an address space qualifier in a function return
     // type declaration will generate a compilation error.
@@ -8410,6 +8407,15 @@ Sema::ActOnFunctionDeclarator(Scope *S,
       TemplateArgs.setRAngleLoc(D.getIdentifierLoc());
     }
 
+    // We do not add HD attributes to specializations here because
+    // they may have different constexpr-ness compared to their
+    // templates and, after maybeAddCUDAHostDeviceAttrs() is applied,
+    // may end up with different effective targets. Instead, a
+    // specialization inherits its target attributes from its template
+    // in the CheckFunctionTemplateSpecialization() call below.
+    if (getLangOpts().CUDA & !isFunctionTemplateSpecialization)
+      maybeAddCUDAHostDeviceAttrs(NewFD, Previous);
+
     // If it's a friend (and only if it's a friend), it's possible
     // that either the specialized function type or the specialized
     // template is dependent, and therefore matching will fail.  In

Modified: cfe/trunk/lib/Sema/SemaTemplate.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaTemplate.cpp?rev=289091&r1=289090&r2=289091&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaTemplate.cpp (original)
+++ cfe/trunk/lib/Sema/SemaTemplate.cpp Thu Dec  8 13:38:13 2016
@@ -7043,13 +7043,15 @@ bool Sema::CheckFunctionTemplateSpeciali
         continue;
       }
 
-      // Target attributes are part of function signature during cuda
-      // compilation, so deduced template must also have matching CUDA
-      // target. Given that regular template deduction does not take
-      // target attributes into account, we perform target match check
-      // here and reject candidates that have different target.
+      // Target attributes are part of the cuda function signature, so
+      // the deduced template's cuda target must match that of the
+      // specialization.  Given that C++ template deduction does not
+      // take target attributes into account, we reject candidates
+      // here that have a different target.
       if (LangOpts.CUDA &&
-          IdentifyCUDATarget(Specialization) != IdentifyCUDATarget(FD)) {
+          IdentifyCUDATarget(Specialization,
+                             /* IgnoreImplicitHDAttributes = */ true) !=
+              IdentifyCUDATarget(FD, /* IgnoreImplicitHDAttributes = */ true)) {
         FailedCandidates.addCandidate().set(
             I.getPair(), FunTmpl->getTemplatedDecl(),
             MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
@@ -7166,6 +7168,14 @@ bool Sema::CheckFunctionTemplateSpeciali
       SpecInfo->getTemplateSpecializationKind(),
       ExplicitTemplateArgs ? &ConvertedTemplateArgs[Specialization] : nullptr);
 
+  // A function template specialization inherits the target attributes
+  // of its template.  (We require the attributes explicitly in the
+  // code to match, but a template may have implicit attributes by
+  // virtue e.g. of being constexpr, and it passes these implicit
+  // attributes on to its specializations.)
+  if (LangOpts.CUDA)
+    inheritCUDATargetAttrs(FD, *Specialization->getPrimaryTemplate());
+
   // The "previous declaration" for this function template specialization is
   // the prior function template specialization.
   Previous.clear();
@@ -8154,24 +8164,19 @@ DeclResult Sema::ActOnExplicitInstantiat
       continue;
     }
 
-    // Target attributes are part of function signature during cuda
-    // compilation, so deduced template must also have matching CUDA
-    // target. Given that regular template deduction does not take it
-    // into account, we perform target match check here and reject
-    // candidates that have different target.
-    if (LangOpts.CUDA) {
-      CUDAFunctionTarget DeclaratorTarget = IdentifyCUDATarget(Attr);
-      // We need to adjust target when HD is forced by
-      // #pragma clang force_cuda_host_device
-      if (ForceCUDAHostDeviceDepth > 0 &&
-          (DeclaratorTarget == CFT_Device || DeclaratorTarget == CFT_Host))
-        DeclaratorTarget = CFT_HostDevice;
-      if (IdentifyCUDATarget(Specialization) != DeclaratorTarget) {
-        FailedCandidates.addCandidate().set(
-            P.getPair(), FunTmpl->getTemplatedDecl(),
-            MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
-        continue;
-      }
+    // Target attributes are part of the cuda function signature, so
+    // the cuda target of the instantiated function must match that of its
+    // template.  Given that C++ template deduction does not take
+    // target attributes into account, we reject candidates here that
+    // have a different target.
+    if (LangOpts.CUDA &&
+        IdentifyCUDATarget(Specialization,
+                           /* IgnoreImplicitHDAttributes = */ true) !=
+            IdentifyCUDATarget(Attr)) {
+      FailedCandidates.addCandidate().set(
+          P.getPair(), FunTmpl->getTemplatedDecl(),
+          MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
+      continue;
     }
 
     Matches.addDecl(Specialization, P.getAccess());

Modified: cfe/trunk/test/SemaCUDA/function-template-overload.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/function-template-overload.cu?rev=289091&r1=289090&r2=289091&view=diff
==============================================================================
--- cfe/trunk/test/SemaCUDA/function-template-overload.cu (original)
+++ cfe/trunk/test/SemaCUDA/function-template-overload.cu Thu Dec  8 13:38:13 2016
@@ -31,7 +31,8 @@ template  <> __device__ DType overload_h
 template  <> __host__ HType overload_h_d(long a); // OK. instantiates H
 
 
-// Can't overload HD template with H or D template, though functions are OK.
+// Can't overload HD template with H or D template, though
+// non-template functions are OK.
 template <typename T> __host__ __device__ HDType overload_hd(T a) { return HDType(); }
 // expected-note at -1 {{previous declaration is here}}
 // expected-note at -2 2 {{candidate template ignored: could not match 'HDType' against 'HType'}}
@@ -56,24 +57,54 @@ template <typename T> __host__ HType ove
 template <typename T> __host__ __device__ HDType overload_h_d2(T a) { return HDType(); }
 template <typename T1, typename T2 = int> __device__ DType overload_h_d2(T1 a) { T1 x; T2 y; return DType(); }
 
+// constexpr functions are implicitly HD, but explicit
+// instantiation/specialization must use target attributes as written.
+template <typename T> constexpr T overload_ce_implicit_hd(T a) { return a+1; }
+// expected-note at -1 3 {{candidate template ignored: target attributes do not match}}
+
+// These will not match the template.
+template __host__ __device__ int overload_ce_implicit_hd(int a);
+// expected-error at -1 {{explicit instantiation of 'overload_ce_implicit_hd' does not refer to a function template, variable template, member function, member class, or static data member}}
+template <> __host__ __device__ long overload_ce_implicit_hd(long a);
+// expected-error at -1 {{no function template matches function template specialization 'overload_ce_implicit_hd'}}
+template <> __host__ __device__ constexpr long overload_ce_implicit_hd(long a);
+// expected-error at -1 {{no function template matches function template specialization 'overload_ce_implicit_hd'}}
+
+// These should work, because template matching ignores the implicit
+// HD attributes the compiler gives to constexpr functions/templates,
+// so 'overload_ce_implicit_hd' template will match __host__ functions
+// only.
+template __host__ int overload_ce_implicit_hd(int a);
+template <> __host__ long overload_ce_implicit_hd(long a);
+
+template float overload_ce_implicit_hd(float a);
+template <> float* overload_ce_implicit_hd(float *a);
+template <> constexpr double overload_ce_implicit_hd(double a) { return a + 3.0; };
+
 __host__ void hf() {
   overload_hd(13);
+  overload_ce_implicit_hd('h');        // Implicitly instantiated
+  overload_ce_implicit_hd(1.0f);       // Explicitly instantiated
+  overload_ce_implicit_hd(2.0);        // Explicitly specialized
 
   HType h = overload_h_d(10);
   HType h2i = overload_h_d2<int>(11);
   HType h2ii = overload_h_d2<int>(12);
 
   // These should be implicitly instantiated from __host__ template returning HType.
-  DType d = overload_h_d(20); // expected-error {{no viable conversion from 'HType' to 'DType'}}
-  DType d2i = overload_h_d2<int>(21); // expected-error {{no viable conversion from 'HType' to 'DType'}}
+  DType d = overload_h_d(20);          // expected-error {{no viable conversion from 'HType' to 'DType'}}
+  DType d2i = overload_h_d2<int>(21);  // expected-error {{no viable conversion from 'HType' to 'DType'}}
   DType d2ii = overload_h_d2<int>(22); // expected-error {{no viable conversion from 'HType' to 'DType'}}
 }
 __device__ void df() {
   overload_hd(23);
+  overload_ce_implicit_hd('d');        // Implicitly instantiated
+  overload_ce_implicit_hd(1.0f);       // Explicitly instantiated
+  overload_ce_implicit_hd(2.0);        // Explicitly specialized
 
   // These should be implicitly instantiated from __device__ template returning DType.
-  HType h = overload_h_d(10); // expected-error {{no viable conversion from 'DType' to 'HType'}}
-  HType h2i = overload_h_d2<int>(11); // expected-error {{no viable conversion from 'DType' to 'HType'}}
+  HType h = overload_h_d(10);          // expected-error {{no viable conversion from 'DType' to 'HType'}}
+  HType h2i = overload_h_d2<int>(11);  // expected-error {{no viable conversion from 'DType' to 'HType'}}
   HType h2ii = overload_h_d2<int>(12); // expected-error {{no viable conversion from 'DType' to 'HType'}}
 
   DType d = overload_h_d(20);




More information about the cfe-commits mailing list