[clang] [Clang][HIP] Target-dependent overload resolution in declarators and specifiers (PR #103031)

Mon Aug 19 23:49:35 PDT 2024

https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/103031

>From 274aaef1847bbdd837213064113adb1182e5bb59 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Tue, 13 Aug 2024 05:27:45 -0400
Subject: [PATCH 1/4] [Clang][HIP] Target-dependent overload resolution in
 declarators and specifiers

So far, the resolution of host/device overloads for functions in HIP/CUDA
operates as if in a host-device context for code outside of function bodies,
e.g., in expressions that are part of template arguments in top-level
declarations. This means that, if separate host and device overloads are
declared, the device overload is used in the device compilation phase and the
host overload is used in the host compilation phase.

This patch changes overload resolution in such cases to prefer overloads that
match the target of the declaration in which they occur. For example:

__device__ constexpr int get_n() { return 64; }
__host__ constexpr int get_n() { return -1; }
__device__ std::enable_if<(get_n() > 32)>::type foo() { }

Before, this code would not compile, because get_n resolved to the host
overload during host compilation, causing an error. With this patch, the call
to get_n in the declaration of the device function foo resolves to the device
overload in host and device compilation.

If attributes that affect the declaration's target occur after a call with
target-dependent overload resolution, a warning is issued. This is realized by
registering the Kinds of relevant attributes in the CUDATargetContext when they
are parsed.

This is an alternative to PR #93546, which is required for PR #91478.
---
 .../clang/Basic/DiagnosticSemaKinds.td        |   4 +
 clang/include/clang/Sema/SemaCUDA.h           |  32 +-
 clang/lib/Parse/ParseDecl.cpp                 |   6 +
 clang/lib/Parse/ParseDeclCXX.cpp              |   6 +
 clang/lib/Parse/Parser.cpp                    |   8 +
 clang/lib/Sema/SemaCUDA.cpp                   | 113 ++-
 clang/lib/Sema/SemaOverload.cpp               |   2 +-
 .../target-overloads-availability-warnings.cu | 148 ++++
 ...target-overloads-in-function-prototypes.cu | 690 ++++++++++++++++++
 9 files changed, 988 insertions(+), 21 deletions(-)
 create mode 100644 clang/test/SemaCUDA/target-overloads-availability-warnings.cu
 create mode 100644 clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 554dbaff2ce0d8..8709f60678b466 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9017,6 +9017,10 @@ def err_global_call_not_config : Error<
 def err_ref_bad_target : Error<
   "reference to %select{__device__|__global__|__host__|__host__ __device__}0 "
   "%select{function|variable}1 %2 in %select{__device__|__global__|__host__|__host__ __device__}3 function">;
+def warn_target_specfier_ignored : Warning<
+  "target specifier has been ignored for overload resolution; "
+  "move the target specifier to the beginning of the declaration to use it for overload resolution">,
+  InGroup<IgnoredAttributes>;
 def note_cuda_const_var_unpromoted : Note<
   "const variable cannot be emitted on device side due to dynamic initialization">;
 def note_cuda_host_var : Note<
diff --git a/clang/include/clang/Sema/SemaCUDA.h b/clang/include/clang/Sema/SemaCUDA.h
index 63dc3f4da240b3..83083ada889a16 100644
--- a/clang/include/clang/Sema/SemaCUDA.h
+++ b/clang/include/clang/Sema/SemaCUDA.h
@@ -104,6 +104,8 @@ class SemaCUDA : public SemaBase {
   CUDAFunctionTarget IdentifyTarget(const FunctionDecl *D,
                                     bool IgnoreImplicitHDAttr = false);
   CUDAFunctionTarget IdentifyTarget(const ParsedAttributesView &Attrs);
+  CUDAFunctionTarget IdentifyTarget(
+      const SmallVectorImpl<clang::AttributeCommonInfo::Kind> &AttrKinds);
 
   enum CUDAVariableTarget {
     CVT_Device,  /// Emitted on device side with a shadow variable on host side
@@ -120,21 +122,43 @@ class SemaCUDA : public SemaBase {
     CTCK_Unknown,       /// Unknown context
     CTCK_InitGlobalVar, /// Function called during global variable
                         /// initialization
+    CTCK_Declaration,   /// Function called in a declaration specifier or
+                        /// declarator outside of other contexts, usually in
+                        /// template arguments.
   };
 
   /// Define the current global CUDA host/device context where a function may be
   /// called. Only used when a function is called outside of any functions.
-  struct CUDATargetContext {
-    CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice;
+  class CUDATargetContext {
+  public:
     CUDATargetContextKind Kind = CTCK_Unknown;
-    Decl *D = nullptr;
+
+    CUDATargetContext() = default;
+
+    CUDATargetContext(SemaCUDA *S, CUDATargetContextKind Kind,
+                      CUDAFunctionTarget Target);
+
+    CUDAFunctionTarget getTarget();
+
+    /// If this is a CTCK_Declaration context, update the Target based on Attrs.
+    /// No-op otherwise.
+    /// Issues a diagnostic if the target changes after it has been queried
+    /// before.
+    void tryRegisterTargetAttrs(const ParsedAttributesView &Attrs);
+
+  private:
+    SemaCUDA *S = nullptr;
+    CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice;
+    SmallVector<clang::AttributeCommonInfo::Kind, 0> AttrKinds;
+    bool TargetQueried = false;
+
   } CurCUDATargetCtx;
 
   struct CUDATargetContextRAII {
     SemaCUDA &S;
     SemaCUDA::CUDATargetContext SavedCtx;
     CUDATargetContextRAII(SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K,
-                          Decl *D);
+                          Decl *D = nullptr);
     ~CUDATargetContextRAII() { S.CurCUDATargetCtx = SavedCtx; }
   };
 
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index a8a9d3f3f5b088..615aa8e4c5df02 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -311,6 +311,9 @@ void Parser::ParseGNUAttributes(ParsedAttributes &Attrs,
   }
 
   Attrs.Range = SourceRange(StartLoc, EndLoc);
+
+  if (Actions.getLangOpts().CUDA)
+    Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(Attrs);
 }
 
 /// Determine whether the given attribute has an identifier argument.
@@ -1003,6 +1006,9 @@ void Parser::ParseMicrosoftDeclSpecs(ParsedAttributes &Attrs) {
   }
 
   Attrs.Range = SourceRange(StartLoc, EndLoc);
+
+  if (Actions.getLangOpts().CUDA)
+    Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(Attrs);
 }
 
 void Parser::ParseMicrosoftTypeAttributes(ParsedAttributes &attrs) {
diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp
index aac89d910bbc83..00010731043330 100644
--- a/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/clang/lib/Parse/ParseDeclCXX.cpp
@@ -27,6 +27,7 @@
 #include "clang/Sema/EnterExpressionEvaluationContext.h"
 #include "clang/Sema/ParsedTemplate.h"
 #include "clang/Sema/Scope.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaCodeCompletion.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/TimeProfiler.h"
@@ -2852,6 +2853,11 @@ Parser::DeclGroupPtrTy Parser::ParseCXXClassMemberDeclaration(
     ParsedTemplateInfo &TemplateInfo, ParsingDeclRAIIObject *TemplateDiags) {
   assert(getLangOpts().CPlusPlus &&
          "ParseCXXClassMemberDeclaration should only be called in C++ mode");
+  SemaCUDA::CUDATargetContextRAII CTCRAII(Actions.CUDA(),
+                                          SemaCUDA::CTCK_Declaration);
+  if (Actions.getLangOpts().CUDA)
+    Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(AccessAttrs);
+
   if (Tok.is(tok::at)) {
     if (getLangOpts().ObjC && NextToken().isObjCAtKeyword(tok::objc_defs))
       Diag(Tok, diag::err_at_defs_cxx);
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index 04c2f1d380bc48..b7bc11964e9687 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -21,6 +21,7 @@
 #include "clang/Sema/DeclSpec.h"
 #include "clang/Sema/ParsedTemplate.h"
 #include "clang/Sema/Scope.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaCodeCompletion.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/TimeProfiler.h"
@@ -1133,6 +1134,13 @@ bool Parser::isStartOfFunctionDefinition(const ParsingDeclarator &Declarator) {
 Parser::DeclGroupPtrTy Parser::ParseDeclOrFunctionDefInternal(
     ParsedAttributes &Attrs, ParsedAttributes &DeclSpecAttrs,
     ParsingDeclSpec &DS, AccessSpecifier AS) {
+  SemaCUDA::CUDATargetContextRAII CTCRAII(Actions.CUDA(),
+                                          SemaCUDA::CTCK_Declaration);
+  if (Actions.getLangOpts().CUDA) {
+    Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(Attrs);
+    Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(DeclSpecAttrs);
+  }
+
   // Because we assume that the DeclSpec has not yet been initialised, we simply
   // overwrite the source range and attribute the provided leading declspec
   // attributes.
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index ec37c0df56c671..b16c50a95ccba2 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -18,6 +18,7 @@
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Sema/Lookup.h"
+#include "clang/Sema/ParsedAttr.h"
 #include "clang/Sema/ScopeInfo.h"
 #include "clang/Sema/Sema.h"
 #include "clang/Sema/SemaDiagnostic.h"
@@ -68,13 +69,28 @@ ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                /*IsExecConfig=*/true);
 }
 
-CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
+namespace {
+
+// This iterator adaptor enables sharing a IdentifyTarget implementation for
+// ParsedAttributesView and for vectors of AttributeCommonInfo::Kind.
+struct AttrKindIterator
+    : llvm::iterator_adaptor_base<
+          AttrKindIterator, ParsedAttributesView::const_iterator,
+          std::random_access_iterator_tag, clang::AttributeCommonInfo::Kind> {
+  AttrKindIterator() : iterator_adaptor_base(nullptr) {}
+  AttrKindIterator(ParsedAttributesView::const_iterator I)
+      : iterator_adaptor_base(I) {}
+  clang::AttributeCommonInfo::Kind operator*() const { return I->getKind(); }
+};
+
+template <typename AKIterRange>
+CUDAFunctionTarget IdentifyTargetImpl(const AKIterRange &AttrKinds) {
   bool HasHostAttr = false;
   bool HasDeviceAttr = false;
   bool HasGlobalAttr = false;
   bool HasInvalidTargetAttr = false;
-  for (const ParsedAttr &AL : Attrs) {
-    switch (AL.getKind()) {
+  for (const auto &AK : AttrKinds) {
+    switch (AK) {
     case ParsedAttr::AT_CUDAGlobal:
       HasGlobalAttr = true;
       break;
@@ -107,6 +123,18 @@ CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
   return CUDAFunctionTarget::Host;
 }
 
+} // namespace
+
+CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
+  return IdentifyTargetImpl(make_range(AttrKindIterator(Attrs.begin()),
+                                       AttrKindIterator(Attrs.end())));
+}
+
+CUDAFunctionTarget SemaCUDA::IdentifyTarget(
+    const SmallVectorImpl<clang::AttributeCommonInfo::Kind> &AttrKinds) {
+  return IdentifyTargetImpl(AttrKinds);
+}
+
 template <typename A>
 static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
   return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
@@ -115,20 +143,65 @@ static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
          });
 }
 
+SemaCUDA::CUDATargetContext::CUDATargetContext(SemaCUDA *S,
+                                               CUDATargetContextKind Kind,
+                                               CUDAFunctionTarget Target)
+    : Kind(Kind), S(S), Target(Target) {}
+
+CUDAFunctionTarget SemaCUDA::CUDATargetContext::getTarget() {
+  TargetQueried = true;
+  return Target;
+}
+
+void SemaCUDA::CUDATargetContext::tryRegisterTargetAttrs(
+    const ParsedAttributesView &Attrs) {
+  if (Kind != CTCK_Declaration)
+    return;
+  for (const auto &A : Attrs) {
+    auto AK = A.getKind();
+    switch (AK) {
+    case ParsedAttr::AT_CUDAGlobal:
+    case ParsedAttr::AT_CUDAHost:
+    case ParsedAttr::AT_CUDADevice:
+    case ParsedAttr::AT_CUDAInvalidTarget:
+      break;
+    default:
+      continue;
+    }
+    AttrKinds.push_back(AK);
+    CUDAFunctionTarget NewTarget = S->IdentifyTarget(AttrKinds);
+    if (TargetQueried && (NewTarget != Target))
+      S->Diag(A.getLoc(), diag::warn_target_specfier_ignored);
+    Target = NewTarget;
+  }
+}
+
 SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII(
     SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D)
     : S(S_) {
   SavedCtx = S.CurCUDATargetCtx;
-  assert(K == SemaCUDA::CTCK_InitGlobalVar);
-  auto *VD = dyn_cast_or_null<VarDecl>(D);
-  if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
-    auto Target = CUDAFunctionTarget::Host;
-    if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
-         !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
-        hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
-        hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
-      Target = CUDAFunctionTarget::Device;
-    S.CurCUDATargetCtx = {Target, K, VD};
+
+  switch (K) {
+  case SemaCUDA::CTCK_InitGlobalVar: {
+    auto *VD = dyn_cast_or_null<VarDecl>(D);
+    if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
+      auto Target = CUDAFunctionTarget::Host;
+      if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
+           !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
+          hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
+          hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
+        Target = CUDAFunctionTarget::Device;
+      S.CurCUDATargetCtx = CUDATargetContext(&S, K, Target);
+    }
+    break;
+  }
+  case SemaCUDA::CTCK_Declaration:
+    // The target is updated once relevant attributes are parsed. Initialize
+    // with the target used if no attributes are present: Host.
+    S.CurCUDATargetCtx = CUDATargetContext(&S, K, CUDAFunctionTarget::Host);
+    break;
+  default:
+    llvm_unreachable("unexpected context kind");
   }
 }
 
@@ -137,7 +210,7 @@ CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
                                             bool IgnoreImplicitHDAttr) {
   // Code that lives outside a function gets the target from CurCUDATargetCtx.
   if (D == nullptr)
-    return CurCUDATargetCtx.Target;
+    return CurCUDATargetCtx.getTarget();
 
   if (D->hasAttr<CUDAInvalidTargetAttr>())
     return CUDAFunctionTarget::InvalidTarget;
@@ -232,7 +305,7 @@ SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
   // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
   // will be diagnosed by checkAllowedInitializer.
   if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
-      CurCUDATargetCtx.Target == CUDAFunctionTarget::Device &&
+      CurCUDATargetCtx.getTarget() == CUDAFunctionTarget::Device &&
       (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
     return CFP_HostDevice;
 
@@ -297,8 +370,16 @@ SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
       (CallerTarget == CUDAFunctionTarget::Device &&
        CalleeTarget == CUDAFunctionTarget::Host) ||
       (CallerTarget == CUDAFunctionTarget::Global &&
-       CalleeTarget == CUDAFunctionTarget::Host))
+       CalleeTarget == CUDAFunctionTarget::Host)) {
+    // In declaration contexts outside of function bodies and variable
+    // initializers, tolerate mismatched function targets as long as they are
+    // not codegened.
+    if (CurCUDATargetCtx.Kind == CTCK_Declaration &&
+        !this->SemaRef.getCurFunctionDecl(/*AllowLambda=*/true))
+      return CFP_WrongSide;
+
     return CFP_Never;
+  }
 
   llvm_unreachable("All cases should've been handled by now.");
 }
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 52f640eb96b73b..e3703c2c735fe1 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -10747,7 +10747,7 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
         llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
           // Check viable function only.
           return Cand->Viable && Cand->Function &&
-                 S.CUDA().IdentifyPreference(Caller, Cand->Function) ==
+                 S.CUDA().IdentifyPreference(Caller, Cand->Function) >=
                      SemaCUDA::CFP_SameSide;
         });
     if (ContainsSameSideCandidate) {
diff --git a/clang/test/SemaCUDA/target-overloads-availability-warnings.cu b/clang/test/SemaCUDA/target-overloads-availability-warnings.cu
new file mode 100644
index 00000000000000..f0fc1bea1db642
--- /dev/null
+++ b/clang/test/SemaCUDA/target-overloads-availability-warnings.cu
@@ -0,0 +1,148 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify=expected,onhost %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify=expected,ondevice %s
+
+template <bool C, class T = void> struct my_enable_if {};
+
+template <class T> struct my_enable_if<true, T> {
+  typedef T type;
+};
+
+__attribute__((host, device)) void use(int x);
+
+// For 'OverloadFunHostDepr', the host overload is deprecated, the device overload is not.
+__attribute__((device)) constexpr int OverloadFunHostDepr(void) { return 1; }
+__attribute__((host, deprecated("Host variant"))) constexpr int OverloadFunHostDepr(void) { return 1; } // expected-note 0+ {{has been explicitly marked deprecated here}}
+
+
+// For 'OverloadFunDeviceDepr', the device overload is deprecated, the host overload is not.
+__attribute__((device, deprecated("Device variant"))) constexpr int OverloadFunDeviceDepr(void) { return 1; } // expected-note 0+ {{has been explicitly marked deprecated here}}
+__attribute__((host)) constexpr int OverloadFunDeviceDepr(void) { return 1; }
+
+
+// For 'TemplateOverloadFun', the host overload is deprecated, the device overload is not.
+template<typename T>
+__attribute__((device)) constexpr T TemplateOverloadFun(void) { return 1; }
+
+template<typename T>
+__attribute__((host, deprecated("Host variant"))) constexpr T TemplateOverloadFun(void) { return 1; } // expected-note 0+ {{has been explicitly marked deprecated here}}
+
+
+// There is only a device overload, and it is deprecated.
+__attribute__((device, deprecated)) constexpr int // expected-note 0+ {{has been explicitly marked deprecated here}}
+DeviceOnlyFunDeprecated(void) { return 1; }
+
+// There is only a host overload, and it is deprecated.
+__attribute__((host, deprecated)) constexpr int // expected-note 0+ {{has been explicitly marked deprecated here}}
+HostOnlyFunDeprecated(void) { return 1; }
+
+class FunSelector {
+public:
+  // This should use the non-deprecated device overload.
+  template<int X> __attribute__((device))
+  auto devicefun(void) -> typename my_enable_if<(X == OverloadFunHostDepr()), int>::type {
+    return 1;
+  }
+
+  // This should use the non-deprecated device overload.
+  template<int X> __attribute__((device))
+  auto devicefun(void) -> typename my_enable_if<(X != OverloadFunHostDepr()), int>::type {
+      return 0;
+  }
+
+  // This should use the deprecated device overload.
+  template<int X> __attribute__((device))
+  auto devicefun_wrong(void) -> typename my_enable_if<(X == OverloadFunDeviceDepr()), int>::type { // expected-warning {{'OverloadFunDeviceDepr' is deprecated: Device variant}}
+    return 1;
+  }
+
+  // This should use the deprecated device overload.
+  template<int X> __attribute__((device))
+  auto devicefun_wrong(void) -> typename my_enable_if<(X != OverloadFunDeviceDepr()), int>::type { // expected-warning {{'OverloadFunDeviceDepr' is deprecated: Device variant}}
+      return 0;
+  }
+
+  // This should use the non-deprecated host overload.
+  template<int X> __attribute__((host))
+  auto hostfun(void) -> typename my_enable_if<(X == OverloadFunDeviceDepr()), int>::type {
+    return 1;
+  }
+
+  // This should use the non-deprecated host overload.
+  template<int X> __attribute__((host))
+  auto hostfun(void) -> typename my_enable_if<(X != OverloadFunDeviceDepr()), int>::type {
+      return 0;
+  }
+
+  // This should use the deprecated host overload.
+  template<int X> __attribute__((host))
+  auto hostfun_wrong(void) -> typename my_enable_if<(X == OverloadFunHostDepr()), int>::type { // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}}
+    return 1;
+  }
+
+  // This should use the deprecated host overload.
+  template<int X> __attribute__((host))
+  auto hostfun_wrong(void) -> typename my_enable_if<(X != OverloadFunHostDepr()), int>::type { // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}}
+      return 0;
+  }
+};
+
+
+// These should not be diagnosed since the device overload of
+// OverloadFunHostDepr is not deprecated:
+__attribute__((device)) my_enable_if<(OverloadFunHostDepr() > 0), int>::type
+DeviceUserOverloadFunHostDepr1(void) { return 2; }
+
+__attribute__((device)) my_enable_if<(OverloadFunHostDepr() > 0), int>::type constexpr
+DeviceUserOverloadFunHostDeprConstexpr(void) { return 2; }
+
+
+// Analogously for OverloadFunDeviceDepr:
+__attribute__((host)) my_enable_if<(OverloadFunDeviceDepr() > 0), int>::type
+DeviceUserOverloadFunDeviceDepr1(void) { return 2; }
+
+my_enable_if<(OverloadFunDeviceDepr() > 0), int>::type __attribute__((host))
+DeviceUserOverloadFunDeviceDepr2(void) { return 2; }
+
+__attribute__((host)) my_enable_if<(OverloadFunDeviceDepr() > 0), int>::type constexpr
+DeviceUserOverloadFunDeviceDeprConstexpr(void) { return 2; }
+
+
+// Actual uses of the deprecated overloads should be diagnosed:
+__attribute__((host, device)) my_enable_if<(OverloadFunHostDepr() > 0), int>::type // onhost-warning {{'OverloadFunHostDepr' is deprecated: Host variant}}
+HostDeviceUserOverloadFunHostDepr(void) { return 3; }
+
+__attribute__((host)) my_enable_if<(OverloadFunHostDepr() > 0), int>::type constexpr // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}}
+HostUserOverloadFunHostDeprConstexpr(void) { return 3; }
+
+__attribute__((device)) my_enable_if<(OverloadFunDeviceDepr() > 0), int>::type constexpr // expected-warning {{'OverloadFunDeviceDepr' is deprecated: Device variant}}
+HostUserOverloadFunDeviceDeprConstexpr(void) { return 3; }
+
+
+// Making the offending decl a template shouldn't change anything:
+__attribute__((host)) my_enable_if<(TemplateOverloadFun<int>() > 0), int>::type // expected-warning {{'TemplateOverloadFun<int>' is deprecated: Host variant}}
+HostUserTemplateOverloadFun(void) { return 3; }
+
+__attribute__((device)) my_enable_if<(TemplateOverloadFun<int>() > 0), int>::type
+DeviceUserTemplateOverloadFun(void) { return 3; }
+
+
+__attribute__((device, deprecated)) constexpr int DeviceVarConstDepr = 1; // expected-note 0+ {{has been explicitly marked deprecated here}}
+
+// Diagnostics for uses in function bodies should work as expected:
+__attribute__((host)) void HostUser(void) {
+  use(DeviceVarConstDepr); // expected-warning {{'DeviceVarConstDepr' is deprecated}}
+  use(HostOnlyFunDeprecated()); // expected-warning {{'HostOnlyFunDeprecated' is deprecated}}
+  use(OverloadFunHostDepr()); // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}}
+  use(TemplateOverloadFun<int>()); // expected-warning {{'TemplateOverloadFun<int>' is deprecated: Host variant}}
+
+  use(OverloadFunDeviceDepr());
+}
+
+__attribute__((device)) void DeviceUser(void) {
+  use(DeviceVarConstDepr); // expected-warning {{'DeviceVarConstDepr' is deprecated}}
+  use(DeviceOnlyFunDeprecated()); // expected-warning {{'DeviceOnlyFunDeprecated' is deprecated}}
+  use(OverloadFunDeviceDepr()); // expected-warning {{'OverloadFunDeviceDepr' is deprecated: Device variant}}
+
+  use(OverloadFunHostDepr());
+  use(TemplateOverloadFun<int>());
+}
diff --git a/clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu b/clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu
new file mode 100644
index 00000000000000..7a636084476a99
--- /dev/null
+++ b/clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu
@@ -0,0 +1,690 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify=expected,onhost %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify=expected,ondevice %s
+
+
+// Tests to ensure that functions with host and device overloads in that are
+// called outside of function bodies and variable initializers, e.g., in
+// template arguments are resolved with respect to the declaration to which they
+// belong.
+
+// Opaque types used for tests:
+struct DeviceTy {};
+struct HostTy {};
+struct HostDeviceTy {};
+struct TemplateTy {};
+
+struct TrueTy { static const bool value = true; };
+struct FalseTy { static const bool value = false; };
+
+// Select one of two types based on a boolean condition.
+template <bool COND, typename T, typename F> struct select_type {};
+template <typename T, typename F> struct select_type<true, T, F> { typedef T type; };
+template <typename T, typename F> struct select_type<false, T, F> { typedef F type; };
+
+template <bool C> struct check : public select_type<C, TrueTy, FalseTy> { };
+
+// Check if two types are the same.
+template<class T, class U> struct is_same : public FalseTy { };
+template<class T> struct is_same<T, T> : public TrueTy { };
+
+// A static assertion that fails at compile time if the expression E does not
+// have type T.
+#define ASSERT_HAS_TYPE(E, T) static_assert(is_same<decltype(E), T>::value);
+
+
+// is_on_device() is true when called in a device context and false if called in a host context.
+__attribute__((host)) constexpr bool is_on_device(void) { return false; }
+__attribute__((device)) constexpr bool is_on_device(void) { return true; }
+
+
+// this type depends on whether it occurs in host or device code
+#define targetdep_t select_type<is_on_device(), DeviceTy, HostTy>::type
+
+// Defines and typedefs with different values in host and device compilation.
+#ifdef __CUDA_ARCH__
+#define CurrentTarget DEVICE
+typedef DeviceTy CurrentTargetTy;
+typedef DeviceTy TemplateIfHostTy;
+#else
+#define CurrentTarget HOST
+typedef HostTy CurrentTargetTy;
+typedef TemplateTy TemplateIfHostTy;
+#endif
+
+
+
+// targetdep_t in function declarations should depend on the target of the
+// declared function.
+__attribute__((device)) targetdep_t decl_ret_early_device(void);
+ASSERT_HAS_TYPE(decl_ret_early_device(), DeviceTy)
+
+__attribute__((host)) targetdep_t decl_ret_early_host(void);
+ASSERT_HAS_TYPE(decl_ret_early_host(), HostTy)
+
+__attribute__((host,device)) targetdep_t decl_ret_early_host_device(void);
+ASSERT_HAS_TYPE(decl_ret_early_host_device(), CurrentTargetTy)
+
+// If the function target is specified too late and can therefore not be
+// considered for overload resolution in targetdep_t, warn.
+targetdep_t __attribute__((device)) decl_ret_late_device(void); // expected-warning {{target specifier has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(decl_ret_late_device(), HostTy)
+
+// No warning necessary if the ignored attribute doesn't change the result.
+targetdep_t __attribute__((host)) decl_ret_late_host(void);
+ASSERT_HAS_TYPE(decl_ret_late_host(), HostTy)
+
+targetdep_t __attribute__((host,device)) decl_ret_late_host_device(void); // expected-warning {{target specifier has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(decl_ret_late_host_device(), HostTy)
+
+// An odd way of writing this, but it's possible.
+__attribute__((device)) targetdep_t __attribute__((host)) decl_ret_early_device_late_host(void); // expected-warning {{target specifier has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(decl_ret_early_device_late_host(), DeviceTy)
+
+
+// The same for function definitions and parameter types:
+__attribute__((device)) targetdep_t ret_early_device(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_early_device({}), DeviceTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+__attribute__((host)) targetdep_t ret_early_host(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_early_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+__attribute__((host, device)) targetdep_t ret_early_hostdevice(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_early_hostdevice({}), CurrentTargetTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+// The parameter is still after the attribute, so it needs no warning.
+targetdep_t __attribute__((device)) // expected-warning {{target specifier has been ignored for overload resolution}}
+ret_late_device(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_late_device({}), HostTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+targetdep_t __attribute__((host, device)) // expected-warning {{target specifier has been ignored for overload resolution}}
+ret_late_hostdevice(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_late_hostdevice({}), HostTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+targetdep_t __attribute__((host)) ret_late_host(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_late_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+__attribute__((device)) targetdep_t __attribute__((host)) // expected-warning {{target specifier has been ignored for overload resolution}}
+ret_early_device_late_host(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_early_device_late_host({}), DeviceTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+// The attribute is even later, so we can't choose the expected overload.
+targetdep_t ret_verylate_device(targetdep_t x) __attribute__((device)) { // expected-warning {{target specifier has been ignored for overload resolution}}
+  ASSERT_HAS_TYPE(ret_verylate_device({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+// It's possible to get two different wrong types:
+targetdep_t __attribute__((device)) // expected-warning {{target specifier has been ignored for overload resolution}}
+ret_late_device_verylate_host(targetdep_t x) __attribute__((host)) { // expected-warning {{target specifier has been ignored for overload resolution}}
+  ASSERT_HAS_TYPE(ret_late_device_verylate_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+
+// Increasingly unusual ways to specify a return type:
+
+// The attribute is specified much earlier than the overload happens, works as
+// expected.
+__attribute__((device)) auto autoret_early_device(targetdep_t x) -> targetdep_t {
+  ASSERT_HAS_TYPE(autoret_early_device({}), DeviceTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+// The attribute is specified much earlier than the overload happens, works as
+// expected.
+__attribute__((host)) auto autoret_early_host(targetdep_t x) -> targetdep_t  {
+  ASSERT_HAS_TYPE(autoret_early_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+// The attribute is specified much earlier than the overload happens, works as
+// expected.
+__attribute__((host,device)) auto autoret_early_hostdevice(targetdep_t x) -> targetdep_t {
+  ASSERT_HAS_TYPE(autoret_early_hostdevice({}), CurrentTargetTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+
+// The attribute is still specified earlier than the overload happens, works as
+// expected.
+auto __attribute__((device)) autoret_late_device(targetdep_t x) -> targetdep_t {
+  ASSERT_HAS_TYPE(autoret_late_device({}), DeviceTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+// The attribute is still specified earlier than the overload happens, works as
+// expected.
+auto __attribute__((host)) autoret_late_host(targetdep_t x) -> targetdep_t {
+  ASSERT_HAS_TYPE(autoret_late_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+// The attribute is still specified earlier than the overload happens, works as
+// expected.
+auto __attribute__((host,device)) autoret_late_hostdevice(targetdep_t x) -> targetdep_t {
+  ASSERT_HAS_TYPE(autoret_late_hostdevice({}), CurrentTargetTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+
+// There should be no problem if the return type is inferred from an expression in the body:
+auto __attribute__((device)) fullauto_device(targetdep_t x) {
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return (targetdep_t)(x);
+}
+ASSERT_HAS_TYPE(fullauto_device({}), DeviceTy)
+
+auto __attribute__((host)) fullauto_host(targetdep_t x) {
+  ASSERT_HAS_TYPE(x, HostTy)
+  return (targetdep_t)(x);
+}
+ASSERT_HAS_TYPE(fullauto_host({}), HostTy)
+
+// The return type is as expected, but the argument type precedes the attribute,
+// so we don't get the right type for it.
+auto fullauto_verylate_device(targetdep_t x) __attribute__((device)) { // expected-warning {{target specifier has been ignored for overload resolution}}
+  ASSERT_HAS_TYPE(x, HostTy)
+  return targetdep_t();
+}
+ASSERT_HAS_TYPE(fullauto_verylate_device({}), DeviceTy)
+
+auto fullauto_verylate_host(targetdep_t x) __attribute__((host)) {
+  ASSERT_HAS_TYPE(x, HostTy)
+  return targetdep_t();
+}
+ASSERT_HAS_TYPE(fullauto_verylate_host({}), HostTy)
+
+
+// MS __declspec syntax:
+__declspec(__device__) targetdep_t ms_ret_early_device(targetdep_t x) {
+  ASSERT_HAS_TYPE(ms_ret_early_device({}), DeviceTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+__declspec(__host__) targetdep_t ms_ret_early_host(targetdep_t x) {
+  ASSERT_HAS_TYPE(ms_ret_early_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+__declspec(__host__) __declspec(__device__) targetdep_t ms_ret_early_hostdevice(targetdep_t x) {
+  ASSERT_HAS_TYPE(ms_ret_early_hostdevice({}), CurrentTargetTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+targetdep_t __declspec(__device__) ms_ret_late_device(targetdep_t x) { // expected-warning {{target specifier has been ignored for overload resolution}}
+  ASSERT_HAS_TYPE(ms_ret_late_device({}), HostTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+targetdep_t __declspec(__host__) ms_ret_late_host(targetdep_t x) {
+  ASSERT_HAS_TYPE(ms_ret_late_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+targetdep_t __declspec(__host__) __declspec(__device__) ms_ret_late_hostdevice(targetdep_t x) { // expected-warning {{target specifier has been ignored for overload resolution}}
+  ASSERT_HAS_TYPE(ms_ret_late_hostdevice({}), HostTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+__declspec(__device__) targetdep_t __declspec(__host__) ms_ret_early_device_late_host(targetdep_t x) { // expected-warning {{target specifier has been ignored for overload resolution}}
+  ASSERT_HAS_TYPE(ms_ret_early_device_late_host({}), DeviceTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+__declspec(__device__) auto ms_autoret_early_device(targetdep_t x) -> targetdep_t {
+  ASSERT_HAS_TYPE(ms_autoret_early_device({}), DeviceTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+__declspec(__host__) auto ms_autoret_early_host(targetdep_t x) -> targetdep_t {
+  ASSERT_HAS_TYPE(ms_autoret_early_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+__declspec(__host__) __declspec(__device__) auto ms_autoret_early_hostdevice(targetdep_t x) -> targetdep_t {
+  ASSERT_HAS_TYPE(ms_autoret_early_hostdevice({}), CurrentTargetTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+
+auto __declspec(__device__) ms_autoret_late_device(targetdep_t x) -> targetdep_t {
+  ASSERT_HAS_TYPE(ms_autoret_late_device({}), DeviceTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+auto __declspec(__host__) ms_autoret_late_host(targetdep_t x) -> targetdep_t {
+  ASSERT_HAS_TYPE(ms_autoret_late_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+auto __declspec(__host__) __declspec(__device__) ms_autoret_late_hostdevice(targetdep_t x) -> targetdep_t {
+  ASSERT_HAS_TYPE(ms_autoret_late_hostdevice({}), CurrentTargetTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+
+// Class/Struct member functions:
+
+struct MethodTests {
+  __attribute__((device)) targetdep_t ret_early_device(targetdep_t x) {
+    ASSERT_HAS_TYPE(ret_early_device({}), DeviceTy)
+    ASSERT_HAS_TYPE(x, DeviceTy)
+    return {};
+  }
+
+  __attribute__((host)) targetdep_t ret_early_host(targetdep_t x) {
+    ASSERT_HAS_TYPE(ret_early_host({}), HostTy)
+    ASSERT_HAS_TYPE(x, HostTy)
+    return {};
+  }
+
+  __attribute__((host,device)) targetdep_t ret_early_hostdevice(targetdep_t x) {
+    ASSERT_HAS_TYPE(ret_early_hostdevice({}), CurrentTargetTy)
+    ASSERT_HAS_TYPE(x, CurrentTargetTy)
+    return {};
+  }
+
+  __attribute__((device)) auto autoret_early_device(targetdep_t x) -> targetdep_t {
+    ASSERT_HAS_TYPE(autoret_early_device({}), DeviceTy)
+    ASSERT_HAS_TYPE(x, DeviceTy)
+    return {};
+  }
+  __attribute__((host)) auto autoret_early_host(targetdep_t x) -> targetdep_t {
+    ASSERT_HAS_TYPE(autoret_early_host({}), HostTy)
+    ASSERT_HAS_TYPE(x, HostTy)
+    return {};
+  }
+
+  __attribute__((host,device)) auto autoret_early_hostdevice(targetdep_t x) -> targetdep_t {
+    ASSERT_HAS_TYPE(autoret_early_hostdevice({}), CurrentTargetTy)
+    ASSERT_HAS_TYPE(x, CurrentTargetTy)
+    return {};
+  }
+
+
+  // Overloaded call happens in return type, attribute is after that.
+  targetdep_t __attribute__((device)) ret_late_device(targetdep_t x) {  // expected-warning {{target specifier has been ignored for overload resolution}}
+    ASSERT_HAS_TYPE(ret_late_device({}), HostTy)
+    ASSERT_HAS_TYPE(x, DeviceTy)
+    return {};
+  }
+
+  targetdep_t __attribute__((host)) ret_late_host(targetdep_t x) {
+    ASSERT_HAS_TYPE(ret_late_host({}), HostTy)
+    ASSERT_HAS_TYPE(x, HostTy)
+    return {};
+  }
+
+  targetdep_t __attribute__((host,device)) ret_late_hostdevice(targetdep_t x) {  // expected-warning {{target specifier has been ignored for overload resolution}}
+    ASSERT_HAS_TYPE(ret_late_hostdevice({}), HostTy)
+    ASSERT_HAS_TYPE(x, CurrentTargetTy)
+    return {};
+  }
+
+
+  // Member declarations (tested in the 'tests' function further below):
+  __attribute__((device)) targetdep_t decl_ret_early_device(void);
+  __attribute__((host)) targetdep_t decl_ret_early_host(void);
+  __attribute__((host,device)) targetdep_t decl_ret_early_hostdevice(void);
+  targetdep_t __attribute__((device)) decl_ret_late_device(void);  // expected-warning {{target specifier has been ignored for overload resolution}}
+  targetdep_t __attribute__((host)) decl_ret_late_host(void);
+  targetdep_t __attribute__((host,device)) decl_ret_late_hostdevice(void);  // expected-warning {{target specifier has been ignored for overload resolution}}
+
+  // for out of line definitions:
+  __attribute__((device)) targetdep_t ool_ret_early_device(targetdep_t x);
+  __attribute__((host)) targetdep_t ool_ret_early_host(targetdep_t x);
+  __attribute__((host,device)) targetdep_t ool_ret_early_hostdevice(targetdep_t x);
+  targetdep_t __attribute__((device)) ool_ret_late_device(targetdep_t x);  // expected-warning {{target specifier has been ignored for overload resolution}}
+  targetdep_t __attribute__((host)) ool_ret_late_host(targetdep_t x);
+  targetdep_t __attribute__((host,device)) ool_ret_late_hostdevice(targetdep_t x);  // expected-warning {{target specifier has been ignored for overload resolution}}
+
+};
+
+__attribute__((device)) targetdep_t MethodTests::ool_ret_early_device(targetdep_t x) {
+  ASSERT_HAS_TYPE(ool_ret_early_device({}), DeviceTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+__attribute__((host)) targetdep_t MethodTests::ool_ret_early_host(targetdep_t x) {
+  ASSERT_HAS_TYPE(ool_ret_early_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+__attribute__((host,device)) targetdep_t MethodTests::ool_ret_early_hostdevice(targetdep_t x) {
+  ASSERT_HAS_TYPE(ool_ret_early_hostdevice({}), CurrentTargetTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+targetdep_t __attribute__((device)) MethodTests::ool_ret_late_device(targetdep_t x) { // expected-warning {{target specifier has been ignored for overload resolution}}
+  ASSERT_HAS_TYPE(ool_ret_late_device({}), HostTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+targetdep_t __attribute__((host)) MethodTests::ool_ret_late_host(targetdep_t x) {
+  ASSERT_HAS_TYPE(ool_ret_late_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+targetdep_t __attribute__((host,device)) MethodTests::ool_ret_late_hostdevice(targetdep_t x) { // expected-warning {{target specifier has been ignored for overload resolution}}
+  ASSERT_HAS_TYPE(ool_ret_late_hostdevice({}), HostTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+
+// members of templated structs should also work.
+template <unsigned int N>
+struct TemplateMethodTests {
+  __attribute__((device)) targetdep_t ret_early_device(targetdep_t x) {
+    ASSERT_HAS_TYPE(ret_early_device({}), DeviceTy)
+    ASSERT_HAS_TYPE(x, DeviceTy)
+    return {};
+  }
+
+  __attribute__((host)) targetdep_t ret_early_host(targetdep_t x) {
+    ASSERT_HAS_TYPE(ret_early_host({}), HostTy)
+    ASSERT_HAS_TYPE(x, HostTy)
+    return {};
+  }
+
+  __attribute__((host,device)) targetdep_t ret_early_hostdevice(targetdep_t x) {
+    ASSERT_HAS_TYPE(ret_early_hostdevice({}), CurrentTargetTy)
+    ASSERT_HAS_TYPE(x, CurrentTargetTy)
+    return {};
+  }
+
+  __attribute__((device)) auto autoret_early_device(targetdep_t x) -> targetdep_t {
+    ASSERT_HAS_TYPE(autoret_early_device({}), DeviceTy)
+    ASSERT_HAS_TYPE(x, DeviceTy)
+    return {};
+  }
+
+  __attribute__((host)) auto autoret_early_host(targetdep_t x) -> targetdep_t {
+    ASSERT_HAS_TYPE(autoret_early_host({}), HostTy)
+    ASSERT_HAS_TYPE(x, HostTy)
+    return {};
+  }
+
+  __attribute__((host,device)) auto autoret_early_hostdevice(targetdep_t x) -> targetdep_t {
+    ASSERT_HAS_TYPE(autoret_early_hostdevice({}), CurrentTargetTy)
+    ASSERT_HAS_TYPE(x, CurrentTargetTy)
+    return {};
+  }
+
+  targetdep_t __attribute__((device)) ret_late_device(targetdep_t x) { // expected-warning {{target specifier has been ignored for overload resolution}}
+    ASSERT_HAS_TYPE(ret_late_device({}), HostTy)
+    ASSERT_HAS_TYPE(x, DeviceTy)
+    return {};
+  }
+
+  targetdep_t __attribute__((host)) ret_late_host(targetdep_t x) {
+    ASSERT_HAS_TYPE(ret_late_host({}), HostTy)
+    ASSERT_HAS_TYPE(x, HostTy)
+    return {};
+  }
+
+  targetdep_t __attribute__((host,device)) ret_late_hostdevice(targetdep_t x) { // expected-warning {{target specifier has been ignored for overload resolution}}
+    ASSERT_HAS_TYPE(ret_late_hostdevice({}), HostTy)
+    ASSERT_HAS_TYPE(x, CurrentTargetTy)
+    return {};
+  }
+
+
+  __attribute__((device)) targetdep_t decl_ret_early_device(void);
+  __attribute__((host)) targetdep_t decl_ret_early_host(void);
+  __attribute__((host,device)) targetdep_t decl_ret_early_hostdevice(void);
+
+  targetdep_t __attribute__((device)) decl_ret_late_device(void); // expected-warning {{target specifier has been ignored for overload resolution}}
+  targetdep_t __attribute__((host)) decl_ret_late_host(void);
+  targetdep_t __attribute__((host,device)) decl_ret_late_hostdevice(void); // expected-warning {{target specifier has been ignored for overload resolution}}
+};
+
+void tests(void) {
+  MethodTests mt;
+
+  ASSERT_HAS_TYPE(mt.ret_early_device({}), DeviceTy)
+  ASSERT_HAS_TYPE(mt.ret_early_host({}), HostTy)
+  ASSERT_HAS_TYPE(mt.ret_early_hostdevice({}), CurrentTargetTy)
+
+  ASSERT_HAS_TYPE(mt.autoret_early_device({}), DeviceTy)
+  ASSERT_HAS_TYPE(mt.autoret_early_host({}), HostTy)
+  ASSERT_HAS_TYPE(mt.autoret_early_hostdevice({}), CurrentTargetTy)
+
+  // The target specifier is too late to be considered:
+  ASSERT_HAS_TYPE(mt.ret_late_device({}), HostTy)
+  ASSERT_HAS_TYPE(mt.ret_late_host({}), HostTy)
+  ASSERT_HAS_TYPE(mt.ret_late_hostdevice({}), HostTy)
+
+  ASSERT_HAS_TYPE(mt.decl_ret_early_device(), DeviceTy)
+  ASSERT_HAS_TYPE(mt.decl_ret_early_host(), HostTy)
+  ASSERT_HAS_TYPE(mt.decl_ret_early_hostdevice(), CurrentTargetTy)
+
+  // The target specifier is too late to be considered:
+  ASSERT_HAS_TYPE(mt.decl_ret_late_device(), HostTy)
+  ASSERT_HAS_TYPE(mt.decl_ret_late_host(), HostTy)
+  ASSERT_HAS_TYPE(mt.decl_ret_late_hostdevice(), HostTy)
+
+  TemplateMethodTests<42> tmt;
+  ASSERT_HAS_TYPE(tmt.ret_early_device({}), DeviceTy)
+  ASSERT_HAS_TYPE(tmt.ret_early_host({}), HostTy)
+  ASSERT_HAS_TYPE(tmt.ret_early_hostdevice({}), CurrentTargetTy)
+
+  ASSERT_HAS_TYPE(tmt.autoret_early_device({}), DeviceTy)
+  ASSERT_HAS_TYPE(tmt.autoret_early_host({}), HostTy)
+  ASSERT_HAS_TYPE(tmt.autoret_early_hostdevice({}), CurrentTargetTy)
+
+  ASSERT_HAS_TYPE(tmt.ret_late_device({}), HostTy)
+  ASSERT_HAS_TYPE(tmt.ret_late_host({}), HostTy)
+  ASSERT_HAS_TYPE(tmt.ret_late_hostdevice({}), HostTy)
+
+  ASSERT_HAS_TYPE(tmt.decl_ret_early_device(), DeviceTy)
+  ASSERT_HAS_TYPE(tmt.decl_ret_early_host(), HostTy)
+  ASSERT_HAS_TYPE(tmt.decl_ret_early_hostdevice(), CurrentTargetTy)
+
+  ASSERT_HAS_TYPE(tmt.decl_ret_late_device(), HostTy)
+  ASSERT_HAS_TYPE(tmt.decl_ret_late_host(), HostTy)
+  ASSERT_HAS_TYPE(tmt.decl_ret_late_hostdevice(), HostTy)
+}
+
+
+// global variables:
+__attribute__((device)) targetdep_t var_early_device = {};
+ASSERT_HAS_TYPE(var_early_device, DeviceTy)
+
+targetdep_t var_early_host = {};
+ASSERT_HAS_TYPE(var_early_host, HostTy)
+
+targetdep_t __attribute__((device)) var_late_device = {}; // expected-warning {{target specifier has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(var_late_device, HostTy)
+
+
+// Tests for the overload candidate ordering compared to templates:
+
+enum Candidate {
+  TEMPLATE,
+  HOST,
+  DEVICE,
+  HOSTDEVICE,
+};
+
+// (1.) If the overloaded functions are constexpr
+
+// (1.a) Prefer fitting overloads.
+template <typename T> constexpr Candidate ce_template_vs_H_D_functions(T arg) { return TEMPLATE; }
+__attribute__((device)) constexpr Candidate ce_template_vs_H_D_functions(float arg) { return DEVICE; }
+__attribute__((host)) constexpr Candidate ce_template_vs_H_D_functions(float arg) { return HOST; }
+
+__attribute__((device)) check<ce_template_vs_H_D_functions(1.0f) == DEVICE>::type
+test_ce_template_vs_H_D_functions_for_device() {
+  return TrueTy();
+}
+
+__attribute__((host)) check<ce_template_vs_H_D_functions(1.0f) == HOST>::type
+test_ce_template_vs_H_D_functions_for_host() {
+  return TrueTy();
+}
+
+__attribute__((host,device)) check<ce_template_vs_H_D_functions(1.0f) == CurrentTarget>::type
+test_ce_template_vs_H_D_functions_for_hd() {
+  return TrueTy();
+}
+
+
+// (1.b) Always prefer an HD candidate over a template candidate.
+template <typename T> constexpr Candidate ce_template_vs_HD_function(T arg) { return TEMPLATE; }
+__attribute__((host, device)) constexpr Candidate ce_template_vs_HD_function(float arg) { return HOSTDEVICE; }
+
+__attribute__((device)) check<ce_template_vs_HD_function(1.0f) == HOSTDEVICE>::type
+test_ce_template_vs_HD_function_for_device() {
+  return TrueTy();
+}
+
+__attribute__((host)) check<ce_template_vs_HD_function(1.0f) == HOSTDEVICE>::type
+test_ce_template_vs_HD_function_for_host() {
+  return TrueTy();
+}
+
+__attribute__((host,device)) check<ce_template_vs_HD_function(1.0f) == HOSTDEVICE>::type
+test_ce_template_vs_HD_function_for_hd() {
+  return TrueTy();
+}
+
+
+// (1.c) Even wrong-sided calls are okay if the called function is constexpr, so
+// prefer the device overload over the template.
+template <typename T> constexpr Candidate ce_template_vs_D_function(T arg) { return TEMPLATE; }
+__attribute__((device)) constexpr Candidate ce_template_vs_D_function(float arg) { return DEVICE; }
+
+__attribute__((host)) check<ce_template_vs_D_function(1.0f) == DEVICE>::type
+test_ce_template_vs_D_function_for_host() {
+  return TrueTy();
+}
+
+__attribute__((device)) check<ce_template_vs_D_function(1.0f) == DEVICE>::type
+test_ce_template_vs_D_function_for_device() {
+  return TrueTy();
+}
+
+__attribute__((host,device)) check<ce_template_vs_D_function(1.0f) == DEVICE>::type
+test_ce_template_vs_D_function_for_hd() {
+  return TrueTy();
+}
+
+
+// (2.) If the overloaded functions are NOT constexpr
+
+// (2.a) Prefer fitting overloads.
+template <typename T> TemplateTy template_vs_H_D_functions(T arg) { return {}; }
+__attribute__((device)) DeviceTy template_vs_H_D_functions(float arg) { return {}; }
+__attribute__((host)) HostTy template_vs_H_D_functions(float arg) { return {}; }
+
+__attribute__((device)) check<is_same<decltype(template_vs_H_D_functions(1.0f)), DeviceTy>::value>::type
+test_template_vs_H_D_functions_for_device() {
+  return TrueTy{};
+}
+
+__attribute__((host)) check<is_same<decltype(template_vs_H_D_functions(1.0f)), HostTy>::value>::type
+test_template_vs_H_D_functions_for_host() {
+  return TrueTy{};
+}
+
+__attribute__((host,device)) check<is_same<decltype(template_vs_H_D_functions(1.0f)), CurrentTargetTy>::value>::type
+test_template_vs_H_D_functions_for_hd() {
+  return TrueTy{};
+}
+
+// (2.b) Always prefer an HD candidate over a template candidate.
+template <typename T> TemplateTy template_vs_HD_function(T arg) { return {}; }
+__attribute__((host,device)) HostDeviceTy template_vs_HD_function(float arg) { return {}; }
+
+__attribute__((device)) check<is_same<decltype(template_vs_HD_function(1.0f)), HostDeviceTy>::value>::type
+test_template_vs_HD_function_for_device() {
+  return TrueTy{};
+}
+
+__attribute__((host)) check<is_same<decltype(template_vs_HD_function(1.0f)), HostDeviceTy>::value>::type
+test_template_vs_HD_function_for_host() {
+  return TrueTy{};
+}
+
+__attribute__((host,device)) check<is_same<decltype(template_vs_HD_function(1.0f)), HostDeviceTy>::value>::type
+test_template_vs_HD_function_for_hd() {
+  return TrueTy{};
+}
+
+
+// (2.c) For non-constexpr functions, prefer a sameside or native template
+// function over a wrongside non-template function:
+template <typename T> TemplateTy template_vs_D_function(T arg) { return {}; }
+__attribute__((device)) DeviceTy template_vs_D_function(float arg) { return {}; }
+
+__attribute__((host,device)) check<is_same<decltype(template_vs_D_function(1.0f)), TemplateIfHostTy>::value>::type
+test_template_vs_D_function_for_hd() {
+  return TrueTy{};
+}
+
+__attribute__((device)) check<is_same<decltype(template_vs_D_function(1.0f)), DeviceTy>::value>::type
+test_template_vs_D_function_for_device() {
+  return TrueTy{};
+}
+
+__attribute__((host)) check<is_same<decltype(template_vs_D_function(1.0f)), TemplateTy>::value>::type
+test_template_vs_D_function_for_host() {
+  return TrueTy{};
+}
+
+
+// If only a wrongside function is available, it is selected.
+__attribute__((device)) DeviceTy only_D_function(float arg) { return {}; }
+
+__attribute__((host)) check<is_same<decltype(only_D_function(1.0f)), DeviceTy>::value>::type
+test_only_D_function_for_host() {
+  return TrueTy{};
+}

>From 4c1b6053136306d0b3ffef145d1f019bb1105038 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Mon, 19 Aug 2024 08:15:56 -0400
Subject: [PATCH 2/4] fixup! [Clang][HIP] Target-dependent overload resolution
 in declarators and specifiers

Handle and test template functions outside of classes.
---
 clang/lib/Parse/ParseDecl.cpp                 |  7 +++
 .../target-overloads-availability-warnings.cu | 51 +++++++++++++++++++
 2 files changed, 58 insertions(+)

diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 615aa8e4c5df02..e6270b252cda8b 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -2029,6 +2029,13 @@ Parser::DeclGroupPtrTy Parser::ParseDeclaration(DeclaratorContext Context,
   // parsing c none objective-c decls.
   ObjCDeclContextSwitch ObjCDC(*this);
 
+  SemaCUDA::CUDATargetContextRAII CTCRAII(Actions.CUDA(),
+                                          SemaCUDA::CTCK_Declaration);
+  if (Actions.getLangOpts().CUDA) {
+    Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(DeclAttrs);
+    Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(DeclSpecAttrs);
+  }
+
   Decl *SingleDecl = nullptr;
   switch (Tok.getKind()) {
   case tok::kw_template:
diff --git a/clang/test/SemaCUDA/target-overloads-availability-warnings.cu b/clang/test/SemaCUDA/target-overloads-availability-warnings.cu
index f0fc1bea1db642..93498c6fe22e13 100644
--- a/clang/test/SemaCUDA/target-overloads-availability-warnings.cu
+++ b/clang/test/SemaCUDA/target-overloads-availability-warnings.cu
@@ -146,3 +146,54 @@ __attribute__((device)) void DeviceUser(void) {
   use(OverloadFunHostDepr());
   use(TemplateOverloadFun<int>());
 }
+
+
+// Template functions outside of classes:
+
+// This should use the non-deprecated device overload.
+template<int X> __attribute__((device))
+auto devicefun(void) -> typename my_enable_if<(X == OverloadFunHostDepr()), int>::type {
+  return 1;
+}
+
+// This should use the non-deprecated device overload.
+template<int X> __attribute__((device))
+auto devicefun(void) -> typename my_enable_if<(X != OverloadFunHostDepr()), int>::type {
+    return 0;
+}
+
+// This should use the deprecated device overload.
+template<int X> __attribute__((device))
+auto devicefun_wrong(void) -> typename my_enable_if<(X == OverloadFunDeviceDepr()), int>::type { // expected-warning {{'OverloadFunDeviceDepr' is deprecated: Device variant}}
+  return 1;
+}
+
+// This should use the deprecated device overload.
+template<int X> __attribute__((device))
+auto devicefun_wrong(void) -> typename my_enable_if<(X != OverloadFunDeviceDepr()), int>::type { // expected-warning {{'OverloadFunDeviceDepr' is deprecated: Device variant}}
+    return 0;
+}
+
+// This should use the non-deprecated host overload.
+template<int X> __attribute__((host))
+auto hostfun(void) -> typename my_enable_if<(X == OverloadFunDeviceDepr()), int>::type {
+  return 1;
+}
+
+// This should use the non-deprecated host overload.
+template<int X> __attribute__((host))
+auto hostfun(void) -> typename my_enable_if<(X != OverloadFunDeviceDepr()), int>::type {
+    return 0;
+}
+
+// This should use the deprecated host overload.
+template<int X> __attribute__((host))
+auto hostfun_wrong(void) -> typename my_enable_if<(X == OverloadFunHostDepr()), int>::type { // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}}
+  return 1;
+}
+
+// This should use the deprecated host overload.
+template<int X> __attribute__((host))
+auto hostfun_wrong(void) -> typename my_enable_if<(X != OverloadFunHostDepr()), int>::type { // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}}
+    return 0;
+}

>From 91deb043d4d421ef564a5df18e216cabaa918521 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Mon, 19 Aug 2024 09:53:20 -0400
Subject: [PATCH 3/4] fixup! fixup! [Clang][HIP] Target-dependent overload
 resolution in declarators and specifiers

Add a test to document the behavior for default arguments of template parameters.
---
 .../target-overloads-in-function-prototypes.cu      | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu b/clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu
index 7a636084476a99..996b0cba1aa8fc 100644
--- a/clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu
+++ b/clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu
@@ -688,3 +688,16 @@ __attribute__((host)) check<is_same<decltype(only_D_function(1.0f)), DeviceTy>::
 test_only_D_function_for_host() {
   return TrueTy{};
 }
+
+// Default arguments for template parameters occur before the target attribute,
+// so we can't identify the "right" overload for them.
+template <typename T = targetdep_t>
+__attribute__((device)) // expected-warning {{target specifier has been ignored for overload resolution}}
+T use_in_template_default_arg(void) {
+  return HostTy{};
+}
+
+__attribute__((device))
+void test_use_in_template(void) {
+  use_in_template_default_arg<>();
+}

>From e273a996854cb3256cfeed832c3f16ff6fd0f534 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Tue, 20 Aug 2024 02:47:36 -0400
Subject: [PATCH 4/4] fixup! fixup! fixup! [Clang][HIP] Target-dependent
 overload resolution in declarators and specifiers

Rename 'target specifier' -> 'target attribute', move CUDATargetContext constructor in header.
---
 .../clang/Basic/DiagnosticSemaKinds.td        |  4 +-
 clang/include/clang/Sema/SemaCUDA.h           |  3 +-
 clang/lib/Sema/SemaCUDA.cpp                   |  5 --
 ...target-overloads-in-function-prototypes.cu | 58 +++++++++----------
 4 files changed, 33 insertions(+), 37 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 8709f60678b466..3497dd53e190df 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9018,8 +9018,8 @@ def err_ref_bad_target : Error<
   "reference to %select{__device__|__global__|__host__|__host__ __device__}0 "
   "%select{function|variable}1 %2 in %select{__device__|__global__|__host__|__host__ __device__}3 function">;
 def warn_target_specfier_ignored : Warning<
-  "target specifier has been ignored for overload resolution; "
-  "move the target specifier to the beginning of the declaration to use it for overload resolution">,
+  "target attribute has been ignored for overload resolution; "
+  "move the target attribute to the beginning of the declaration to use it for overload resolution">,
   InGroup<IgnoredAttributes>;
 def note_cuda_const_var_unpromoted : Note<
   "const variable cannot be emitted on device side due to dynamic initialization">;
diff --git a/clang/include/clang/Sema/SemaCUDA.h b/clang/include/clang/Sema/SemaCUDA.h
index 83083ada889a16..26d839db7fe6ce 100644
--- a/clang/include/clang/Sema/SemaCUDA.h
+++ b/clang/include/clang/Sema/SemaCUDA.h
@@ -136,7 +136,8 @@ class SemaCUDA : public SemaBase {
     CUDATargetContext() = default;
 
     CUDATargetContext(SemaCUDA *S, CUDATargetContextKind Kind,
-                      CUDAFunctionTarget Target);
+                      CUDAFunctionTarget Target)
+        : Kind(Kind), S(S), Target(Target) {}
 
     CUDAFunctionTarget getTarget();
 
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index b16c50a95ccba2..e4c7c1c2c57d8f 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -143,11 +143,6 @@ static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
          });
 }
 
-SemaCUDA::CUDATargetContext::CUDATargetContext(SemaCUDA *S,
-                                               CUDATargetContextKind Kind,
-                                               CUDAFunctionTarget Target)
-    : Kind(Kind), S(S), Target(Target) {}
-
 CUDAFunctionTarget SemaCUDA::CUDATargetContext::getTarget() {
   TargetQueried = true;
   return Target;
diff --git a/clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu b/clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu
index 996b0cba1aa8fc..0d8f9aa8676b53 100644
--- a/clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu
+++ b/clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu
@@ -66,18 +66,18 @@ ASSERT_HAS_TYPE(decl_ret_early_host_device(), CurrentTargetTy)
 
 // If the function target is specified too late and can therefore not be
 // considered for overload resolution in targetdep_t, warn.
-targetdep_t __attribute__((device)) decl_ret_late_device(void); // expected-warning {{target specifier has been ignored for overload resolution}}
+targetdep_t __attribute__((device)) decl_ret_late_device(void); // expected-warning {{target attribute has been ignored for overload resolution}}
 ASSERT_HAS_TYPE(decl_ret_late_device(), HostTy)
 
 // No warning necessary if the ignored attribute doesn't change the result.
 targetdep_t __attribute__((host)) decl_ret_late_host(void);
 ASSERT_HAS_TYPE(decl_ret_late_host(), HostTy)
 
-targetdep_t __attribute__((host,device)) decl_ret_late_host_device(void); // expected-warning {{target specifier has been ignored for overload resolution}}
+targetdep_t __attribute__((host,device)) decl_ret_late_host_device(void); // expected-warning {{target attribute has been ignored for overload resolution}}
 ASSERT_HAS_TYPE(decl_ret_late_host_device(), HostTy)
 
 // An odd way of writing this, but it's possible.
-__attribute__((device)) targetdep_t __attribute__((host)) decl_ret_early_device_late_host(void); // expected-warning {{target specifier has been ignored for overload resolution}}
+__attribute__((device)) targetdep_t __attribute__((host)) decl_ret_early_device_late_host(void); // expected-warning {{target attribute has been ignored for overload resolution}}
 ASSERT_HAS_TYPE(decl_ret_early_device_late_host(), DeviceTy)
 
 
@@ -101,14 +101,14 @@ __attribute__((host, device)) targetdep_t ret_early_hostdevice(targetdep_t x) {
 }
 
 // The parameter is still after the attribute, so it needs no warning.
-targetdep_t __attribute__((device)) // expected-warning {{target specifier has been ignored for overload resolution}}
+targetdep_t __attribute__((device)) // expected-warning {{target attribute has been ignored for overload resolution}}
 ret_late_device(targetdep_t x) {
   ASSERT_HAS_TYPE(ret_late_device({}), HostTy)
   ASSERT_HAS_TYPE(x, DeviceTy)
   return {};
 }
 
-targetdep_t __attribute__((host, device)) // expected-warning {{target specifier has been ignored for overload resolution}}
+targetdep_t __attribute__((host, device)) // expected-warning {{target attribute has been ignored for overload resolution}}
 ret_late_hostdevice(targetdep_t x) {
   ASSERT_HAS_TYPE(ret_late_hostdevice({}), HostTy)
   ASSERT_HAS_TYPE(x, CurrentTargetTy)
@@ -121,7 +121,7 @@ targetdep_t __attribute__((host)) ret_late_host(targetdep_t x) {
   return {};
 }
 
-__attribute__((device)) targetdep_t __attribute__((host)) // expected-warning {{target specifier has been ignored for overload resolution}}
+__attribute__((device)) targetdep_t __attribute__((host)) // expected-warning {{target attribute has been ignored for overload resolution}}
 ret_early_device_late_host(targetdep_t x) {
   ASSERT_HAS_TYPE(ret_early_device_late_host({}), DeviceTy)
   ASSERT_HAS_TYPE(x, CurrentTargetTy)
@@ -129,15 +129,15 @@ ret_early_device_late_host(targetdep_t x) {
 }
 
 // The attribute is even later, so we can't choose the expected overload.
-targetdep_t ret_verylate_device(targetdep_t x) __attribute__((device)) { // expected-warning {{target specifier has been ignored for overload resolution}}
+targetdep_t ret_verylate_device(targetdep_t x) __attribute__((device)) { // expected-warning {{target attribute has been ignored for overload resolution}}
   ASSERT_HAS_TYPE(ret_verylate_device({}), HostTy)
   ASSERT_HAS_TYPE(x, HostTy)
   return {};
 }
 
 // It's possible to get two different wrong types:
-targetdep_t __attribute__((device)) // expected-warning {{target specifier has been ignored for overload resolution}}
-ret_late_device_verylate_host(targetdep_t x) __attribute__((host)) { // expected-warning {{target specifier has been ignored for overload resolution}}
+targetdep_t __attribute__((device)) // expected-warning {{target attribute has been ignored for overload resolution}}
+ret_late_device_verylate_host(targetdep_t x) __attribute__((host)) { // expected-warning {{target attribute has been ignored for overload resolution}}
   ASSERT_HAS_TYPE(ret_late_device_verylate_host({}), HostTy)
   ASSERT_HAS_TYPE(x, DeviceTy)
   return {};
@@ -211,7 +211,7 @@ ASSERT_HAS_TYPE(fullauto_host({}), HostTy)
 
 // The return type is as expected, but the argument type precedes the attribute,
 // so we don't get the right type for it.
-auto fullauto_verylate_device(targetdep_t x) __attribute__((device)) { // expected-warning {{target specifier has been ignored for overload resolution}}
+auto fullauto_verylate_device(targetdep_t x) __attribute__((device)) { // expected-warning {{target attribute has been ignored for overload resolution}}
   ASSERT_HAS_TYPE(x, HostTy)
   return targetdep_t();
 }
@@ -243,7 +243,7 @@ __declspec(__host__) __declspec(__device__) targetdep_t ms_ret_early_hostdevice(
   return {};
 }
 
-targetdep_t __declspec(__device__) ms_ret_late_device(targetdep_t x) { // expected-warning {{target specifier has been ignored for overload resolution}}
+targetdep_t __declspec(__device__) ms_ret_late_device(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}}
   ASSERT_HAS_TYPE(ms_ret_late_device({}), HostTy)
   ASSERT_HAS_TYPE(x, DeviceTy)
   return {};
@@ -255,13 +255,13 @@ targetdep_t __declspec(__host__) ms_ret_late_host(targetdep_t x) {
   return {};
 }
 
-targetdep_t __declspec(__host__) __declspec(__device__) ms_ret_late_hostdevice(targetdep_t x) { // expected-warning {{target specifier has been ignored for overload resolution}}
+targetdep_t __declspec(__host__) __declspec(__device__) ms_ret_late_hostdevice(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}}
   ASSERT_HAS_TYPE(ms_ret_late_hostdevice({}), HostTy)
   ASSERT_HAS_TYPE(x, CurrentTargetTy)
   return {};
 }
 
-__declspec(__device__) targetdep_t __declspec(__host__) ms_ret_early_device_late_host(targetdep_t x) { // expected-warning {{target specifier has been ignored for overload resolution}}
+__declspec(__device__) targetdep_t __declspec(__host__) ms_ret_early_device_late_host(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}}
   ASSERT_HAS_TYPE(ms_ret_early_device_late_host({}), DeviceTy)
   ASSERT_HAS_TYPE(x, CurrentTargetTy)
   return {};
@@ -345,7 +345,7 @@ struct MethodTests {
 
 
   // Overloaded call happens in return type, attribute is after that.
-  targetdep_t __attribute__((device)) ret_late_device(targetdep_t x) {  // expected-warning {{target specifier has been ignored for overload resolution}}
+  targetdep_t __attribute__((device)) ret_late_device(targetdep_t x) {  // expected-warning {{target attribute has been ignored for overload resolution}}
     ASSERT_HAS_TYPE(ret_late_device({}), HostTy)
     ASSERT_HAS_TYPE(x, DeviceTy)
     return {};
@@ -357,7 +357,7 @@ struct MethodTests {
     return {};
   }
 
-  targetdep_t __attribute__((host,device)) ret_late_hostdevice(targetdep_t x) {  // expected-warning {{target specifier has been ignored for overload resolution}}
+  targetdep_t __attribute__((host,device)) ret_late_hostdevice(targetdep_t x) {  // expected-warning {{target attribute has been ignored for overload resolution}}
     ASSERT_HAS_TYPE(ret_late_hostdevice({}), HostTy)
     ASSERT_HAS_TYPE(x, CurrentTargetTy)
     return {};
@@ -368,17 +368,17 @@ struct MethodTests {
   __attribute__((device)) targetdep_t decl_ret_early_device(void);
   __attribute__((host)) targetdep_t decl_ret_early_host(void);
   __attribute__((host,device)) targetdep_t decl_ret_early_hostdevice(void);
-  targetdep_t __attribute__((device)) decl_ret_late_device(void);  // expected-warning {{target specifier has been ignored for overload resolution}}
+  targetdep_t __attribute__((device)) decl_ret_late_device(void);  // expected-warning {{target attribute has been ignored for overload resolution}}
   targetdep_t __attribute__((host)) decl_ret_late_host(void);
-  targetdep_t __attribute__((host,device)) decl_ret_late_hostdevice(void);  // expected-warning {{target specifier has been ignored for overload resolution}}
+  targetdep_t __attribute__((host,device)) decl_ret_late_hostdevice(void);  // expected-warning {{target attribute has been ignored for overload resolution}}
 
   // for out of line definitions:
   __attribute__((device)) targetdep_t ool_ret_early_device(targetdep_t x);
   __attribute__((host)) targetdep_t ool_ret_early_host(targetdep_t x);
   __attribute__((host,device)) targetdep_t ool_ret_early_hostdevice(targetdep_t x);
-  targetdep_t __attribute__((device)) ool_ret_late_device(targetdep_t x);  // expected-warning {{target specifier has been ignored for overload resolution}}
+  targetdep_t __attribute__((device)) ool_ret_late_device(targetdep_t x);  // expected-warning {{target attribute has been ignored for overload resolution}}
   targetdep_t __attribute__((host)) ool_ret_late_host(targetdep_t x);
-  targetdep_t __attribute__((host,device)) ool_ret_late_hostdevice(targetdep_t x);  // expected-warning {{target specifier has been ignored for overload resolution}}
+  targetdep_t __attribute__((host,device)) ool_ret_late_hostdevice(targetdep_t x);  // expected-warning {{target attribute has been ignored for overload resolution}}
 
 };
 
@@ -400,7 +400,7 @@ __attribute__((host,device)) targetdep_t MethodTests::ool_ret_early_hostdevice(t
   return {};
 }
 
-targetdep_t __attribute__((device)) MethodTests::ool_ret_late_device(targetdep_t x) { // expected-warning {{target specifier has been ignored for overload resolution}}
+targetdep_t __attribute__((device)) MethodTests::ool_ret_late_device(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}}
   ASSERT_HAS_TYPE(ool_ret_late_device({}), HostTy)
   ASSERT_HAS_TYPE(x, DeviceTy)
   return {};
@@ -412,7 +412,7 @@ targetdep_t __attribute__((host)) MethodTests::ool_ret_late_host(targetdep_t x)
   return {};
 }
 
-targetdep_t __attribute__((host,device)) MethodTests::ool_ret_late_hostdevice(targetdep_t x) { // expected-warning {{target specifier has been ignored for overload resolution}}
+targetdep_t __attribute__((host,device)) MethodTests::ool_ret_late_hostdevice(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}}
   ASSERT_HAS_TYPE(ool_ret_late_hostdevice({}), HostTy)
   ASSERT_HAS_TYPE(x, CurrentTargetTy)
   return {};
@@ -458,7 +458,7 @@ struct TemplateMethodTests {
     return {};
   }
 
-  targetdep_t __attribute__((device)) ret_late_device(targetdep_t x) { // expected-warning {{target specifier has been ignored for overload resolution}}
+  targetdep_t __attribute__((device)) ret_late_device(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}}
     ASSERT_HAS_TYPE(ret_late_device({}), HostTy)
     ASSERT_HAS_TYPE(x, DeviceTy)
     return {};
@@ -470,7 +470,7 @@ struct TemplateMethodTests {
     return {};
   }
 
-  targetdep_t __attribute__((host,device)) ret_late_hostdevice(targetdep_t x) { // expected-warning {{target specifier has been ignored for overload resolution}}
+  targetdep_t __attribute__((host,device)) ret_late_hostdevice(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}}
     ASSERT_HAS_TYPE(ret_late_hostdevice({}), HostTy)
     ASSERT_HAS_TYPE(x, CurrentTargetTy)
     return {};
@@ -481,9 +481,9 @@ struct TemplateMethodTests {
   __attribute__((host)) targetdep_t decl_ret_early_host(void);
   __attribute__((host,device)) targetdep_t decl_ret_early_hostdevice(void);
 
-  targetdep_t __attribute__((device)) decl_ret_late_device(void); // expected-warning {{target specifier has been ignored for overload resolution}}
+  targetdep_t __attribute__((device)) decl_ret_late_device(void); // expected-warning {{target attribute has been ignored for overload resolution}}
   targetdep_t __attribute__((host)) decl_ret_late_host(void);
-  targetdep_t __attribute__((host,device)) decl_ret_late_hostdevice(void); // expected-warning {{target specifier has been ignored for overload resolution}}
+  targetdep_t __attribute__((host,device)) decl_ret_late_hostdevice(void); // expected-warning {{target attribute has been ignored for overload resolution}}
 };
 
 void tests(void) {
@@ -497,7 +497,7 @@ void tests(void) {
   ASSERT_HAS_TYPE(mt.autoret_early_host({}), HostTy)
   ASSERT_HAS_TYPE(mt.autoret_early_hostdevice({}), CurrentTargetTy)
 
-  // The target specifier is too late to be considered:
+  // The target attribute is too late to be considered:
   ASSERT_HAS_TYPE(mt.ret_late_device({}), HostTy)
   ASSERT_HAS_TYPE(mt.ret_late_host({}), HostTy)
   ASSERT_HAS_TYPE(mt.ret_late_hostdevice({}), HostTy)
@@ -506,7 +506,7 @@ void tests(void) {
   ASSERT_HAS_TYPE(mt.decl_ret_early_host(), HostTy)
   ASSERT_HAS_TYPE(mt.decl_ret_early_hostdevice(), CurrentTargetTy)
 
-  // The target specifier is too late to be considered:
+  // The target attribute is too late to be considered:
   ASSERT_HAS_TYPE(mt.decl_ret_late_device(), HostTy)
   ASSERT_HAS_TYPE(mt.decl_ret_late_host(), HostTy)
   ASSERT_HAS_TYPE(mt.decl_ret_late_hostdevice(), HostTy)
@@ -541,7 +541,7 @@ ASSERT_HAS_TYPE(var_early_device, DeviceTy)
 targetdep_t var_early_host = {};
 ASSERT_HAS_TYPE(var_early_host, HostTy)
 
-targetdep_t __attribute__((device)) var_late_device = {}; // expected-warning {{target specifier has been ignored for overload resolution}}
+targetdep_t __attribute__((device)) var_late_device = {}; // expected-warning {{target attribute has been ignored for overload resolution}}
 ASSERT_HAS_TYPE(var_late_device, HostTy)
 
 
@@ -692,7 +692,7 @@ test_only_D_function_for_host() {
 // Default arguments for template parameters occur before the target attribute,
 // so we can't identify the "right" overload for them.
 template <typename T = targetdep_t>
-__attribute__((device)) // expected-warning {{target specifier has been ignored for overload resolution}}
+__attribute__((device)) // expected-warning {{target attribute has been ignored for overload resolution}}
 T use_in_template_default_arg(void) {
   return HostTy{};
 }