[clang] [clang][Sema][CUDA] Restrict immediate template resolution to host-device functions (PR #200662)
Weibo He via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 9 05:58:14 PDT 2026
https://github.com/NewSigma updated https://github.com/llvm/llvm-project/pull/200662
>From 65e8bdeb7207df30e0df1f182e4be2067004b516 Mon Sep 17 00:00:00 2001
From: NewSigma <NewSigma at 163.com>
Date: Mon, 8 Jun 2026 11:00:49 +0800
Subject: [PATCH 1/2] [clang][Sema][CUDA] Restrict immediate template
resolution to host-device functions
---
clang/docs/ReleaseNotes.rst | 1 +
clang/include/clang/Sema/Overload.h | 18 +------
clang/lib/Sema/SemaOverload.cpp | 30 ++++++++++--
clang/test/SemaCUDA/pr200545.cu | 73 +++++++++++++++++++++++++++++
4 files changed, 101 insertions(+), 21 deletions(-)
create mode 100644 clang/test/SemaCUDA/pr200545.cu
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f97e90634396a..4ac3f6db2c732 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -665,6 +665,7 @@ Bug Fixes in This Version
an array via an element-at-a-time copy loop (#GH192026)
- Fixed an issue where certain designated initializers would be rejected for constexpr variables. (#GH193373)
- Fixed a crash when ``#embed`` is used with C++ modules (#GH195350)
+- Fixed a bug where ``-x cuda`` caused clang to immediately resolve templates whose resolution should have been deferred. (#GH200545)
- Fixed an issue where ``__typeof_unqual`` and ``__typeof_unqual__`` were rejected as a declaration specifier in block scope in C++.
- Fixed crash when checking for overflow for unary operator that can't overflow (#GH170072)
diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h
index d42963e325b58..1e412ff6fc9e2 100644
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -1353,7 +1353,7 @@ class Sema;
bool shouldDeferDiags(Sema &S, ArrayRef<Expr *> Args, SourceLocation OpLoc);
// Whether the resolution of template candidates should be deferred
- bool shouldDeferTemplateArgumentDeduction(const LangOptions &Opts) const;
+ bool shouldDeferTemplateArgumentDeduction(const Sema &S) const;
/// Determine when this overload candidate will be new to the
/// overload set.
@@ -1545,22 +1545,6 @@ class Sema;
// good candidate as we can get, despite the fact that it takes one less
// parameter.
bool shouldEnforceArgLimit(bool PartialOverloading, FunctionDecl *Function);
-
- inline bool OverloadCandidateSet::shouldDeferTemplateArgumentDeduction(
- const LangOptions &Opts) const {
- return
- // For user defined conversion we need to check against different
- // combination of CV qualifiers and look at any explicit specifier, so
- // always deduce template candidates.
- Kind != CSK_InitByUserDefinedConversion
- // When doing code completion, we want to see all the
- // viable candidates.
- && Kind != CSK_CodeCompletion
- // CUDA may prefer template candidates even when a non-candidate
- // is a perfect match
- && !Opts.CUDA;
- }
-
} // namespace clang
#endif // LLVM_CLANG_SEMA_OVERLOAD_H
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index d53fd726e9f0b..0ad938568bce2 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -8192,7 +8192,7 @@ void Sema::AddMethodTemplateCandidate(
return;
if (ExplicitTemplateArgs ||
- !CandidateSet.shouldDeferTemplateArgumentDeduction(getLangOpts())) {
+ !CandidateSet.shouldDeferTemplateArgumentDeduction(*this)) {
AddMethodTemplateCandidateImmediately(
*this, CandidateSet, MethodTmpl, FoundDecl, ActingContext,
ExplicitTemplateArgs, ObjectType, ObjectClassification, Args,
@@ -8322,7 +8322,7 @@ void Sema::AddTemplateOverloadCandidate(
bool DependentExplicitSpecifier = hasDependentExplicit(FunctionTemplate);
if (ExplicitTemplateArgs ||
- !CandidateSet.shouldDeferTemplateArgumentDeduction(getLangOpts()) ||
+ !CandidateSet.shouldDeferTemplateArgumentDeduction(*this) ||
(isa<CXXConstructorDecl>(FunctionTemplate->getTemplatedDecl()) &&
DependentExplicitSpecifier)) {
@@ -8760,7 +8760,7 @@ void Sema::AddTemplateConversionCandidate(
if (!CandidateSet.isNewCandidate(FunctionTemplate))
return;
- if (!CandidateSet.shouldDeferTemplateArgumentDeduction(getLangOpts()) ||
+ if (!CandidateSet.shouldDeferTemplateArgumentDeduction(*this) ||
CandidateSet.getKind() ==
OverloadCandidateSet::CSK_InitByUserDefinedConversion ||
CandidateSet.getKind() == OverloadCandidateSet::CSK_InitByConstructor) {
@@ -11581,7 +11581,7 @@ OverloadingResult OverloadCandidateSet::BestViableFunction(Sema &S,
SourceLocation Loc,
iterator &Best) {
- assert((shouldDeferTemplateArgumentDeduction(S.getLangOpts()) ||
+ assert((shouldDeferTemplateArgumentDeduction(S) ||
DeferredCandidatesCount == 0) &&
"Unexpected deferred template candidates");
@@ -13533,6 +13533,28 @@ void OverloadCandidateSet::NoteCandidates(Sema &S, ArrayRef<Expr *> Args,
}
}
+bool OverloadCandidateSet::shouldDeferTemplateArgumentDeduction(
+ const Sema &S) const {
+ if (S.getLangOpts().CUDA) {
+ auto *Caller = S.getCurFunctionDecl(true);
+ // Overloading based on __host__ and __device__ attributes takes
+ // higher priority, so HD functions may favor template candidates even when
+ // a non-template candidate would be a perfect match.
+ if (Caller && Caller->hasAttr<CUDAHostAttr>() &&
+ Caller->hasAttr<CUDADeviceAttr>())
+ return false;
+ }
+
+ return
+ // For user defined conversion we need to check against different
+ // combination of CV qualifiers and look at any explicit specifier, so
+ // always deduce template candidates.
+ Kind != CSK_InitByUserDefinedConversion
+ // When doing code completion, we want to see all the
+ // viable candidates.
+ && Kind != CSK_CodeCompletion;
+}
+
static SourceLocation
GetLocationForCandidate(const TemplateSpecCandidate *Cand) {
return Cand->Specialization ? Cand->Specialization->getLocation()
diff --git a/clang/test/SemaCUDA/pr200545.cu b/clang/test/SemaCUDA/pr200545.cu
new file mode 100644
index 0000000000000..97401d1fefccf
--- /dev/null
+++ b/clang/test/SemaCUDA/pr200545.cu
@@ -0,0 +1,73 @@
+// Test that template argument deduction is deferred correctly.
+//
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify -verify-ignore-unexpected=note %s
+
+#include "Inputs/cuda.h"
+
+namespace h_free_call {
+ template<class T>
+ concept DoNotDeduct = []() {
+ static_assert(sizeof(T) == 0);
+ return true;
+ }();
+
+ void fn(int) {}
+ void fn(DoNotDeduct auto) {}
+
+ void call() {
+ fn(0);
+ fn(nullptr); // expected-error@-9 {{static assertion failed due to requirement 'sizeof(std::nullptr_t) == 0'}}
+ }
+}
+
+namespace h_member_call {
+ template<class T>
+ concept DoNotDeduct = []() {
+ static_assert(sizeof(T) == 0);
+ return true;
+ }();
+
+ struct A {
+ void operator=(int) {}
+ void operator=(DoNotDeduct auto) {}
+ };
+
+ void call(A a) {
+ a.operator=(0);
+ a.operator=(nullptr); // expected-error@-11 {{static assertion failed due to requirement 'sizeof(std::nullptr_t) == 0'}}
+ }
+}
+
+namespace hd_free_call {
+ template<class T>
+ concept DoNotDeduct = []() {
+ static_assert(sizeof(T) == 0);
+ return true;
+ }();
+
+ __host__ __device__ void fn(int) {}
+ __host__ __device__ void fn(DoNotDeduct auto) {}
+
+ __host__ __device__ void call() {
+ fn(0); // expected-error@-8 {{static assertion failed due to requirement 'sizeof(int) == 0'}}
+ fn(nullptr); // expected-error@-9 {{static assertion failed due to requirement 'sizeof(std::nullptr_t) == 0'}}
+ }
+}
+
+namespace hd_member_call {
+ template<class T>
+ concept DoNotDeduct = []() {
+ static_assert(sizeof(T) == 0);
+ return true;
+ }();
+
+ struct A {
+ __host__ __device__ void operator=(int) {}
+ __host__ __device__ void operator=(DoNotDeduct auto) {}
+ };
+
+ __host__ __device__ void call(A a) {
+ a.operator=(0); // expected-error@-10 {{static assertion failed due to requirement 'sizeof(int) == 0'}}
+ a.operator=(nullptr); // expected-error@-11 {{static assertion failed due to requirement 'sizeof(std::nullptr_t) == 0'}}
+ }
+}
>From 9ab4f7951223d546d7d9cd808e8f6bc9ee804a51 Mon Sep 17 00:00:00 2001
From: NewSigma <NewSigma at 163.com>
Date: Tue, 9 Jun 2026 20:56:16 +0800
Subject: [PATCH 2/2] Add test for conversion path
---
clang/test/SemaCUDA/pr200545.cu | 36 +++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/clang/test/SemaCUDA/pr200545.cu b/clang/test/SemaCUDA/pr200545.cu
index 97401d1fefccf..b839cf134ed7a 100644
--- a/clang/test/SemaCUDA/pr200545.cu
+++ b/clang/test/SemaCUDA/pr200545.cu
@@ -38,6 +38,24 @@ namespace h_member_call {
}
}
+namespace h_conversion_call {
+ template<class T>
+ concept DoNotDeduct = []() {
+ static_assert(sizeof(T) == 0);
+ return true;
+ }();
+
+ struct A {
+ operator int();
+ template<DoNotDeduct T> operator T();
+ };
+
+ void call(A a) {
+ switch (a) {}
+ (void)float(a); // expected-error@-11 {{static assertion failed due to requirement 'sizeof(float) == 0'}}
+ }
+}
+
namespace hd_free_call {
template<class T>
concept DoNotDeduct = []() {
@@ -71,3 +89,21 @@ namespace hd_member_call {
a.operator=(nullptr); // expected-error@-11 {{static assertion failed due to requirement 'sizeof(std::nullptr_t) == 0'}}
}
}
+
+namespace hd_conversion_call {
+ template<class T>
+ concept DoNotDeduct = []() {
+ static_assert(sizeof(T) == 0);
+ return true;
+ }();
+
+ struct A {
+ __host__ __device__ operator int();
+ template<DoNotDeduct T> __host__ __device__ operator T();
+ };
+
+ __host__ __device__ void call(A a) {
+ switch (a) {} // expected-error@-10 {{static assertion failed due to requirement 'sizeof(int) == 0'}}
+ (void)float(a); // expected-error@-11 {{static assertion failed due to requirement 'sizeof(float) == 0'}}
+ }
+}
More information about the cfe-commits
mailing list