[clang] [CUDA] Fix CTAD for host/device constructors (PR #168711)
Yaxun Liu via cfe-commits
cfe-commits at lists.llvm.org
Wed Nov 19 06:24:32 PST 2025
https://github.com/yxsamliu created https://github.com/llvm/llvm-project/pull/168711
Currently Clang does not allow using CTAD in CUDA/HIP device functions since deduction guides are treated as host functions. This patch fixes that by treating deduction guides as host+device. The rationale is that deduction guides do not actually generate code in IR, and there is an existing check for device/host correctness for constructors.
Also suppress duplicate implicit deduction guides from host/device constructors with identical signatures to prevent ambiguity.
This ensures CTAD works correctly in CUDA/HIP for constructors with different target attributes.
Fixes: https://github.com/ROCm/ROCm/issues/5646
Fixes: https://github.com/llvm/llvm-project/issues/146646
>From bc727154da394d2835bd28dc72eb7f43ee8bc3fe Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Wed, 19 Nov 2025 00:16:46 -0500
Subject: [PATCH] [CUDA] Fix CTAD for host/device constructors
Currently Clang does not allow using CTAD in CUDA/HIP device functions
since deduction guides are treated as host functions. This patch fixes
that by treating deduction guides as host+device. The rationale is that
deduction guides do not actually generate code in IR, and there is an
existing check for device/host correctness for constructors.
Also suppress duplicate implicit deduction guides from host/device
constructors with identical signatures to prevent ambiguity.
This ensures CTAD works correctly in CUDA/HIP for constructors with
different target attributes.
Fixes: https://github.com/ROCm/ROCm/issues/5646
Fixes: https://github.com/llvm/llvm-project/issues/146646
---
clang/lib/Sema/SemaCUDA.cpp | 12 +++++
clang/lib/Sema/SemaTemplateDeductionGuide.cpp | 28 ++++++++++-
clang/test/SemaCUDA/deduction-guide.cu | 47 +++++++++++++++++++
3 files changed, 85 insertions(+), 2 deletions(-)
create mode 100644 clang/test/SemaCUDA/deduction-guide.cu
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 31735a0f5feb3..8d1e03c8bc571 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -137,6 +137,12 @@ CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
if (D == nullptr)
return CurCUDATargetCtx.Target;
+ // C++ deduction guides are never codegen'ed and only participate in template
+ // argument deduction. Treat them as if they were always host+device so that
+ // CUDA/HIP target checking never rejects their use based solely on target.
+ if (isa<CXXDeductionGuideDecl>(D))
+ return CUDAFunctionTarget::HostDevice;
+
if (D->hasAttr<CUDAInvalidTargetAttr>())
return CUDAFunctionTarget::InvalidTarget;
@@ -907,6 +913,12 @@ bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) {
if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
return true;
+ // C++ deduction guides participate in overload resolution but are not
+ // callable functions and are never codegen'ed. Treat them as always
+ // allowed for CUDA/HIP compatibility checking.
+ if (isa<CXXDeductionGuideDecl>(Callee))
+ return true;
+
// FIXME: Is bailing out early correct here? Should we instead assume that
// the caller is a global initializer?
FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
diff --git a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
index bfb10665c25b1..f91d84916fa3e 100644
--- a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
+++ b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
@@ -218,9 +218,33 @@ buildDeductionGuide(Sema &SemaRef, TemplateDecl *OriginalTemplate,
TInfo->getTypeLoc().castAs<FunctionProtoTypeLoc>().getParams();
// Build the implicit deduction guide template.
+ QualType GuideType = TInfo->getType();
+
+ // In CUDA/HIP mode, avoid creating duplicate implicit deduction guides with
+ // identical function types. This can happen when there are separate
+ // __host__ and __device__ constructors with the same signature; each would
+ // otherwise synthesize its own implicit deduction guide, leading to
+ // ambiguous CTAD purely due to target attributes. For such cases we keep the
+ // first guide we created and skip building another one.
+ if (IsImplicit && Ctor && SemaRef.getLangOpts().CUDA) {
+ for (NamedDecl *Existing : DC->lookup(DeductionGuideName)) {
+ auto *ExistingFT = dyn_cast<FunctionTemplateDecl>(Existing);
+ auto *ExistingGuide =
+ ExistingFT
+ ? dyn_cast<CXXDeductionGuideDecl>(ExistingFT->getTemplatedDecl())
+ : dyn_cast<CXXDeductionGuideDecl>(Existing);
+ if (!ExistingGuide)
+ continue;
+
+ if (SemaRef.Context.hasSameType(ExistingGuide->getType(), GuideType)) {
+ return Existing;
+ }
+ }
+ }
+
auto *Guide = CXXDeductionGuideDecl::Create(
- SemaRef.Context, DC, LocStart, ES, Name, TInfo->getType(), TInfo, LocEnd,
- Ctor, DeductionCandidate::Normal, FunctionTrailingRC);
+ SemaRef.Context, DC, LocStart, ES, Name, GuideType, TInfo, LocEnd, Ctor,
+ DeductionCandidate::Normal, FunctionTrailingRC);
Guide->setImplicit(IsImplicit);
Guide->setParams(Params);
diff --git a/clang/test/SemaCUDA/deduction-guide.cu b/clang/test/SemaCUDA/deduction-guide.cu
new file mode 100644
index 0000000000000..30e02f7518053
--- /dev/null
+++ b/clang/test/SemaCUDA/deduction-guide.cu
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -std=c++17 -triple nvptx64-nvidia-cuda -fsyntax-only \
+// RUN: -fcuda-is-device -verify=expected,dev %s
+// RUN: %clang_cc1 -std=c++17 -triple nvptx64-nvidia-cuda -fsyntax-only \
+// RUN: -verify %s
+
+#include "Inputs/cuda.h"
+
+template <class T>
+struct CTADType { // expected-note 2{{candidate constructor (the implicit copy constructor) not viable: requires 1 argument, but 3 were provided}}
+ // expected-note at -1 2{{candidate constructor (the implicit move constructor) not viable: requires 1 argument, but 3 were provided}}
+ T first;
+ T second;
+
+ CTADType(T x) : first(x), second(x) {} // expected-note 2{{candidate constructor not viable: requires single argument 'x', but 3 arguments were provided}}
+ __device__ CTADType(T x) : first(x), second(x) {} // expected-note 2{{candidate constructor not viable: requires single argument 'x', but 3 arguments were provided}}
+ __host__ __device__ CTADType(T x, T y) : first(x), second(y) {} // expected-note 2{{candidate constructor not viable: requires 2 arguments, but 3 were provided}}
+ CTADType(T x, T y, T z) : first(x), second(z) {} // dev-note {{'CTADType' declared here}}
+ // expected-note at -1 {{candidate constructor not viable: call to __host__ function from __device__ function}}
+ // expected-note at -2 {{candidate constructor not viable: call to __host__ function from __global__ function}}
+};
+
+template <class T>
+CTADType(T, T) -> CTADType<T>;
+
+__host__ __device__ void use_ctad_host_device() {
+ CTADType ctad_from_two_args(1, 1);
+ CTADType ctad_from_one_arg(1);
+ CTADType ctad_from_three_args(1, 2, 3); // dev-error {{reference to __host__ function 'CTADType' in __host__ __device__ function}}
+}
+
+__host__ void use_ctad_host() {
+ CTADType ctad_from_two_args(1, 1);
+ CTADType ctad_from_one_arg(1);
+ CTADType ctad_from_three_args(1, 2, 3);
+}
+
+__device__ void use_ctad_device() {
+ CTADType ctad_from_two_args(1, 1);
+ CTADType ctad_from_one_arg(1);
+ CTADType<int> ctad_from_three_args(1, 2, 3); // expected-error {{no matching constructor for initialization of 'CTADType<int>'}}
+}
+
+__global__ void use_ctad_global() {
+ CTADType ctad_from_two_args(1, 1);
+ CTADType ctad_from_one_arg(1);
+ CTADType<int> ctad_from_three_args(1, 2, 3); // expected-error {{no matching constructor for initialization of 'CTADType<int>'}}
+}
More information about the cfe-commits
mailing list