[clang] [CUDA][HIP] Fix init var diag in template (PR #69081)

Yaxun Liu via cfe-commits cfe-commits at lists.llvm.org
Sat Oct 14 16:06:27 PDT 2023


https://github.com/yxsamliu created https://github.com/llvm/llvm-project/pull/69081

Currently clang diagnoses the following code:
(https://godbolt.org/z/s8zK3E5P5) but nvcc
does not.

```
struct A {
   constexpr A(){}
};

struct  B {
  A a;
  int b;
};

template<typename T>
__global__ void kernel( )
{
   __shared__ B x;
}
```

Clang generates an implicit trivial ctor for struct B, which should be allowed for initializing a shared variable.

However, the body of the ctor is defined only if the template kernel is instantiated. Clang checks the initialization of variables in non-instantiated templates, where it cannot find the body of the ctor, and therefore diagnoses it.

This patch skips the check for non-instantiated templates.

>From 0b0833af4494276e11b6c9e881dbf8e09491b5bd Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Sat, 14 Oct 2023 17:37:29 -0400
Subject: [PATCH] [CUDA][HIP] Fix init var diag in template

Currently clang diagnoses the following code:
(https://godbolt.org/z/s8zK3E5P5) but nvcc
does not.

```
struct A {
   constexpr A(){}
};

struct  B {
  A a;
  int b;
};

template<typename T>
__global__ void kernel( )
{
   __shared__ B x;
}
```

Clang generates an implicit trivial ctor for struct B, which
should be allowed for initializing a shared variable.

However, the body of the ctor is defined only if the template
kernel is instantiated. Clang checks the initialization
of variables in non-instantiated templates, where it cannot
find the body of the ctor, and therefore diagnoses it.

This patch skips the check for non-instantiated templates.
---
 clang/lib/Sema/SemaCUDA.cpp                   |  7 +++
 .../test/SemaCUDA/Inputs/cuda-initializers.h  | 11 +++++
 clang/test/SemaCUDA/device-var-init.cu        | 48 +++++++++++++++++++
 3 files changed, 66 insertions(+)

diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 7c4083e4ec4d4bb..d993499cf4a6e6e 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -632,6 +632,13 @@ bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD,
 } // namespace
 
 void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
+  // Return early if VD is inside a non-instantiated template function since
+  // the implicit constructor is not defined yet.
+  if (const FunctionDecl *FD =
+          dyn_cast_or_null<FunctionDecl>(VD->getDeclContext()))
+    if (FD->isDependentContext())
+      return;
+
   // Do not check dependent variables since the ctor/dtor/initializer are not
   // determined. Do it after instantiation.
   if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
diff --git a/clang/test/SemaCUDA/Inputs/cuda-initializers.h b/clang/test/SemaCUDA/Inputs/cuda-initializers.h
index 837b726a13e0f4b..b1e7a1bd48fb576 100644
--- a/clang/test/SemaCUDA/Inputs/cuda-initializers.h
+++ b/clang/test/SemaCUDA/Inputs/cuda-initializers.h
@@ -143,3 +143,14 @@ struct T_F_NED {
 struct T_FA_NED {
   NED ned[2];
 };
+
+// constexpr empty ctor -- allowed
+struct CEEC {
+  constexpr CEEC() {}
+};
+
+// Compiler generated trivial ctor -- allowed
+struct CGTC {
+  CEEC ceec;
+  int a;
+};
diff --git a/clang/test/SemaCUDA/device-var-init.cu b/clang/test/SemaCUDA/device-var-init.cu
index 9d499bddbe1b31a..ee7a9e2276f2df0 100644
--- a/clang/test/SemaCUDA/device-var-init.cu
+++ b/clang/test/SemaCUDA/device-var-init.cu
@@ -31,6 +31,14 @@ __device__ ECD d_ecd_i{};
 __shared__ ECD s_ecd_i{};
 __constant__ ECD c_ecd_i{};
 
+__device__ CEEC d_ceec;
+__shared__ CEEC s_ceec;
+__constant__ CEEC c_ceec;
+
+__device__ CGTC d_cgtc;
+__shared__ CGTC s_cgtc;
+__constant__ CGTC c_cgtc;
+
 __device__ EC d_ec_i(3);
 // expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables.}}
 __shared__ EC s_ec_i(3);
@@ -213,6 +221,17 @@ __device__ void df_sema() {
   static const __device__ int cds = 1;
   static const __constant__ int cdc = 1;
 
+  for (int i = 0; i < 10; i++) {
+    static __device__ CEEC sd_ceec;
+    static __shared__ CEEC ss_ceec;
+    static __constant__ CEEC sc_ceec;
+    __shared__ CEEC s_ceec;
+
+    static __device__ CGTC sd_cgtc;
+    static __shared__ CGTC ss_cgtc;
+    static __constant__ CGTC sc_cgtc;
+    __shared__ CGTC s_cgtc;
+  }
 
   // __shared__ does not need to be explicitly static.
   __shared__ int lsi;
@@ -431,6 +450,35 @@ template <typename T>
 __global__ void bar() {
   __shared__ T bad;
 // expected-error@-1 {{initialization is not supported for __shared__ variables.}}
+  for (int i = 0; i < 10; i++) {
+    static __device__ CEEC sd_ceec;
+    static __shared__ CEEC ss_ceec;
+    static __constant__ CEEC sc_ceec;
+    __shared__ CEEC s_ceec;
+
+    static __device__ CGTC sd_cgtc;
+    static __shared__ CGTC ss_cgtc;
+    static __constant__ CGTC sc_cgtc;
+    __shared__ CGTC s_cgtc;
+  }
+}
+
+// Check specialization of template function.
+template <>
+__global__ void bar<int>() {
+  __shared__ NontrivialInitializer bad;
+// expected-error@-1 {{initialization is not supported for __shared__ variables.}}
+  for (int i = 0; i < 10; i++) {
+    static __device__ CEEC sd_ceec;
+    static __shared__ CEEC ss_ceec;
+    static __constant__ CEEC sc_ceec;
+    __shared__ CEEC s_ceec;
+
+    static __device__ CGTC sd_cgtc;
+    static __shared__ CGTC ss_cgtc;
+    static __constant__ CGTC sc_cgtc;
+    __shared__ CGTC s_cgtc;
+  }
 }
 
 void instantiate() {



More information about the cfe-commits mailing list