[clang] 9857570 - [CUDA][HIP] Fix device template variables

Yaxun Liu via cfe-commits cfe-commits at lists.llvm.org
Wed May 12 08:14:15 PDT 2021


Author: Yaxun (Sam) Liu
Date: 2021-05-12T11:13:29-04:00
New Revision: 98575708da9544ccab8939fece9c3d638a32f09f

URL: https://github.com/llvm/llvm-project/commit/98575708da9544ccab8939fece9c3d638a32f09f
DIFF: https://github.com/llvm/llvm-project/commit/98575708da9544ccab8939fece9c3d638a32f09f.diff

LOG: [CUDA][HIP] Fix device template variables

Currently clang does not emit device template variables
instantiated only in host functions, however, nvcc is
able to do that:

https://godbolt.org/z/fneEfferY

This patch fixes this issue by refactoring and extending
the existing mechanism for emitting static device
var ODR-used by host only. Basically clang records
device variables ODR-used by host code and force
them to be emitted in device compilation. The existing
mechanism makes sure these device variables ODR-used
by host code are added to llvm.compiler-used, therefore
they are guaranteed not to be deleted.

It also fixes non-ODR-use of static device variable by host code
causing static device variable to be emitted and registered,
which should not.

Reviewed by: Artem Belevich

Differential Revision: https://reviews.llvm.org/D102237

Added: 
    

Modified: 
    clang/include/clang/AST/ASTContext.h
    clang/lib/AST/ASTContext.cpp
    clang/lib/CodeGen/CGCUDANV.cpp
    clang/lib/CodeGen/CodeGenModule.cpp
    clang/lib/Sema/SemaExpr.cpp
    clang/test/CodeGenCUDA/device-stub.cu
    clang/test/CodeGenCUDA/host-used-device-var.cu
    clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
    clang/test/CodeGenCUDA/static-device-var-rdc.cu

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index bef793831c6b2..6ebdca06d58ff 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -1064,8 +1064,8 @@ class ASTContext : public RefCountedBase<ASTContext> {
   // Implicitly-declared type 'struct _GUID'.
   mutable TagDecl *MSGuidTagDecl = nullptr;
 
-  /// Keep track of CUDA/HIP static device variables referenced by host code.
-  llvm::DenseSet<const VarDecl *> CUDAStaticDeviceVarReferencedByHost;
+  /// Keep track of CUDA/HIP device-side variables ODR-used by host code.
+  llvm::DenseSet<const VarDecl *> CUDADeviceVarODRUsedByHost;
 
   ASTContext(LangOptions &LOpts, SourceManager &SM, IdentifierTable &idents,
              SelectorTable &sels, Builtin::Context &builtins);

diff  --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 8941d563768d6..6eb8da7411237 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -11635,7 +11635,7 @@ bool ASTContext::mayExternalizeStaticVar(const Decl *D) const {
 bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
   return mayExternalizeStaticVar(D) &&
          (D->hasAttr<HIPManagedAttr>() ||
-          CUDAStaticDeviceVarReferencedByHost.count(cast<VarDecl>(D)));
+          CUDADeviceVarODRUsedByHost.count(cast<VarDecl>(D)));
 }
 
 StringRef ASTContext::getCUIDHash() const {

diff  --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 1cd718d2e22fc..995b6a0b5fec6 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1015,10 +1015,14 @@ void CGNVCUDARuntime::handleVarRegistration(const VarDecl *D,
     // Don't register a C++17 inline variable. The local symbol can be
     // discarded and referencing a discarded local symbol from outside the
     // comdat (__cuda_register_globals) is disallowed by the ELF spec.
-    // TODO: Reject __device__ constexpr and __device__ inline in Sema.
+    //
     // HIP managed variables need to be always recorded in device and host
     // compilations for transformation.
+    //
+    // HIP managed variables and variables in CUDADeviceVarODRUsedByHost are
+    // added to llvm.compiler-used, therefore they are safe to be registered.
     if ((!D->hasExternalStorage() && !D->isInline()) ||
+        CGM.getContext().CUDADeviceVarODRUsedByHost.contains(D) ||
         D->hasAttr<HIPManagedAttr>()) {
       registerDeviceVar(D, GV, !D->hasDefinition(),
                         D->hasAttr<CUDAConstantAttr>());

diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c61da1c980a2e..e3c843c8e9d37 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2362,8 +2362,8 @@ void CodeGenModule::EmitDeferred() {
   }
 
   // Emit CUDA/HIP static device variables referenced by host code only.
-  if (getLangOpts().CUDA)
-    for (auto V : getContext().CUDAStaticDeviceVarReferencedByHost)
+  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
+    for (const auto *V : getContext().CUDADeviceVarODRUsedByHost)
       DeferredDeclsToEmit.push_back(V);
 
   // Stop if we're out of both deferred vtables and deferred declarations.

diff  --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 1390c17de9ece..719161fb9ba1e 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -17136,10 +17136,7 @@ MarkVarDeclODRUsed(VarDecl *Var, SourceLocation Loc, Sema &SemaRef,
     CaptureType, DeclRefType,
     FunctionScopeIndexToStopAt);
 
-  // Diagnose ODR-use of host global variables in device functions. Reference
-  // of device global variables in host functions is allowed through shadow
-  // variables therefore it is not diagnosed.
-  if (SemaRef.LangOpts.CUDA && SemaRef.LangOpts.CUDAIsDevice) {
+  if (SemaRef.LangOpts.CUDA) {
     auto *FD = dyn_cast_or_null<FunctionDecl>(SemaRef.CurContext);
     auto Target = SemaRef.IdentifyCUDATarget(FD);
     auto IsEmittedOnDeviceSide = [](VarDecl *Var) {
@@ -17155,9 +17152,28 @@ MarkVarDeclODRUsed(VarDecl *Var, SourceLocation Loc, Sema &SemaRef,
       }
       return false;
     };
-    if (Var && Var->hasGlobalStorage() && !IsEmittedOnDeviceSide(Var)) {
-      SemaRef.targetDiag(Loc, diag::err_ref_bad_target)
-          << /*host*/ 2 << /*variable*/ 1 << Var << Target;
+    if (Var && Var->hasGlobalStorage()) {
+      if (!IsEmittedOnDeviceSide(Var)) {
+        // Diagnose ODR-use of host global variables in device functions.
+        // Reference of device global variables in host functions is allowed
+        // through shadow variables therefore it is not diagnosed.
+        if (SemaRef.LangOpts.CUDAIsDevice)
+          SemaRef.targetDiag(Loc, diag::err_ref_bad_target)
+              << /*host*/ 2 << /*variable*/ 1 << Var << Target;
+      } else if ((Target == Sema::CFT_Host || Target == Sema::CFT_HostDevice) &&
+                 !Var->hasExternalStorage()) {
+        // Record a CUDA/HIP device side variable if it is ODR-used
+        // by host code. This is done conservatively, when the variable is
+        // referenced in any of the following contexts:
+        //   - a non-function context
+        //   - a host function
+        //   - a host device function
+        // This makes the ODR-use of the device side variable by host code to
+        // be visible in the device compilation for the compiler to be able to
+        // emit template variables instantiated by host code only and to
+        // externalize the static device side variable ODR-used by host code.
+        SemaRef.getASTContext().CUDADeviceVarODRUsedByHost.insert(Var);
+      }
     }
   }
 
@@ -18323,24 +18339,6 @@ static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc,
   if (Var->isInvalidDecl())
     return;
 
-  // Record a CUDA/HIP static device/constant variable if it is referenced
-  // by host code. This is done conservatively, when the variable is referenced
-  // in any of the following contexts:
-  //   - a non-function context
-  //   - a host function
-  //   - a host device function
-  // This also requires the reference of the static device/constant variable by
-  // host code to be visible in the device compilation for the compiler to be
-  // able to externalize the static device/constant variable.
-  if (SemaRef.getASTContext().mayExternalizeStaticVar(Var)) {
-    auto *CurContext = SemaRef.CurContext;
-    if (!CurContext || !isa<FunctionDecl>(CurContext) ||
-        cast<FunctionDecl>(CurContext)->hasAttr<CUDAHostAttr>() ||
-        (!cast<FunctionDecl>(CurContext)->hasAttr<CUDADeviceAttr>() &&
-         !cast<FunctionDecl>(CurContext)->hasAttr<CUDAGlobalAttr>()))
-      SemaRef.getASTContext().CUDAStaticDeviceVarReferencedByHost.insert(Var);
-  }
-
   auto *MSI = Var->getMemberSpecializationInfo();
   TemplateSpecializationKind TSK = MSI ? MSI->getTemplateSpecializationKind()
                                        : Var->getTemplateSpecializationKind();

diff  --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu
index 9bac4e81a54a2..e2f32adaf032c 100644
--- a/clang/test/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CodeGenCUDA/device-stub.cu
@@ -107,9 +107,14 @@ __constant__ int ext_constant_var_def = 2;
 #if __cplusplus > 201402L
 // NORDC17: @inline_var = internal global i32 undef, comdat, align 4{{$}}
 // RDC17: @inline_var = linkonce_odr global i32 undef, comdat, align 4{{$}}
+// NORDC17-NOT: @inline_var2 =
+// RDC17-NOT: @inline_var2 =
 // NORDC17: @_ZN1C17member_inline_varE = internal constant i32 undef, comdat, align 4{{$}}
 // RDC17: @_ZN1C17member_inline_varE = linkonce_odr constant i32 undef, comdat, align 4{{$}}
+// Check inline variable ODR-used by host is emitted on host and registered.
 __device__ inline int inline_var = 3;
+// Check inline variable not ODR-used by host is not emitted on host or registered.
+__device__ inline int inline_var2 = 5;
 struct C {
   __device__ static constexpr int member_inline_var = 4;
 };
@@ -126,10 +131,17 @@ void use_pointers() {
   p = &ext_host_var;
 #if __cplusplus > 201402L
   p = &inline_var;
+  decltype(inline_var2) tmp;
   p = &C::member_inline_var;
 #endif
 }
 
+__device__ void device_use() {
+#if __cplusplus > 201402L
+  const int *p = &inline_var2;
+#endif
+}
+
 // Make sure that all parts of GPU code init/cleanup are there:
 // * constant unnamed string with the device-side kernel name to be passed to
 //   __hipRegisterFunction/__cudaRegisterFunction.
@@ -212,7 +224,8 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{[^,]*}}, {{[^@]*}}@2, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0
 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{[^,]*}}, {{[^@]*}}@3, {{.*}}i32 0, {{i32|i64}} 4, i32 0, i32 0
 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{[^,]*}}, {{[^@]*}}@4, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0
-// LNX_17-NOT: [[PREFIX]]RegisterVar(i8** %0, {{.*}}inline_var
+// LNX_17-DAG: [[PREFIX]]RegisterVar(i8** %0, {{.*}}inline_var
+// LNX_17-NOT: [[PREFIX]]RegisterVar(i8** %0, {{.*}}inline_var2
 // ALL: ret void
 
 // Test that we've built a constructor.

diff  --git a/clang/test/CodeGenCUDA/host-used-device-var.cu b/clang/test/CodeGenCUDA/host-used-device-var.cu
index fd501ed1f2fd7..b12300b73e192 100644
--- a/clang/test/CodeGenCUDA/host-used-device-var.cu
+++ b/clang/test/CodeGenCUDA/host-used-device-var.cu
@@ -1,47 +1,95 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
-// RUN:   -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
-// RUN:   | FileCheck %s
+// RUN:   -std=c++17 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
+// RUN:   | FileCheck -check-prefix=DEV %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x hip %s \
+// RUN:   -std=c++17 -O3 -emit-llvm -o - | FileCheck -check-prefix=HOST %s
+
+// Negative tests.
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN:   -std=c++17 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
+// RUN:   | FileCheck -check-prefix=DEV-NEG %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x hip %s \
+// RUN:   -std=c++17 -O3 -emit-llvm -o - | FileCheck -check-prefix=HOST-NEG %s
 
 #include "Inputs/cuda.h"
 
 // Check device variables used by neither host nor device functioins are not kept.
 
-// CHECK-NOT: @v1
+// DEV-NEG-NOT: @v1
 __device__ int v1;
 
-// CHECK-NOT: @v2
+// DEV-NEG-NOT: @v2
 __constant__ int v2;
 
-// CHECK-NOT: @_ZL2v3
+// DEV-NEG-NOT: @_ZL2v3
 static __device__ int v3;
 
 // Check device variables used by host functions are kept.
 
-// CHECK-DAG: @u1
+// DEV-DAG: @u1
 __device__ int u1;
 
-// CHECK-DAG: @u2
+// DEV-DAG: @u2
 __constant__ int u2;
 
 // Check host-used static device var is in llvm.compiler.used.
-// CHECK-DAG: @_ZL2u3
+// DEV-DAG: @_ZL2u3
 static __device__ int u3;
 
 // Check device-used static device var is emitted but is not in llvm.compiler.used.
-// CHECK-DAG: @_ZL2u4
+// DEV-DAG: @_ZL2u4
 static __device__ int u4;
 
 // Check device variables with used attribute are always kept.
-// CHECK-DAG: @u5
+// DEV-DAG: @u5
 __device__ __attribute__((used)) int u5;
 
-int fun1() {
-  return u1 + u2 + u3;
+// Test external device variable ODR-used by host code is not emitted or registered.
+// DEV-NEG-NOT: @ext_var
+extern __device__ int ext_var;
+
+// DEV-DAG: @inline_var = linkonce_odr addrspace(1) externally_initialized global i32 0
+__device__ inline int inline_var;
+
+template<typename T>
+using func_t = T (*) (T, T);
+
+template <typename T>
+__device__ T add_func (T x, T y)
+{
+  return x + y;
+}
+
+// DEV-DAG: @_Z10p_add_funcIiE = linkonce_odr addrspace(1) externally_initialized global i32 (i32, i32)* @_Z8add_funcIiET_S0_S0_
+template <typename T>
+__device__ func_t<T> p_add_func = add_func<T>;
+
+void use(func_t<int> p);
+void use(int *p);
+
+void fun1() {
+  use(&u1);
+  use(&u2);
+  use(&u3);
+  use(&ext_var);
+  use(&inline_var);
+  use(p_add_func<int>);
 }
 
 __global__ void kern1(int **x) {
   *x = &u4;
 }
+
 // Check the exact list of variables to ensure @_ZL2u4 is not among them.
-// CHECK: @llvm.compiler.used = {{[^@]*}} @_ZL2u3 {{[^@]*}} @u1 {{[^@]*}} @u2 {{[^@]*}} @u5
+// DEV: @llvm.compiler.used = {{[^@]*}} @_Z10p_add_funcIiE {{[^@]*}} @_ZL2u3 {{[^@]*}} @inline_var {{[^@]*}} @u1 {{[^@]*}} @u2 {{[^@]*}} @u5
+
+// HOST-DAG: hipRegisterVar{{.*}}@u1
+// HOST-DAG: hipRegisterVar{{.*}}@u2
+// HOST-DAG: hipRegisterVar{{.*}}@_ZL2u3
+// HOST-DAG: hipRegisterVar{{.*}}@u5
+// HOST-DAG: hipRegisterVar{{.*}}@inline_var
+// HOST-DAG: hipRegisterVar{{.*}}@_Z10p_add_funcIiE
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@ext_var
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZL2u4

diff  --git a/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
index f2cd17389d5f7..2cfed3ae79979 100644
--- a/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
+++ b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
@@ -2,12 +2,18 @@
 // REQUIRES: amdgpu-registered-target
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
-// RUN:   -emit-llvm -o - -x hip %s | FileCheck \
-// RUN:   -check-prefixes=DEV %s
+// RUN:   -emit-llvm -o - -x hip %s | FileCheck -check-prefix=DEV %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
-// RUN:   -emit-llvm -o - -x hip %s | FileCheck \
-// RUN:   -check-prefixes=HOST %s
+// RUN:   -emit-llvm -o - -x hip %s | FileCheck -check-prefix=HOST %s
+
+// Negative tests.
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
+// RUN:   -emit-llvm -o - -x hip %s | FileCheck -check-prefix=DEV-NEG %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
+// RUN:   -emit-llvm -o - -x hip %s | FileCheck -check-prefix=HOST-NEG %s
 
 #include "Inputs/cuda.h"
 
@@ -52,7 +58,7 @@ static __constant__ int y;
 
 // Test static host variable, which should not be externalized nor registered.
 // HOST-DAG: @_ZL1z = internal global i32 0
-// DEV-NOT: @_ZL1z
+// DEV-NEG-NOT: @_ZL1z
 static int z;
 
 // Test implicit static constant variable, which should not be externalized.
@@ -72,6 +78,12 @@ static constexpr int z2 = 456;
 
 static __device__ int w;
 
+// Test non-ODR-use of static device var should not be emitted or registered.
+// DEV-NEG-NOT: @_ZL1u
+// HOST-NEG-NOT: @_ZL1u
+
+static __device__ int u;
+
 inline __device__ void devfun(const int ** b) {
   const static int p = 2;
   b[0] = &p;
@@ -88,6 +100,7 @@ __global__ void kernel(int *a, const int **b) {
   a[3] = x3;
   a[4] = x4;
   a[5] = x5;
+  a[6] = sizeof(u);
   b[0] = &w;
   b[1] = &z2;
   b[2] = &local_static_constant;
@@ -108,10 +121,12 @@ void foo(const int **a) {
   getDeviceSymbol(&w);
   z = 123;
   a[0] = &z2;
+  decltype(u) tmp;
 }
 
-// HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]]
-// HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]]
-// HOST: __hipRegisterVar({{.*}}@_ZL1w {{.*}}@[[DEVNAMEW]]
-// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w
-// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p
+// HOST-DAG: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]]
+// HOST-DAG: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]]
+// HOST-DAG: __hipRegisterVar({{.*}}@_ZL1w {{.*}}@[[DEVNAMEW]]
+// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZL1u
+// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w
+// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p

diff  --git a/clang/test/CodeGenCUDA/static-device-var-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-rdc.cu
index eac985fb70de6..aa5b510026a87 100644
--- a/clang/test/CodeGenCUDA/static-device-var-rdc.cu
+++ b/clang/test/CodeGenCUDA/static-device-var-rdc.cu
@@ -2,19 +2,19 @@
 // REQUIRES: amdgpu-registered-target
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
-// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
 // RUN:   -check-prefixes=DEV,INT-DEV %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux \
-// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
 // RUN:   -check-prefixes=HOST,INT-HOST %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
-// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev
+// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev
 // RUN: cat %t.dev | FileCheck -check-prefixes=DEV,EXT-DEV %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
-// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host
+// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host
 // RUN: cat %t.host | FileCheck -check-prefixes=HOST,EXT-HOST %s
 
 // Check host and device compilations use the same postfixes for static
@@ -22,6 +22,25 @@
 
 // RUN: cat %t.dev %t.host | FileCheck -check-prefix=POSTFIX %s
 
+// Negative tests.
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -check-prefix=DEV-NEG %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux \
+// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -check-prefix=HOST-NEG %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
+// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev
+// RUN: cat %t.dev | FileCheck -check-prefix=DEV-NEG %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
+// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host
+// RUN: cat %t.host | FileCheck -check-prefix=HOST-NEG %s
+
+
 #include "Inputs/cuda.h"
 
 // Test function scope static device variable, which should not be externalized.
@@ -61,9 +80,14 @@ static __constant__ int y;
 
 // Test static host variable, which should not be externalized nor registered.
 // HOST-DAG: @_ZL1z = internal global i32 0
-// DEV-NOT: @_ZL1z
+// DEV-NEG-NOT: @_ZL1z
 static int z;
 
+// Test non-ODR-use of static device variable is not emitted or registered.
+// DEV-NEG-NOT: @_ZL1u
+// HOST-NEG-NOT: @_ZL1u
+static __device__ int u;
+
 // Test static device variable in inline function, which should not be
 // externalized nor registered.
 // DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat
@@ -77,6 +101,7 @@ __global__ void kernel(int *a, const int **b) {
   const static int w = 1;
   a[0] = x;
   a[1] = y;
+  a[2] = sizeof(u);
   b[0] = &w;
   b[1] = &x2;
   devfun(b);
@@ -88,10 +113,12 @@ void foo() {
   getDeviceSymbol(&x);
   getDeviceSymbol(&y);
   z = 123;
+  decltype(u) tmp;
 }
 
-// HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]]
-// HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]]
-// HOST-NOT: __hipRegisterVar({{.*}}@_ZL2x2
-// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w
-// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p
+// HOST-DAG: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]]
+// HOST-DAG: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]]
+// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZL2x2
+// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w
+// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p
+// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZL1u


        


More information about the cfe-commits mailing list