[clang] 9857570 - [CUDA][HIP] Fix device template variables
Yaxun Liu via cfe-commits
cfe-commits at lists.llvm.org
Wed May 12 08:14:15 PDT 2021
Author: Yaxun (Sam) Liu
Date: 2021-05-12T11:13:29-04:00
New Revision: 98575708da9544ccab8939fece9c3d638a32f09f
URL: https://github.com/llvm/llvm-project/commit/98575708da9544ccab8939fece9c3d638a32f09f
DIFF: https://github.com/llvm/llvm-project/commit/98575708da9544ccab8939fece9c3d638a32f09f.diff
LOG: [CUDA][HIP] Fix device template variables
Currently clang does not emit device template variables
instantiated only in host functions, however, nvcc is
able to do that:
https://godbolt.org/z/fneEfferY
This patch fixes this issue by refactoring and extending
the existing mechanism for emitting static device
var ODR-used by host only. Basically clang records
device variables ODR-used by host code and force
them to be emitted in device compilation. The existing
mechanism makes sure these device variables ODR-used
by host code are added to llvm.compiler-used, therefore
they are guaranteed not to be deleted.
It also fixes non-ODR-use of static device variable by host code
causing static device variable to be emitted and registered,
which should not.
Reviewed by: Artem Belevich
Differential Revision: https://reviews.llvm.org/D102237
Added:
Modified:
clang/include/clang/AST/ASTContext.h
clang/lib/AST/ASTContext.cpp
clang/lib/CodeGen/CGCUDANV.cpp
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/Sema/SemaExpr.cpp
clang/test/CodeGenCUDA/device-stub.cu
clang/test/CodeGenCUDA/host-used-device-var.cu
clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
clang/test/CodeGenCUDA/static-device-var-rdc.cu
Removed:
################################################################################
diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index bef793831c6b2..6ebdca06d58ff 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -1064,8 +1064,8 @@ class ASTContext : public RefCountedBase<ASTContext> {
// Implicitly-declared type 'struct _GUID'.
mutable TagDecl *MSGuidTagDecl = nullptr;
- /// Keep track of CUDA/HIP static device variables referenced by host code.
- llvm::DenseSet<const VarDecl *> CUDAStaticDeviceVarReferencedByHost;
+ /// Keep track of CUDA/HIP device-side variables ODR-used by host code.
+ llvm::DenseSet<const VarDecl *> CUDADeviceVarODRUsedByHost;
ASTContext(LangOptions &LOpts, SourceManager &SM, IdentifierTable &idents,
SelectorTable &sels, Builtin::Context &builtins);
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 8941d563768d6..6eb8da7411237 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -11635,7 +11635,7 @@ bool ASTContext::mayExternalizeStaticVar(const Decl *D) const {
bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
return mayExternalizeStaticVar(D) &&
(D->hasAttr<HIPManagedAttr>() ||
- CUDAStaticDeviceVarReferencedByHost.count(cast<VarDecl>(D)));
+ CUDADeviceVarODRUsedByHost.count(cast<VarDecl>(D)));
}
StringRef ASTContext::getCUIDHash() const {
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 1cd718d2e22fc..995b6a0b5fec6 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1015,10 +1015,14 @@ void CGNVCUDARuntime::handleVarRegistration(const VarDecl *D,
// Don't register a C++17 inline variable. The local symbol can be
// discarded and referencing a discarded local symbol from outside the
// comdat (__cuda_register_globals) is disallowed by the ELF spec.
- // TODO: Reject __device__ constexpr and __device__ inline in Sema.
+ //
// HIP managed variables need to be always recorded in device and host
// compilations for transformation.
+ //
+ // HIP managed variables and variables in CUDADeviceVarODRUsedByHost are
+ // added to llvm.compiler-used, therefore they are safe to be registered.
if ((!D->hasExternalStorage() && !D->isInline()) ||
+ CGM.getContext().CUDADeviceVarODRUsedByHost.contains(D) ||
D->hasAttr<HIPManagedAttr>()) {
registerDeviceVar(D, GV, !D->hasDefinition(),
D->hasAttr<CUDAConstantAttr>());
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c61da1c980a2e..e3c843c8e9d37 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2362,8 +2362,8 @@ void CodeGenModule::EmitDeferred() {
}
// Emit CUDA/HIP static device variables referenced by host code only.
- if (getLangOpts().CUDA)
- for (auto V : getContext().CUDAStaticDeviceVarReferencedByHost)
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
+ for (const auto *V : getContext().CUDADeviceVarODRUsedByHost)
DeferredDeclsToEmit.push_back(V);
// Stop if we're out of both deferred vtables and deferred declarations.
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 1390c17de9ece..719161fb9ba1e 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -17136,10 +17136,7 @@ MarkVarDeclODRUsed(VarDecl *Var, SourceLocation Loc, Sema &SemaRef,
CaptureType, DeclRefType,
FunctionScopeIndexToStopAt);
- // Diagnose ODR-use of host global variables in device functions. Reference
- // of device global variables in host functions is allowed through shadow
- // variables therefore it is not diagnosed.
- if (SemaRef.LangOpts.CUDA && SemaRef.LangOpts.CUDAIsDevice) {
+ if (SemaRef.LangOpts.CUDA) {
auto *FD = dyn_cast_or_null<FunctionDecl>(SemaRef.CurContext);
auto Target = SemaRef.IdentifyCUDATarget(FD);
auto IsEmittedOnDeviceSide = [](VarDecl *Var) {
@@ -17155,9 +17152,28 @@ MarkVarDeclODRUsed(VarDecl *Var, SourceLocation Loc, Sema &SemaRef,
}
return false;
};
- if (Var && Var->hasGlobalStorage() && !IsEmittedOnDeviceSide(Var)) {
- SemaRef.targetDiag(Loc, diag::err_ref_bad_target)
- << /*host*/ 2 << /*variable*/ 1 << Var << Target;
+ if (Var && Var->hasGlobalStorage()) {
+ if (!IsEmittedOnDeviceSide(Var)) {
+ // Diagnose ODR-use of host global variables in device functions.
+ // Reference of device global variables in host functions is allowed
+ // through shadow variables therefore it is not diagnosed.
+ if (SemaRef.LangOpts.CUDAIsDevice)
+ SemaRef.targetDiag(Loc, diag::err_ref_bad_target)
+ << /*host*/ 2 << /*variable*/ 1 << Var << Target;
+ } else if ((Target == Sema::CFT_Host || Target == Sema::CFT_HostDevice) &&
+ !Var->hasExternalStorage()) {
+ // Record a CUDA/HIP device side variable if it is ODR-used
+ // by host code. This is done conservatively, when the variable is
+ // referenced in any of the following contexts:
+ // - a non-function context
+ // - a host function
+ // - a host device function
+ // This makes the ODR-use of the device side variable by host code to
+ // be visible in the device compilation for the compiler to be able to
+ // emit template variables instantiated by host code only and to
+ // externalize the static device side variable ODR-used by host code.
+ SemaRef.getASTContext().CUDADeviceVarODRUsedByHost.insert(Var);
+ }
}
}
@@ -18323,24 +18339,6 @@ static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc,
if (Var->isInvalidDecl())
return;
- // Record a CUDA/HIP static device/constant variable if it is referenced
- // by host code. This is done conservatively, when the variable is referenced
- // in any of the following contexts:
- // - a non-function context
- // - a host function
- // - a host device function
- // This also requires the reference of the static device/constant variable by
- // host code to be visible in the device compilation for the compiler to be
- // able to externalize the static device/constant variable.
- if (SemaRef.getASTContext().mayExternalizeStaticVar(Var)) {
- auto *CurContext = SemaRef.CurContext;
- if (!CurContext || !isa<FunctionDecl>(CurContext) ||
- cast<FunctionDecl>(CurContext)->hasAttr<CUDAHostAttr>() ||
- (!cast<FunctionDecl>(CurContext)->hasAttr<CUDADeviceAttr>() &&
- !cast<FunctionDecl>(CurContext)->hasAttr<CUDAGlobalAttr>()))
- SemaRef.getASTContext().CUDAStaticDeviceVarReferencedByHost.insert(Var);
- }
-
auto *MSI = Var->getMemberSpecializationInfo();
TemplateSpecializationKind TSK = MSI ? MSI->getTemplateSpecializationKind()
: Var->getTemplateSpecializationKind();
diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu
index 9bac4e81a54a2..e2f32adaf032c 100644
--- a/clang/test/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CodeGenCUDA/device-stub.cu
@@ -107,9 +107,14 @@ __constant__ int ext_constant_var_def = 2;
#if __cplusplus > 201402L
// NORDC17: @inline_var = internal global i32 undef, comdat, align 4{{$}}
// RDC17: @inline_var = linkonce_odr global i32 undef, comdat, align 4{{$}}
+// NORDC17-NOT: @inline_var2 =
+// RDC17-NOT: @inline_var2 =
// NORDC17: @_ZN1C17member_inline_varE = internal constant i32 undef, comdat, align 4{{$}}
// RDC17: @_ZN1C17member_inline_varE = linkonce_odr constant i32 undef, comdat, align 4{{$}}
+// Check inline variable ODR-used by host is emitted on host and registered.
__device__ inline int inline_var = 3;
+// Check inline variable not ODR-used by host is not emitted on host or registered.
+__device__ inline int inline_var2 = 5;
struct C {
__device__ static constexpr int member_inline_var = 4;
};
@@ -126,10 +131,17 @@ void use_pointers() {
p = &ext_host_var;
#if __cplusplus > 201402L
p = &inline_var;
+ decltype(inline_var2) tmp;
p = &C::member_inline_var;
#endif
}
+__device__ void device_use() {
+#if __cplusplus > 201402L
+ const int *p = &inline_var2;
+#endif
+}
+
// Make sure that all parts of GPU code init/cleanup are there:
// * constant unnamed string with the device-side kernel name to be passed to
// __hipRegisterFunction/__cudaRegisterFunction.
@@ -212,7 +224,8 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
// ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{[^,]*}}, {{[^@]*}}@2, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0
// ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{[^,]*}}, {{[^@]*}}@3, {{.*}}i32 0, {{i32|i64}} 4, i32 0, i32 0
// ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{[^,]*}}, {{[^@]*}}@4, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0
-// LNX_17-NOT: [[PREFIX]]RegisterVar(i8** %0, {{.*}}inline_var
+// LNX_17-DAG: [[PREFIX]]RegisterVar(i8** %0, {{.*}}inline_var
+// LNX_17-NOT: [[PREFIX]]RegisterVar(i8** %0, {{.*}}inline_var2
// ALL: ret void
// Test that we've built a constructor.
diff --git a/clang/test/CodeGenCUDA/host-used-device-var.cu b/clang/test/CodeGenCUDA/host-used-device-var.cu
index fd501ed1f2fd7..b12300b73e192 100644
--- a/clang/test/CodeGenCUDA/host-used-device-var.cu
+++ b/clang/test/CodeGenCUDA/host-used-device-var.cu
@@ -1,47 +1,95 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
-// RUN: -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
-// RUN: | FileCheck %s
+// RUN: -std=c++17 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
+// RUN: | FileCheck -check-prefix=DEV %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x hip %s \
+// RUN: -std=c++17 -O3 -emit-llvm -o - | FileCheck -check-prefix=HOST %s
+
+// Negative tests.
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN: -std=c++17 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
+// RUN: | FileCheck -check-prefix=DEV-NEG %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x hip %s \
+// RUN: -std=c++17 -O3 -emit-llvm -o - | FileCheck -check-prefix=HOST-NEG %s
#include "Inputs/cuda.h"
// Check device variables used by neither host nor device functioins are not kept.
-// CHECK-NOT: @v1
+// DEV-NEG-NOT: @v1
__device__ int v1;
-// CHECK-NOT: @v2
+// DEV-NEG-NOT: @v2
__constant__ int v2;
-// CHECK-NOT: @_ZL2v3
+// DEV-NEG-NOT: @_ZL2v3
static __device__ int v3;
// Check device variables used by host functions are kept.
-// CHECK-DAG: @u1
+// DEV-DAG: @u1
__device__ int u1;
-// CHECK-DAG: @u2
+// DEV-DAG: @u2
__constant__ int u2;
// Check host-used static device var is in llvm.compiler.used.
-// CHECK-DAG: @_ZL2u3
+// DEV-DAG: @_ZL2u3
static __device__ int u3;
// Check device-used static device var is emitted but is not in llvm.compiler.used.
-// CHECK-DAG: @_ZL2u4
+// DEV-DAG: @_ZL2u4
static __device__ int u4;
// Check device variables with used attribute are always kept.
-// CHECK-DAG: @u5
+// DEV-DAG: @u5
__device__ __attribute__((used)) int u5;
-int fun1() {
- return u1 + u2 + u3;
+// Test external device variable ODR-used by host code is not emitted or registered.
+// DEV-NEG-NOT: @ext_var
+extern __device__ int ext_var;
+
+// DEV-DAG: @inline_var = linkonce_odr addrspace(1) externally_initialized global i32 0
+__device__ inline int inline_var;
+
+template<typename T>
+using func_t = T (*) (T, T);
+
+template <typename T>
+__device__ T add_func (T x, T y)
+{
+ return x + y;
+}
+
+// DEV-DAG: @_Z10p_add_funcIiE = linkonce_odr addrspace(1) externally_initialized global i32 (i32, i32)* @_Z8add_funcIiET_S0_S0_
+template <typename T>
+__device__ func_t<T> p_add_func = add_func<T>;
+
+void use(func_t<int> p);
+void use(int *p);
+
+void fun1() {
+ use(&u1);
+ use(&u2);
+ use(&u3);
+ use(&ext_var);
+ use(&inline_var);
+ use(p_add_func<int>);
}
__global__ void kern1(int **x) {
*x = &u4;
}
+
// Check the exact list of variables to ensure @_ZL2u4 is not among them.
-// CHECK: @llvm.compiler.used = {{[^@]*}} @_ZL2u3 {{[^@]*}} @u1 {{[^@]*}} @u2 {{[^@]*}} @u5
+// DEV: @llvm.compiler.used = {{[^@]*}} @_Z10p_add_funcIiE {{[^@]*}} @_ZL2u3 {{[^@]*}} @inline_var {{[^@]*}} @u1 {{[^@]*}} @u2 {{[^@]*}} @u5
+
+// HOST-DAG: hipRegisterVar{{.*}}@u1
+// HOST-DAG: hipRegisterVar{{.*}}@u2
+// HOST-DAG: hipRegisterVar{{.*}}@_ZL2u3
+// HOST-DAG: hipRegisterVar{{.*}}@u5
+// HOST-DAG: hipRegisterVar{{.*}}@inline_var
+// HOST-DAG: hipRegisterVar{{.*}}@_Z10p_add_funcIiE
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@ext_var
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZL2u4
diff --git a/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
index f2cd17389d5f7..2cfed3ae79979 100644
--- a/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
+++ b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
@@ -2,12 +2,18 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
-// RUN: -emit-llvm -o - -x hip %s | FileCheck \
-// RUN: -check-prefixes=DEV %s
+// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefix=DEV %s
// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
-// RUN: -emit-llvm -o - -x hip %s | FileCheck \
-// RUN: -check-prefixes=HOST %s
+// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefix=HOST %s
+
+// Negative tests.
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
+// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefix=DEV-NEG %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
+// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefix=HOST-NEG %s
#include "Inputs/cuda.h"
@@ -52,7 +58,7 @@ static __constant__ int y;
// Test static host variable, which should not be externalized nor registered.
// HOST-DAG: @_ZL1z = internal global i32 0
-// DEV-NOT: @_ZL1z
+// DEV-NEG-NOT: @_ZL1z
static int z;
// Test implicit static constant variable, which should not be externalized.
@@ -72,6 +78,12 @@ static constexpr int z2 = 456;
static __device__ int w;
+// Test non-ODR-use of static device var should not be emitted or registered.
+// DEV-NEG-NOT: @_ZL1u
+// HOST-NEG-NOT: @_ZL1u
+
+static __device__ int u;
+
inline __device__ void devfun(const int ** b) {
const static int p = 2;
b[0] = &p;
@@ -88,6 +100,7 @@ __global__ void kernel(int *a, const int **b) {
a[3] = x3;
a[4] = x4;
a[5] = x5;
+ a[6] = sizeof(u);
b[0] = &w;
b[1] = &z2;
b[2] = &local_static_constant;
@@ -108,10 +121,12 @@ void foo(const int **a) {
getDeviceSymbol(&w);
z = 123;
a[0] = &z2;
+ decltype(u) tmp;
}
-// HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]]
-// HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]]
-// HOST: __hipRegisterVar({{.*}}@_ZL1w {{.*}}@[[DEVNAMEW]]
-// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w
-// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p
+// HOST-DAG: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]]
+// HOST-DAG: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]]
+// HOST-DAG: __hipRegisterVar({{.*}}@_ZL1w {{.*}}@[[DEVNAMEW]]
+// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZL1u
+// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w
+// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p
diff --git a/clang/test/CodeGenCUDA/static-device-var-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-rdc.cu
index eac985fb70de6..aa5b510026a87 100644
--- a/clang/test/CodeGenCUDA/static-device-var-rdc.cu
+++ b/clang/test/CodeGenCUDA/static-device-var-rdc.cu
@@ -2,19 +2,19 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
-// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
// RUN: -check-prefixes=DEV,INT-DEV %s
// RUN: %clang_cc1 -triple x86_64-gnu-linux \
-// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
// RUN: -check-prefixes=HOST,INT-HOST %s
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
-// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev
+// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev
// RUN: cat %t.dev | FileCheck -check-prefixes=DEV,EXT-DEV %s
// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
-// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host
+// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host
// RUN: cat %t.host | FileCheck -check-prefixes=HOST,EXT-HOST %s
// Check host and device compilations use the same postfixes for static
@@ -22,6 +22,25 @@
// RUN: cat %t.dev %t.host | FileCheck -check-prefix=POSTFIX %s
+// Negative tests.
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN: -check-prefix=DEV-NEG %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux \
+// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN: -check-prefix=HOST-NEG %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
+// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev
+// RUN: cat %t.dev | FileCheck -check-prefix=DEV-NEG %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
+// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host
+// RUN: cat %t.host | FileCheck -check-prefix=HOST-NEG %s
+
+
#include "Inputs/cuda.h"
// Test function scope static device variable, which should not be externalized.
@@ -61,9 +80,14 @@ static __constant__ int y;
// Test static host variable, which should not be externalized nor registered.
// HOST-DAG: @_ZL1z = internal global i32 0
-// DEV-NOT: @_ZL1z
+// DEV-NEG-NOT: @_ZL1z
static int z;
+// Test non-ODR-use of static device variable is not emitted or registered.
+// DEV-NEG-NOT: @_ZL1u
+// HOST-NEG-NOT: @_ZL1u
+static __device__ int u;
+
// Test static device variable in inline function, which should not be
// externalized nor registered.
// DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat
@@ -77,6 +101,7 @@ __global__ void kernel(int *a, const int **b) {
const static int w = 1;
a[0] = x;
a[1] = y;
+ a[2] = sizeof(u);
b[0] = &w;
b[1] = &x2;
devfun(b);
@@ -88,10 +113,12 @@ void foo() {
getDeviceSymbol(&x);
getDeviceSymbol(&y);
z = 123;
+ decltype(u) tmp;
}
-// HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]]
-// HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]]
-// HOST-NOT: __hipRegisterVar({{.*}}@_ZL2x2
-// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w
-// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p
+// HOST-DAG: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]]
+// HOST-DAG: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]]
+// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZL2x2
+// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w
+// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p
+// HOST-NEG-NOT: __hipRegisterVar({{.*}}@_ZL1u
More information about the cfe-commits
mailing list