[clang] 4cb4256 - [CUDA][HIP] Fix device variables used by host
Yaxun Liu via cfe-commits
cfe-commits at lists.llvm.org
Thu May 20 14:04:55 PDT 2021
Author: Yaxun (Sam) Liu
Date: 2021-05-20T17:04:29-04:00
New Revision: 4cb42564ec4b56ef7eb4758bfa4ddf844a163687
URL: https://github.com/llvm/llvm-project/commit/4cb42564ec4b56ef7eb4758bfa4ddf844a163687
DIFF: https://github.com/llvm/llvm-project/commit/4cb42564ec4b56ef7eb4758bfa4ddf844a163687.diff
LOG: [CUDA][HIP] Fix device variables used by host
variables emitted on both host and device side with different addresses
when ODR-used by host function should not cause device side counter-part
to be force emitted.
This fixes the regression caused by https://reviews.llvm.org/D102237
Reviewed by: Artem Belevich
Differential Revision: https://reviews.llvm.org/D102801
Added:
clang/test/AST/ast-dump-constant-var.cu
Modified:
clang/include/clang/Sema/Sema.h
clang/lib/CodeGen/CGDeclCXX.cpp
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/Sema/SemaCUDA.cpp
clang/lib/Sema/SemaDeclAttr.cpp
clang/lib/Sema/SemaExpr.cpp
clang/test/CodeGenCUDA/host-used-device-var.cu
clang/test/SemaCUDA/static-device-var.cu
Removed:
################################################################################
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 9c459a95a6916..114ff6441b4a8 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -12066,6 +12066,15 @@ class Sema final {
bool IgnoreImplicitHDAttr = false);
CUDAFunctionTarget IdentifyCUDATarget(const ParsedAttributesView &Attrs);
+ enum CUDAVariableTarget {
+ CVT_Device, /// Emitted on device side with a shadow variable on host side
+ CVT_Host, /// Emitted on host side only
+ CVT_Both, /// Emitted on both sides with
diff erent addresses
+ CVT_Unified, /// Emitted as a unified address, e.g. managed variables
+ };
+ /// Determines whether the given variable is emitted on host or device side.
+ CUDAVariableTarget IdentifyCUDATarget(const VarDecl *D);
+
/// Gets the CUDA target for the current context.
CUDAFunctionTarget CurrentCUDATarget() {
return IdentifyCUDATarget(dyn_cast<FunctionDecl>(CurContext));
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index 8131b5285075c..27a93c243eca0 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -644,7 +644,9 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
}
- if (getLangOpts().HIP) {
+ assert(!getLangOpts().CUDA || !getLangOpts().CUDAIsDevice ||
+ getLangOpts().GPUAllowDeviceInit);
+ if (getLangOpts().HIP && getLangOpts().CUDAIsDevice) {
Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
Fn->addFnAttr("device-init");
}
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 03920982ee086..8d1b2ea5d5228 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2368,6 +2368,8 @@ void CodeGenModule::EmitDeferred() {
}
// Emit CUDA/HIP static device variables referenced by host code only.
+ // Note we should not clear CUDADeviceVarODRUsedByHost since it is still
+ // needed for further handling.
if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
for (const auto *V : getContext().CUDADeviceVarODRUsedByHost)
DeferredDeclsToEmit.push_back(V);
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index b907374b466f3..8f9800767f896 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -26,6 +26,14 @@
#include "llvm/ADT/SmallVector.h"
using namespace clang;
+template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
+ if (!D)
+ return false;
+ if (auto *A = D->getAttr<AttrT>())
+ return !A->isImplicit();
+ return false;
+}
+
void Sema::PushForceCUDAHostDevice() {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
ForceCUDAHostDeviceDepth++;
@@ -133,6 +141,35 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
return CFT_Host;
}
+/// IdentifyTarget - Determine the CUDA compilation target for this variable.
+Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) {
+ if (Var->hasAttr<HIPManagedAttr>())
+ return CVT_Unified;
+ if (Var->isConstexpr() && !hasExplicitAttr<CUDAConstantAttr>(Var))
+ return CVT_Both;
+ if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
+ Var->hasAttr<CUDASharedAttr>() ||
+ Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
+ Var->getType()->isCUDADeviceBuiltinTextureType())
+ return CVT_Device;
+ // Function-scope static variable without explicit device or constant
+ // attribute are emitted
+ // - on both sides in host device functions
+ // - on device side in device or global functions
+ if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
+ switch (IdentifyCUDATarget(FD)) {
+ case CFT_HostDevice:
+ return CVT_Both;
+ case CFT_Device:
+ case CFT_Global:
+ return CVT_Device;
+ default:
+ return CVT_Host;
+ }
+ }
+ return CVT_Host;
+}
+
// * CUDA Call preference table
//
// F - from,
@@ -637,7 +674,8 @@ void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
if (getLangOpts().CUDAIsDevice && VD->isConstexpr() &&
- (VD->isFileVarDecl() || VD->isStaticDataMember())) {
+ (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
+ !VD->hasAttr<CUDAConstantAttr>()) {
VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
}
}
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 4bbbcf985cbd3..ea1930ea47d67 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -4419,6 +4419,13 @@ static void handleConstantAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
S.Diag(AL.getLoc(), diag::err_cuda_nonstatic_constdev);
return;
}
+ // constexpr variable may already get an implicit constant attr, which should
+ // be replaced by the explicit constant attr.
+ if (auto *A = D->getAttr<CUDAConstantAttr>()) {
+ if (!A->isImplicit())
+ return;
+ D->dropAttr<CUDAConstantAttr>();
+ }
D->addAttr(::new (S.Context) CUDAConstantAttr(S.Context, AL));
}
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 16fab09a96dda..8181c12547b7d 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -17146,28 +17146,20 @@ MarkVarDeclODRUsed(VarDecl *Var, SourceLocation Loc, Sema &SemaRef,
if (SemaRef.LangOpts.CUDA && Var && Var->hasGlobalStorage()) {
auto *FD = dyn_cast_or_null<FunctionDecl>(SemaRef.CurContext);
- auto Target = SemaRef.IdentifyCUDATarget(FD);
- auto IsEmittedOnDeviceSide = [](VarDecl *Var) {
- if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
- Var->hasAttr<CUDASharedAttr>() ||
- Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
- Var->getType()->isCUDADeviceBuiltinTextureType())
- return true;
- // Function-scope static variable in device functions or kernels are
- // emitted on device side.
- if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
- return FD->hasAttr<CUDADeviceAttr>() || FD->hasAttr<CUDAGlobalAttr>();
- }
- return false;
- };
- if (!IsEmittedOnDeviceSide(Var)) {
+ auto VarTarget = SemaRef.IdentifyCUDATarget(Var);
+ auto UserTarget = SemaRef.IdentifyCUDATarget(FD);
+ if (VarTarget == Sema::CVT_Host &&
+ (UserTarget == Sema::CFT_Device || UserTarget == Sema::CFT_HostDevice ||
+ UserTarget == Sema::CFT_Global)) {
// Diagnose ODR-use of host global variables in device functions.
// Reference of device global variables in host functions is allowed
// through shadow variables therefore it is not diagnosed.
if (SemaRef.LangOpts.CUDAIsDevice)
SemaRef.targetDiag(Loc, diag::err_ref_bad_target)
- << /*host*/ 2 << /*variable*/ 1 << Var << Target;
- } else if ((Target == Sema::CFT_Host || Target == Sema::CFT_HostDevice) &&
+ << /*host*/ 2 << /*variable*/ 1 << Var << UserTarget;
+ } else if (VarTarget == Sema::CVT_Device &&
+ (UserTarget == Sema::CFT_Host ||
+ UserTarget == Sema::CFT_HostDevice) &&
!Var->hasExternalStorage()) {
// Record a CUDA/HIP device side variable if it is ODR-used
// by host code. This is done conservatively, when the variable is
diff --git a/clang/test/AST/ast-dump-constant-var.cu b/clang/test/AST/ast-dump-constant-var.cu
new file mode 100644
index 0000000000000..cec93af5ff7bf
--- /dev/null
+++ b/clang/test/AST/ast-dump-constant-var.cu
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -std=c++14 -ast-dump -x hip %s | FileCheck -check-prefixes=CHECK,HOST %s
+// RUN: %clang_cc1 -std=c++14 -ast-dump -fcuda-is-device -x hip %s | FileCheck -check-prefixes=CHECK,DEV %s
+
+#include "Inputs/cuda.h"
+
+// CHECK-LABEL: VarDecl {{.*}} m1 'int'
+// CHECK-NEXT: CUDAConstantAttr {{.*}}cuda.h
+__constant__ int m1;
+
+// CHECK-LABEL: VarDecl {{.*}} m2 'int'
+// CHECK-NEXT: CUDAConstantAttr {{.*}}cuda.h
+// CHECK-NOT: CUDAConstantAttr
+__constant__ __constant__ int m2;
+
+// CHECK-LABEL: VarDecl {{.*}} m3 'const int'
+// HOST-NOT: CUDAConstantAttr
+// DEV-NOT: CUDAConstantAttr {{.*}}cuda.h
+// DEV: CUDAConstantAttr {{.*}}Implicit
+// DEV-NOT: CUDAConstantAttr {{.*}}cuda.h
+constexpr int m3 = 1;
+
+// CHECK-LABEL: VarDecl {{.*}} m3a 'const int'
+// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit
+// CHECK: CUDAConstantAttr {{.*}}cuda.h
+// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit
+constexpr __constant__ int m3a = 2;
+
+// CHECK-LABEL: VarDecl {{.*}} m3b 'const int'
+// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit
+// CHECK: CUDAConstantAttr {{.*}}cuda.h
+// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit
+__constant__ constexpr int m3b = 3;
diff --git a/clang/test/CodeGenCUDA/host-used-device-var.cu b/clang/test/CodeGenCUDA/host-used-device-var.cu
index b12300b73e192..b94ef689b3162 100644
--- a/clang/test/CodeGenCUDA/host-used-device-var.cu
+++ b/clang/test/CodeGenCUDA/host-used-device-var.cu
@@ -66,30 +66,148 @@ __device__ T add_func (T x, T y)
template <typename T>
__device__ func_t<T> p_add_func = add_func<T>;
+// Check non-constant constexpr variables ODR-used by host code only is not emitted.
+// DEV-NEG-NOT: constexpr_var1a
+// DEV-NEG-NOT: constexpr_var1b
+constexpr int constexpr_var1a = 1;
+inline constexpr int constexpr_var1b = 1;
+
+// Check constant constexpr variables ODR-used by host code only.
+// Non-inline constexpr variable has internal linkage, therefore it is not accessible by host and not kept.
+// Inline constexpr variable has linkonce_ord linkage, therefore it can be accessed by host and kept.
+// DEV-NEG-NOT: constexpr_var2a
+// DEV-DAG: @constexpr_var2b = linkonce_odr addrspace(4) externally_initialized constant i32 2
+__constant__ constexpr int constexpr_var2a = 2;
+inline __constant__ constexpr int constexpr_var2b = 2;
+
void use(func_t<int> p);
-void use(int *p);
+__host__ __device__ void use(const int *p);
+// Check static device variable in host function.
+// DEV-DAG: @_ZZ4fun1vE11static_var1 = addrspace(1) externally_initialized global i32 3
void fun1() {
+ static __device__ int static_var1 = 3;
use(&u1);
use(&u2);
use(&u3);
use(&ext_var);
use(&inline_var);
use(p_add_func<int>);
+ use(&constexpr_var1a);
+ use(&constexpr_var1b);
+ use(&constexpr_var2a);
+ use(&constexpr_var2b);
+ use(&static_var1);
+}
+
+// Check static variable in host device function.
+// DEV-DAG: @_ZZ4fun2vE11static_var2 = internal addrspace(1) global i32 4
+// DEV-DAG: @_ZZ4fun2vE11static_var3 = addrspace(1) global i32 4
+__host__ __device__ void fun2() {
+ static int static_var2 = 4;
+ static __device__ int static_var3 = 4;
+ use(&static_var2);
+ use(&static_var3);
}
__global__ void kern1(int **x) {
*x = &u4;
+ fun2();
+}
+
+// Check static variables of lambda functions.
+
+// Lambda functions are implicit host device functions.
+// Default static variables in lambda functions should be treated
+// as host variables on host side, therefore should not be forced
+// to be emitted on device.
+
+// DEV-DAG: @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2 = addrspace(1) externally_initialized global i32 5
+// DEV-NEG-NOT: @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var1
+namespace TestStaticVarInLambda {
+class A {
+public:
+ A(char *);
+};
+void fun() {
+ (void) [](char *c) {
+ static A var1(c);
+ static __device__ int var2 = 5;
+ (void) var1;
+ (void) var2;
+ };
+}
+}
+
+// Check implicit constant variable ODR-used by host code is not emitted.
+
+// AST contains instantiation of al<ar>, which triggers AST instantiation
+// of x::al<ar>::am, which triggers AST instatiation of x::ap<ar>,
+// which triggers AST instantiation of aw<ar>::c, which has type
+// ar. ar has base class x which has member ah. x::ah is initialized
+// with function pointer pointing to ar:as, which returns an object
+// of type ou. The constexpr aw<ar>::c is an implicit constant variable
+// which is ODR-used by host function x::ap<ar>. An incorrect implementation
+// will force aw<ar>::c to be emitted on device side, which will trigger
+// emit of x::as and further more ctor of ou and variable o.
+// The ODR-use of aw<ar>::c in x::ap<ar> should be treated as a host variable
+// instead of device variable.
+
+// DEV-NEG-NOT: _ZN16TestConstexprVar1oE
+namespace TestConstexprVar {
+char o;
+class ou {
+public:
+ ou(char) { __builtin_strlen(&o); }
+};
+template < typename ao > struct aw { static constexpr ao c; };
+class x {
+protected:
+ typedef ou (*y)(const x *);
+ constexpr x(y ag) : ah(ag) {}
+ template < bool * > struct ak;
+ template < typename > struct al {
+ static bool am;
+ static ak< &am > an;
+ };
+ template < typename ao > static x ap() { (void)aw< ao >::c; return x(nullptr); }
+ y ah;
+};
+template < typename ao > bool x::al< ao >::am(&ap< ao >);
+class ar : x {
+public:
+ constexpr ar() : x(as) {}
+ static ou as(const x *) { return 0; }
+ al< ar > av;
+};
}
// Check the exact list of variables to ensure @_ZL2u4 is not among them.
-// DEV: @llvm.compiler.used = {{[^@]*}} @_Z10p_add_funcIiE {{[^@]*}} @_ZL2u3 {{[^@]*}} @inline_var {{[^@]*}} @u1 {{[^@]*}} @u2 {{[^@]*}} @u5
+// DEV: @llvm.compiler.used = {{[^@]*}} @_Z10p_add_funcIiE
+// DEV-SAME: {{^[^@]*}} @_ZL2u3
+// DEV-SAME: {{^[^@]*}} @_ZZ4fun1vE11static_var1
+// DEV-SAME: {{^[^@]*}} @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2
+// DEV-SAME: {{^[^@]*}} @constexpr_var2b
+// DEV-SAME: {{^[^@]*}} @inline_var
+// DEV-SAME: {{^[^@]*}} @u1
+// DEV-SAME: {{^[^@]*}} @u2
+// DEV-SAME: {{^[^@]*}} @u5
+// DEV-SAME: {{^[^@]*$}}
// HOST-DAG: hipRegisterVar{{.*}}@u1
// HOST-DAG: hipRegisterVar{{.*}}@u2
// HOST-DAG: hipRegisterVar{{.*}}@_ZL2u3
+// HOST-DAG: hipRegisterVar{{.*}}@constexpr_var2b
// HOST-DAG: hipRegisterVar{{.*}}@u5
// HOST-DAG: hipRegisterVar{{.*}}@inline_var
// HOST-DAG: hipRegisterVar{{.*}}@_Z10p_add_funcIiE
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZ4fun1vE11static_var1
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZ4fun2vE11static_var2
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZ4fun2vE11static_var3
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var1
// HOST-NEG-NOT: hipRegisterVar{{.*}}@ext_var
// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZL2u4
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var1a
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var1b
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var2a
diff --git a/clang/test/SemaCUDA/static-device-var.cu b/clang/test/SemaCUDA/static-device-var.cu
index 5c8b89853b57b..0416e1e224830 100644
--- a/clang/test/SemaCUDA/static-device-var.cu
+++ b/clang/test/SemaCUDA/static-device-var.cu
@@ -1,16 +1,14 @@
// REQUIRES: x86-registered-target
// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
-// RUN: -emit-llvm -o - %s -fsyntax-only -verify=dev
+// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -std=c++11 \
+// RUN: -emit-llvm -o - %s -fsyntax-only -verify=dev,com
-// RUN: %clang_cc1 -triple x86_64-gnu-linux \
-// RUN: -emit-llvm -o - %s -fsyntax-only -verify=host
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
+// RUN: -emit-llvm -o - %s -fsyntax-only -verify=host,com
// Checks allowed usage of file-scope and function-scope static variables.
-// host-no-diagnostics
-
#include "Inputs/cuda.h"
// Checks static variables are allowed in device functions.
@@ -42,6 +40,28 @@ __global__ void kernel(int *a) {
// dev-error at -1 {{reference to __host__ variable 'z' in __global__ function}}
}
+// Check dynamic initialization of static device variable is not allowed.
+
+namespace TestStaticVarInLambda {
+class A {
+public:
+ A(char *);
+};
+class B {
+public:
+ __device__ B(char *);
+};
+void fun() {
+ (void) [](char *c) {
+ static A var1(c);
+ static __device__ B var2(c);
+ // com-error at -1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
+ (void) var1;
+ (void) var2;
+ };
+}
+}
+
int* getDeviceSymbol(int *x);
void foo() {
More information about the cfe-commits
mailing list