[clang] 4cb4256 - [CUDA][HIP] Fix device variables used by host

Yaxun Liu via cfe-commits cfe-commits at lists.llvm.org
Thu May 20 14:04:55 PDT 2021


Author: Yaxun (Sam) Liu
Date: 2021-05-20T17:04:29-04:00
New Revision: 4cb42564ec4b56ef7eb4758bfa4ddf844a163687

URL: https://github.com/llvm/llvm-project/commit/4cb42564ec4b56ef7eb4758bfa4ddf844a163687
DIFF: https://github.com/llvm/llvm-project/commit/4cb42564ec4b56ef7eb4758bfa4ddf844a163687.diff

LOG: [CUDA][HIP] Fix device variables used by host

variables emitted on both host and device side with different addresses
when ODR-used by host function should not cause device side counter-part
to be force emitted.

This fixes the regression caused by https://reviews.llvm.org/D102237

Reviewed by: Artem Belevich

Differential Revision: https://reviews.llvm.org/D102801

Added: 
    clang/test/AST/ast-dump-constant-var.cu

Modified: 
    clang/include/clang/Sema/Sema.h
    clang/lib/CodeGen/CGDeclCXX.cpp
    clang/lib/CodeGen/CodeGenModule.cpp
    clang/lib/Sema/SemaCUDA.cpp
    clang/lib/Sema/SemaDeclAttr.cpp
    clang/lib/Sema/SemaExpr.cpp
    clang/test/CodeGenCUDA/host-used-device-var.cu
    clang/test/SemaCUDA/static-device-var.cu

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 9c459a95a6916..114ff6441b4a8 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -12066,6 +12066,15 @@ class Sema final {
                                         bool IgnoreImplicitHDAttr = false);
   CUDAFunctionTarget IdentifyCUDATarget(const ParsedAttributesView &Attrs);
 
+  enum CUDAVariableTarget {
+    CVT_Device,  /// Emitted on device side with a shadow variable on host side
+    CVT_Host,    /// Emitted on host side only
+    CVT_Both,    /// Emitted on both sides with 
diff erent addresses
+    CVT_Unified, /// Emitted as a unified address, e.g. managed variables
+  };
+  /// Determines whether the given variable is emitted on host or device side.
+  CUDAVariableTarget IdentifyCUDATarget(const VarDecl *D);
+
   /// Gets the CUDA target for the current context.
   CUDAFunctionTarget CurrentCUDATarget() {
     return IdentifyCUDATarget(dyn_cast<FunctionDecl>(CurContext));

diff  --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index 8131b5285075c..27a93c243eca0 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -644,7 +644,9 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
     Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
   }
 
-  if (getLangOpts().HIP) {
+  assert(!getLangOpts().CUDA || !getLangOpts().CUDAIsDevice ||
+         getLangOpts().GPUAllowDeviceInit);
+  if (getLangOpts().HIP && getLangOpts().CUDAIsDevice) {
     Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
     Fn->addFnAttr("device-init");
   }

diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 03920982ee086..8d1b2ea5d5228 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2368,6 +2368,8 @@ void CodeGenModule::EmitDeferred() {
   }
 
   // Emit CUDA/HIP static device variables referenced by host code only.
+  // Note we should not clear CUDADeviceVarODRUsedByHost since it is still
+  // needed for further handling.
   if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
     for (const auto *V : getContext().CUDADeviceVarODRUsedByHost)
       DeferredDeclsToEmit.push_back(V);

diff  --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index b907374b466f3..8f9800767f896 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -26,6 +26,14 @@
 #include "llvm/ADT/SmallVector.h"
 using namespace clang;
 
+template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
+  if (!D)
+    return false;
+  if (auto *A = D->getAttr<AttrT>())
+    return !A->isImplicit();
+  return false;
+}
+
 void Sema::PushForceCUDAHostDevice() {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
   ForceCUDAHostDeviceDepth++;
@@ -133,6 +141,35 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
   return CFT_Host;
 }
 
+/// IdentifyTarget - Determine the CUDA compilation target for this variable.
+Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) {
+  if (Var->hasAttr<HIPManagedAttr>())
+    return CVT_Unified;
+  if (Var->isConstexpr() && !hasExplicitAttr<CUDAConstantAttr>(Var))
+    return CVT_Both;
+  if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
+      Var->hasAttr<CUDASharedAttr>() ||
+      Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
+      Var->getType()->isCUDADeviceBuiltinTextureType())
+    return CVT_Device;
+  // Function-scope static variable without explicit device or constant
+  // attribute are emitted
+  //  - on both sides in host device functions
+  //  - on device side in device or global functions
+  if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
+    switch (IdentifyCUDATarget(FD)) {
+    case CFT_HostDevice:
+      return CVT_Both;
+    case CFT_Device:
+    case CFT_Global:
+      return CVT_Device;
+    default:
+      return CVT_Host;
+    }
+  }
+  return CVT_Host;
+}
+
 // * CUDA Call preference table
 //
 // F - from,
@@ -637,7 +674,8 @@ void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
 
 void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
   if (getLangOpts().CUDAIsDevice && VD->isConstexpr() &&
-      (VD->isFileVarDecl() || VD->isStaticDataMember())) {
+      (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
+      !VD->hasAttr<CUDAConstantAttr>()) {
     VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
   }
 }

diff  --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 4bbbcf985cbd3..ea1930ea47d67 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -4419,6 +4419,13 @@ static void handleConstantAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     S.Diag(AL.getLoc(), diag::err_cuda_nonstatic_constdev);
     return;
   }
+  // constexpr variable may already get an implicit constant attr, which should
+  // be replaced by the explicit constant attr.
+  if (auto *A = D->getAttr<CUDAConstantAttr>()) {
+    if (!A->isImplicit())
+      return;
+    D->dropAttr<CUDAConstantAttr>();
+  }
   D->addAttr(::new (S.Context) CUDAConstantAttr(S.Context, AL));
 }
 

diff  --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 16fab09a96dda..8181c12547b7d 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -17146,28 +17146,20 @@ MarkVarDeclODRUsed(VarDecl *Var, SourceLocation Loc, Sema &SemaRef,
 
   if (SemaRef.LangOpts.CUDA && Var && Var->hasGlobalStorage()) {
     auto *FD = dyn_cast_or_null<FunctionDecl>(SemaRef.CurContext);
-    auto Target = SemaRef.IdentifyCUDATarget(FD);
-    auto IsEmittedOnDeviceSide = [](VarDecl *Var) {
-      if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
-          Var->hasAttr<CUDASharedAttr>() ||
-          Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
-          Var->getType()->isCUDADeviceBuiltinTextureType())
-        return true;
-      // Function-scope static variable in device functions or kernels are
-      // emitted on device side.
-      if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
-        return FD->hasAttr<CUDADeviceAttr>() || FD->hasAttr<CUDAGlobalAttr>();
-      }
-      return false;
-    };
-    if (!IsEmittedOnDeviceSide(Var)) {
+    auto VarTarget = SemaRef.IdentifyCUDATarget(Var);
+    auto UserTarget = SemaRef.IdentifyCUDATarget(FD);
+    if (VarTarget == Sema::CVT_Host &&
+        (UserTarget == Sema::CFT_Device || UserTarget == Sema::CFT_HostDevice ||
+         UserTarget == Sema::CFT_Global)) {
       // Diagnose ODR-use of host global variables in device functions.
       // Reference of device global variables in host functions is allowed
       // through shadow variables therefore it is not diagnosed.
       if (SemaRef.LangOpts.CUDAIsDevice)
         SemaRef.targetDiag(Loc, diag::err_ref_bad_target)
-            << /*host*/ 2 << /*variable*/ 1 << Var << Target;
-    } else if ((Target == Sema::CFT_Host || Target == Sema::CFT_HostDevice) &&
+            << /*host*/ 2 << /*variable*/ 1 << Var << UserTarget;
+    } else if (VarTarget == Sema::CVT_Device &&
+               (UserTarget == Sema::CFT_Host ||
+                UserTarget == Sema::CFT_HostDevice) &&
                !Var->hasExternalStorage()) {
       // Record a CUDA/HIP device side variable if it is ODR-used
       // by host code. This is done conservatively, when the variable is

diff  --git a/clang/test/AST/ast-dump-constant-var.cu b/clang/test/AST/ast-dump-constant-var.cu
new file mode 100644
index 0000000000000..cec93af5ff7bf
--- /dev/null
+++ b/clang/test/AST/ast-dump-constant-var.cu
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -std=c++14 -ast-dump -x hip %s | FileCheck -check-prefixes=CHECK,HOST %s
+// RUN: %clang_cc1 -std=c++14 -ast-dump -fcuda-is-device -x hip %s | FileCheck -check-prefixes=CHECK,DEV %s
+
+#include "Inputs/cuda.h"
+
+// CHECK-LABEL: VarDecl {{.*}} m1 'int'
+// CHECK-NEXT: CUDAConstantAttr {{.*}}cuda.h
+__constant__ int m1;
+
+// CHECK-LABEL: VarDecl {{.*}} m2 'int'
+// CHECK-NEXT: CUDAConstantAttr {{.*}}cuda.h
+// CHECK-NOT: CUDAConstantAttr
+__constant__ __constant__ int m2;
+
+// CHECK-LABEL: VarDecl {{.*}} m3 'const int'
+// HOST-NOT: CUDAConstantAttr
+// DEV-NOT: CUDAConstantAttr {{.*}}cuda.h
+// DEV: CUDAConstantAttr {{.*}}Implicit
+// DEV-NOT: CUDAConstantAttr {{.*}}cuda.h
+constexpr int m3 = 1;
+
+// CHECK-LABEL: VarDecl {{.*}} m3a 'const int'
+// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit
+// CHECK: CUDAConstantAttr {{.*}}cuda.h
+// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit
+constexpr __constant__ int m3a = 2;
+
+// CHECK-LABEL: VarDecl {{.*}} m3b 'const int'
+// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit
+// CHECK: CUDAConstantAttr {{.*}}cuda.h
+// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit
+__constant__ constexpr int m3b = 3;

diff  --git a/clang/test/CodeGenCUDA/host-used-device-var.cu b/clang/test/CodeGenCUDA/host-used-device-var.cu
index b12300b73e192..b94ef689b3162 100644
--- a/clang/test/CodeGenCUDA/host-used-device-var.cu
+++ b/clang/test/CodeGenCUDA/host-used-device-var.cu
@@ -66,30 +66,148 @@ __device__ T add_func (T x, T y)
 template <typename T>
 __device__ func_t<T> p_add_func = add_func<T>;
 
+// Check non-constant constexpr variables ODR-used by host code only is not emitted.
+// DEV-NEG-NOT: constexpr_var1a
+// DEV-NEG-NOT: constexpr_var1b
+constexpr int constexpr_var1a = 1;
+inline constexpr int constexpr_var1b = 1;
+
+// Check constant constexpr variables ODR-used by host code only.
+// Non-inline constexpr variable has internal linkage, therefore it is not accessible by host and not kept.
+// Inline constexpr variable has linkonce_ord linkage, therefore it can be accessed by host and kept.
+// DEV-NEG-NOT: constexpr_var2a
+// DEV-DAG: @constexpr_var2b = linkonce_odr addrspace(4) externally_initialized constant i32 2
+__constant__ constexpr int constexpr_var2a = 2;
+inline __constant__ constexpr int constexpr_var2b = 2;
+
 void use(func_t<int> p);
-void use(int *p);
+__host__ __device__ void use(const int *p);
 
+// Check static device variable in host function.
+// DEV-DAG:  @_ZZ4fun1vE11static_var1 = addrspace(1) externally_initialized global i32 3
 void fun1() {
+  static __device__ int static_var1 = 3;
   use(&u1);
   use(&u2);
   use(&u3);
   use(&ext_var);
   use(&inline_var);
   use(p_add_func<int>);
+  use(&constexpr_var1a);
+  use(&constexpr_var1b);
+  use(&constexpr_var2a);
+  use(&constexpr_var2b);
+  use(&static_var1);
+}
+
+// Check static variable in host device function.
+// DEV-DAG:  @_ZZ4fun2vE11static_var2 = internal addrspace(1) global i32 4
+// DEV-DAG:  @_ZZ4fun2vE11static_var3 = addrspace(1) global i32 4
+__host__ __device__ void fun2() {
+  static int static_var2 = 4;
+  static __device__ int static_var3 = 4;
+  use(&static_var2);
+  use(&static_var3);
 }
 
 __global__ void kern1(int **x) {
   *x = &u4;
+  fun2();
+}
+
+// Check static variables of lambda functions.
+
+// Lambda functions are implicit host device functions.
+// Default static variables in lambda functions should be treated
+// as host variables on host side, therefore should not be forced
+// to be emitted on device.
+
+// DEV-DAG: @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2 = addrspace(1) externally_initialized global i32 5
+// DEV-NEG-NOT: @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var1
+namespace TestStaticVarInLambda {
+class A {
+public:
+  A(char *);
+};
+void fun() {
+  (void) [](char *c) {
+    static A var1(c);
+    static __device__ int var2 = 5;
+    (void) var1;
+    (void) var2;
+  };
+}
+}
+
+// Check implicit constant variable ODR-used by host code is not emitted.
+
+// AST contains instantiation of al<ar>, which triggers AST instantiation
+// of x::al<ar>::am, which triggers AST instatiation of x::ap<ar>,
+// which triggers AST instantiation of aw<ar>::c, which has type
+// ar. ar has base class x which has member ah. x::ah is initialized
+// with function pointer pointing to ar:as, which returns an object
+// of type ou. The constexpr aw<ar>::c is an implicit constant variable
+// which is ODR-used by host function x::ap<ar>. An incorrect implementation
+// will force aw<ar>::c to be emitted on device side, which will trigger
+// emit of x::as and further more ctor of ou and variable o.
+// The ODR-use of aw<ar>::c in x::ap<ar> should be treated as a host variable
+// instead of device variable.
+
+// DEV-NEG-NOT: _ZN16TestConstexprVar1oE
+namespace TestConstexprVar {
+char o;
+class ou {
+public:
+  ou(char) { __builtin_strlen(&o); }
+};
+template < typename ao > struct aw { static constexpr ao c; };
+class x {
+protected:
+  typedef ou (*y)(const x *);
+  constexpr x(y ag) : ah(ag) {}
+  template < bool * > struct ak;
+  template < typename > struct al {
+    static bool am;
+    static ak< &am > an;
+  };
+  template < typename ao > static x ap() { (void)aw< ao >::c; return x(nullptr); }
+  y ah;
+};
+template < typename ao > bool x::al< ao >::am(&ap< ao >);
+class ar : x {
+public:
+  constexpr ar() : x(as) {}
+  static ou as(const x *) { return 0; }
+  al< ar > av;
+};
 }
 
 // Check the exact list of variables to ensure @_ZL2u4 is not among them.
-// DEV: @llvm.compiler.used = {{[^@]*}} @_Z10p_add_funcIiE {{[^@]*}} @_ZL2u3 {{[^@]*}} @inline_var {{[^@]*}} @u1 {{[^@]*}} @u2 {{[^@]*}} @u5
+// DEV: @llvm.compiler.used = {{[^@]*}} @_Z10p_add_funcIiE
+// DEV-SAME: {{^[^@]*}} @_ZL2u3
+// DEV-SAME: {{^[^@]*}} @_ZZ4fun1vE11static_var1
+// DEV-SAME: {{^[^@]*}} @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2
+// DEV-SAME: {{^[^@]*}} @constexpr_var2b
+// DEV-SAME: {{^[^@]*}} @inline_var
+// DEV-SAME: {{^[^@]*}} @u1
+// DEV-SAME: {{^[^@]*}} @u2
+// DEV-SAME: {{^[^@]*}} @u5
+// DEV-SAME: {{^[^@]*$}}
 
 // HOST-DAG: hipRegisterVar{{.*}}@u1
 // HOST-DAG: hipRegisterVar{{.*}}@u2
 // HOST-DAG: hipRegisterVar{{.*}}@_ZL2u3
+// HOST-DAG: hipRegisterVar{{.*}}@constexpr_var2b
 // HOST-DAG: hipRegisterVar{{.*}}@u5
 // HOST-DAG: hipRegisterVar{{.*}}@inline_var
 // HOST-DAG: hipRegisterVar{{.*}}@_Z10p_add_funcIiE
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZ4fun1vE11static_var1
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZ4fun2vE11static_var2
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZ4fun2vE11static_var3
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var1
 // HOST-NEG-NOT: hipRegisterVar{{.*}}@ext_var
 // HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZL2u4
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var1a
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var1b
+// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var2a

diff  --git a/clang/test/SemaCUDA/static-device-var.cu b/clang/test/SemaCUDA/static-device-var.cu
index 5c8b89853b57b..0416e1e224830 100644
--- a/clang/test/SemaCUDA/static-device-var.cu
+++ b/clang/test/SemaCUDA/static-device-var.cu
@@ -1,16 +1,14 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 
-// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
-// RUN:    -emit-llvm -o - %s -fsyntax-only -verify=dev
+// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -std=c++11 \
+// RUN:    -emit-llvm -o - %s -fsyntax-only -verify=dev,com
 
-// RUN: %clang_cc1 -triple x86_64-gnu-linux \
-// RUN:    -emit-llvm -o - %s -fsyntax-only -verify=host
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
+// RUN:    -emit-llvm -o - %s -fsyntax-only -verify=host,com
 
 // Checks allowed usage of file-scope and function-scope static variables.
 
-// host-no-diagnostics
-
 #include "Inputs/cuda.h"
 
 // Checks static variables are allowed in device functions.
@@ -42,6 +40,28 @@ __global__ void kernel(int *a) {
   // dev-error at -1 {{reference to __host__ variable 'z' in __global__ function}}
 }
 
+// Check dynamic initialization of static device variable is not allowed.
+
+namespace TestStaticVarInLambda {
+class A {
+public:
+  A(char *);
+};
+class B {
+public:
+  __device__ B(char *);
+};
+void fun() {
+  (void) [](char *c) {
+    static A var1(c);
+    static __device__ B var2(c);
+    // com-error at -1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
+    (void) var1;
+    (void) var2;
+  };
+}
+}
+
 int* getDeviceSymbol(int *x);
 
 void foo() {


        


More information about the cfe-commits mailing list