r245786 - [CUDA] Change initializer for CUDA device code based on CUDA documentation.

Jingyue Wu via cfe-commits cfe-commits at lists.llvm.org
Fri Aug 21 22:49:29 PDT 2015


Author: jingyue
Date: Sat Aug 22 00:49:28 2015
New Revision: 245786

URL: http://llvm.org/viewvc/llvm-project?rev=245786&view=rev
Log:
[CUDA] Change initializer for CUDA device code based on CUDA documentation.

Summary:
According to CUDA documentation, global variables declared with __device__,
__constant__ can be initialized from host code, so mark them as
externally initialized. Because __shared__ variables cannot have an
initialization as part of their declaration and since the value maybe kept
across different kernel invocation, the value of __shared__ is effectively
undefined instead of zero initialized.

Wrongly using zero initializer may cause illegitimate optimization, e.g.
removing unused __constant__ variable because it's not updated in the device
code and the value is initialized with zero.

Test Plan: test/CodeGenCUDA/address-spaces.cu

Patch by Xuetian Weng

Reviewers: jholewinski, eliben, tra, jingyue

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D12241

Modified:
    cfe/trunk/lib/CodeGen/CodeGenModule.cpp
    cfe/trunk/test/CodeGenCUDA/address-spaces.cu
    cfe/trunk/test/CodeGenCUDA/filter-decl.cu

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=245786&r1=245785&r2=245786&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Sat Aug 22 00:49:28 2015
@@ -1990,7 +1990,16 @@ void CodeGenModule::EmitGlobalVarDefinit
   const VarDecl *InitDecl;
   const Expr *InitExpr = D->getAnyInitializer(InitDecl);
 
-  if (!InitExpr) {
+  // CUDA E.2.4.1 "__shared__ variables cannot have an initialization as part
+  // of their declaration."
+  if (getLangOpts().CPlusPlus && getLangOpts().CUDAIsDevice
+      && D->hasAttr<CUDASharedAttr>()) {
+    if (InitExpr) {
+      Error(D->getLocation(),
+            "__shared__ variable cannot have an initialization.");
+    }
+    Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
+  } else if (!InitExpr) {
     // This is a tentative definition; tentative definitions are
     // implicitly initialized with { 0 }.
     //
@@ -2076,6 +2085,17 @@ void CodeGenModule::EmitGlobalVarDefinit
   if (D->hasAttr<AnnotateAttr>())
     AddGlobalAnnotations(D, GV);
 
+  // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on
+  // the device. [...]"
+  // CUDA B.2.2 "The __constant__ qualifier, optionally used together with
+  // __device__, declares a variable that: [...]
+  // Is accessible from all the threads within the grid and from the host
+  // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize()
+  // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())."
+  if (GV && LangOpts.CUDA && LangOpts.CUDAIsDevice &&
+      (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>())) {
+    GV->setExternallyInitialized(true);
+  }
   GV->setInitializer(Init);
 
   // If it is safe to mark the global 'constant', do so now.

Modified: cfe/trunk/test/CodeGenCUDA/address-spaces.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/address-spaces.cu?rev=245786&r1=245785&r2=245786&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/address-spaces.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/address-spaces.cu Sat Aug 22 00:49:28 2015
@@ -5,10 +5,10 @@
 
 #include "Inputs/cuda.h"
 
-// CHECK: @i = addrspace(1) global
+// CHECK: @i = addrspace(1) externally_initialized global
 __device__ int i;
 
-// CHECK: @j = addrspace(4) global
+// CHECK: @j = addrspace(4) externally_initialized global
 __constant__ int j;
 
 // CHECK: @k = addrspace(3) global
@@ -24,7 +24,7 @@ struct MyStruct {
 // CHECK: @_ZZ5func2vE1a = internal addrspace(3) global [256 x float] zeroinitializer
 // CHECK: @_ZZ5func3vE1a = internal addrspace(3) global float 0.000000e+00
 // CHECK: @_ZZ5func4vE1a = internal addrspace(3) global float 0.000000e+00
-// CHECK: @b = addrspace(3) global float 0.000000e+00
+// CHECK: @b = addrspace(3) global float undef
 
 __device__ void foo() {
   // CHECK: load i32, i32* addrspacecast (i32 addrspace(1)* @i to i32*)

Modified: cfe/trunk/test/CodeGenCUDA/filter-decl.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/filter-decl.cu?rev=245786&r1=245785&r2=245786&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/filter-decl.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/filter-decl.cu Sat Aug 22 00:49:28 2015
@@ -9,12 +9,12 @@
 // CHECK-DEVICE-NOT: module asm "file scope asm is host only"
 __asm__("file scope asm is host only");
 
-// CHECK-HOST-NOT: constantdata = global
-// CHECK-DEVICE: constantdata = global
+// CHECK-HOST-NOT: constantdata = externally_initialized global
+// CHECK-DEVICE: constantdata = externally_initialized global
 __constant__ char constantdata[256];
 
-// CHECK-HOST-NOT: devicedata = global
-// CHECK-DEVICE: devicedata = global
+// CHECK-HOST-NOT: devicedata = externally_initialized global
+// CHECK-DEVICE: devicedata = externally_initialized global
 __device__ char devicedata[256];
 
 // CHECK-HOST-NOT: shareddata = global




More information about the cfe-commits mailing list