[PATCH] D12241: [CUDA] Change initializer for CUDA device code based on CUDA documentation.

Xuetian Weng via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 21 09:31:28 PDT 2015


wengxt created this revision.
wengxt added reviewers: jingyue, jholewinski.
wengxt added a subscriber: llvm-commits.

According to CUDA documentation, global variable with __device__,
__constant__ can be initialized from host code, so mark them as
externally initialized. For __shared__ variable, its value might be kept
by GPU Memory among different kernel invocation and it is not possible
to initialize it. So change __shared__ variable it to undefined value.

Wrongly using zero initializer may cause illegitimate optimization, e.g.
removing unused __constant__ variable because it's not updated in the device
code and the value is initialized with zero.

Test Plan: test/CodeGenCUDA/address-spaces.cu

http://reviews.llvm.org/D12241

Files:
  lib/CodeGen/CodeGenModule.cpp
  test/CodeGenCUDA/address-spaces.cu
  test/CodeGenCUDA/filter-decl.cu

Index: test/CodeGenCUDA/filter-decl.cu
===================================================================
--- test/CodeGenCUDA/filter-decl.cu
+++ test/CodeGenCUDA/filter-decl.cu
@@ -9,12 +9,12 @@
 // CHECK-DEVICE-NOT: module asm "file scope asm is host only"
 __asm__("file scope asm is host only");
 
-// CHECK-HOST-NOT: constantdata = global
-// CHECK-DEVICE: constantdata = global
+// CHECK-HOST-NOT: constantdata = externally_initialized global
+// CHECK-DEVICE: constantdata = externally_initialized global
 __constant__ char constantdata[256];
 
-// CHECK-HOST-NOT: devicedata = global
-// CHECK-DEVICE: devicedata = global
+// CHECK-HOST-NOT: devicedata = externally_initialized global
+// CHECK-DEVICE: devicedata = externally_initialized global
 __device__ char devicedata[256];
 
 // CHECK-HOST-NOT: shareddata = global
Index: test/CodeGenCUDA/address-spaces.cu
===================================================================
--- test/CodeGenCUDA/address-spaces.cu
+++ test/CodeGenCUDA/address-spaces.cu
@@ -5,10 +5,10 @@
 
 #include "Inputs/cuda.h"
 
-// CHECK: @i = addrspace(1) global
+// CHECK: @i = addrspace(1) externally_initialized global
 __device__ int i;
 
-// CHECK: @j = addrspace(4) global
+// CHECK: @j = addrspace(4) externally_initialized global
 __constant__ int j;
 
 // CHECK: @k = addrspace(3) global
@@ -24,7 +24,7 @@
 // CHECK: @_ZZ5func2vE1a = internal addrspace(3) global [256 x float] zeroinitializer
 // CHECK: @_ZZ5func3vE1a = internal addrspace(3) global float 0.000000e+00
 // CHECK: @_ZZ5func4vE1a = internal addrspace(3) global float 0.000000e+00
-// CHECK: @b = addrspace(3) global float 0.000000e+00
+// CHECK: @b = addrspace(3) global float undef
 
 __device__ void foo() {
   // CHECK: load i32, i32* addrspacecast (i32 addrspace(1)* @i to i32*)
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -1990,7 +1990,14 @@
   const VarDecl *InitDecl;
   const Expr *InitExpr = D->getAnyInitializer(InitDecl);
 
-  if (!InitExpr) {
+  // CUDA E.2.4.1 "__shared__ variables cannot have an initialization as part
+  // of their declaration."
+  if (LangOpts.CUDA && LangOpts.CUDAIsDevice
+      && D->hasAttr<CUDASharedAttr>()) {
+    if (InitExpr)
+      ErrorUnsupported(D, "static initializer");
+    Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
+  } else if (!InitExpr) {
     // This is a tentative definition; tentative definitions are
     // implicitly initialized with { 0 }.
     //
@@ -2076,6 +2083,21 @@
   if (D->hasAttr<AnnotateAttr>())
     AddGlobalAnnotations(D, GV);
 
+  // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on
+  // the device. [...]
+  // Is accessible from all the threads within the grid and from the host
+  // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize()
+  // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol()).
+  //
+  // CUDA B.2.2 "The __constant__ qualifier, optionally used together with
+  // __device__, declares a variable that: [...]
+  // Is accessible from all the threads within the grid and from the host
+  // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize()
+  // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol()).
+  if (GV && LangOpts.CUDA && LangOpts.CUDAIsDevice &&
+      (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>())) {
+    GV->setExternallyInitialized(true);
+  }
   GV->setInitializer(Init);
 
   // If it is safe to mark the global 'constant', do so now.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D12241.32834.patch
Type: text/x-patch
Size: 3596 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150821/8b02d656/attachment.bin>


More information about the llvm-commits mailing list