[PATCH] D12241: [CUDA] Change initializer for CUDA device code based on CUDA documentation.
Xuetian Weng via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 21 09:31:28 PDT 2015
wengxt created this revision.
wengxt added reviewers: jingyue, jholewinski.
wengxt added a subscriber: llvm-commits.
According to CUDA documentation, global variable with __device__,
__constant__ can be initialized from host code, so mark them as
externally initialized. For __shared__ variable, its value might be kept
by GPU Memory among different kernel invocation and it is not possible
to initialize it. So change __shared__ variable it to undefined value.
Wrongly using zero initializer may cause illegitimate optimization, e.g.
removing unused __constant__ variable because it's not updated in the device
code and the value is initialized with zero.
Test Plan: test/CodeGenCUDA/address-spaces.cu
http://reviews.llvm.org/D12241
Files:
lib/CodeGen/CodeGenModule.cpp
test/CodeGenCUDA/address-spaces.cu
test/CodeGenCUDA/filter-decl.cu
Index: test/CodeGenCUDA/filter-decl.cu
===================================================================
--- test/CodeGenCUDA/filter-decl.cu
+++ test/CodeGenCUDA/filter-decl.cu
@@ -9,12 +9,12 @@
// CHECK-DEVICE-NOT: module asm "file scope asm is host only"
__asm__("file scope asm is host only");
-// CHECK-HOST-NOT: constantdata = global
-// CHECK-DEVICE: constantdata = global
+// CHECK-HOST-NOT: constantdata = externally_initialized global
+// CHECK-DEVICE: constantdata = externally_initialized global
__constant__ char constantdata[256];
-// CHECK-HOST-NOT: devicedata = global
-// CHECK-DEVICE: devicedata = global
+// CHECK-HOST-NOT: devicedata = externally_initialized global
+// CHECK-DEVICE: devicedata = externally_initialized global
__device__ char devicedata[256];
// CHECK-HOST-NOT: shareddata = global
Index: test/CodeGenCUDA/address-spaces.cu
===================================================================
--- test/CodeGenCUDA/address-spaces.cu
+++ test/CodeGenCUDA/address-spaces.cu
@@ -5,10 +5,10 @@
#include "Inputs/cuda.h"
-// CHECK: @i = addrspace(1) global
+// CHECK: @i = addrspace(1) externally_initialized global
__device__ int i;
-// CHECK: @j = addrspace(4) global
+// CHECK: @j = addrspace(4) externally_initialized global
__constant__ int j;
// CHECK: @k = addrspace(3) global
@@ -24,7 +24,7 @@
// CHECK: @_ZZ5func2vE1a = internal addrspace(3) global [256 x float] zeroinitializer
// CHECK: @_ZZ5func3vE1a = internal addrspace(3) global float 0.000000e+00
// CHECK: @_ZZ5func4vE1a = internal addrspace(3) global float 0.000000e+00
-// CHECK: @b = addrspace(3) global float 0.000000e+00
+// CHECK: @b = addrspace(3) global float undef
__device__ void foo() {
// CHECK: load i32, i32* addrspacecast (i32 addrspace(1)* @i to i32*)
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -1990,7 +1990,14 @@
const VarDecl *InitDecl;
const Expr *InitExpr = D->getAnyInitializer(InitDecl);
- if (!InitExpr) {
+ // CUDA E.2.4.1 "__shared__ variables cannot have an initialization as part
+ // of their declaration."
+ if (LangOpts.CUDA && LangOpts.CUDAIsDevice
+ && D->hasAttr<CUDASharedAttr>()) {
+ if (InitExpr)
+ ErrorUnsupported(D, "static initializer");
+ Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
+ } else if (!InitExpr) {
// This is a tentative definition; tentative definitions are
// implicitly initialized with { 0 }.
//
@@ -2076,6 +2083,21 @@
if (D->hasAttr<AnnotateAttr>())
AddGlobalAnnotations(D, GV);
+ // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on
+ // the device. [...]
+ // Is accessible from all the threads within the grid and from the host
+ // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize()
+ // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol()).
+ //
+ // CUDA B.2.2 "The __constant__ qualifier, optionally used together with
+ // __device__, declares a variable that: [...]
+ // Is accessible from all the threads within the grid and from the host
+ // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize()
+ // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol()).
+ if (GV && LangOpts.CUDA && LangOpts.CUDAIsDevice &&
+ (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>())) {
+ GV->setExternallyInitialized(true);
+ }
GV->setInitializer(Init);
// If it is safe to mark the global 'constant', do so now.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D12241.32834.patch
Type: text/x-patch
Size: 3596 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150821/8b02d656/attachment.bin>
More information about the llvm-commits
mailing list