r245786 - [CUDA] Change initializer for CUDA device code based on CUDA documentation.
Jingyue Wu via cfe-commits
cfe-commits at lists.llvm.org
Fri Aug 21 22:49:29 PDT 2015
Author: jingyue
Date: Sat Aug 22 00:49:28 2015
New Revision: 245786
URL: http://llvm.org/viewvc/llvm-project?rev=245786&view=rev
Log:
[CUDA] Change initializer for CUDA device code based on CUDA documentation.
Summary:
According to CUDA documentation, global variables declared with __device__,
__constant__ can be initialized from host code, so mark them as
externally initialized. Because __shared__ variables cannot have an
initialization as part of their declaration and since the value maybe kept
across different kernel invocation, the value of __shared__ is effectively
undefined instead of zero initialized.
Wrongly using zero initializer may cause illegitimate optimization, e.g.
removing unused __constant__ variable because it's not updated in the device
code and the value is initialized with zero.
Test Plan: test/CodeGenCUDA/address-spaces.cu
Patch by Xuetian Weng
Reviewers: jholewinski, eliben, tra, jingyue
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D12241
Modified:
cfe/trunk/lib/CodeGen/CodeGenModule.cpp
cfe/trunk/test/CodeGenCUDA/address-spaces.cu
cfe/trunk/test/CodeGenCUDA/filter-decl.cu
Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=245786&r1=245785&r2=245786&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Sat Aug 22 00:49:28 2015
@@ -1990,7 +1990,16 @@ void CodeGenModule::EmitGlobalVarDefinit
const VarDecl *InitDecl;
const Expr *InitExpr = D->getAnyInitializer(InitDecl);
- if (!InitExpr) {
+ // CUDA E.2.4.1 "__shared__ variables cannot have an initialization as part
+ // of their declaration."
+ if (getLangOpts().CPlusPlus && getLangOpts().CUDAIsDevice
+ && D->hasAttr<CUDASharedAttr>()) {
+ if (InitExpr) {
+ Error(D->getLocation(),
+ "__shared__ variable cannot have an initialization.");
+ }
+ Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
+ } else if (!InitExpr) {
// This is a tentative definition; tentative definitions are
// implicitly initialized with { 0 }.
//
@@ -2076,6 +2085,17 @@ void CodeGenModule::EmitGlobalVarDefinit
if (D->hasAttr<AnnotateAttr>())
AddGlobalAnnotations(D, GV);
+ // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on
+ // the device. [...]"
+ // CUDA B.2.2 "The __constant__ qualifier, optionally used together with
+ // __device__, declares a variable that: [...]
+ // Is accessible from all the threads within the grid and from the host
+ // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize()
+ // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())."
+ if (GV && LangOpts.CUDA && LangOpts.CUDAIsDevice &&
+ (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>())) {
+ GV->setExternallyInitialized(true);
+ }
GV->setInitializer(Init);
// If it is safe to mark the global 'constant', do so now.
Modified: cfe/trunk/test/CodeGenCUDA/address-spaces.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/address-spaces.cu?rev=245786&r1=245785&r2=245786&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/address-spaces.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/address-spaces.cu Sat Aug 22 00:49:28 2015
@@ -5,10 +5,10 @@
#include "Inputs/cuda.h"
-// CHECK: @i = addrspace(1) global
+// CHECK: @i = addrspace(1) externally_initialized global
__device__ int i;
-// CHECK: @j = addrspace(4) global
+// CHECK: @j = addrspace(4) externally_initialized global
__constant__ int j;
// CHECK: @k = addrspace(3) global
@@ -24,7 +24,7 @@ struct MyStruct {
// CHECK: @_ZZ5func2vE1a = internal addrspace(3) global [256 x float] zeroinitializer
// CHECK: @_ZZ5func3vE1a = internal addrspace(3) global float 0.000000e+00
// CHECK: @_ZZ5func4vE1a = internal addrspace(3) global float 0.000000e+00
-// CHECK: @b = addrspace(3) global float 0.000000e+00
+// CHECK: @b = addrspace(3) global float undef
__device__ void foo() {
// CHECK: load i32, i32* addrspacecast (i32 addrspace(1)* @i to i32*)
Modified: cfe/trunk/test/CodeGenCUDA/filter-decl.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/filter-decl.cu?rev=245786&r1=245785&r2=245786&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/filter-decl.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/filter-decl.cu Sat Aug 22 00:49:28 2015
@@ -9,12 +9,12 @@
// CHECK-DEVICE-NOT: module asm "file scope asm is host only"
__asm__("file scope asm is host only");
-// CHECK-HOST-NOT: constantdata = global
-// CHECK-DEVICE: constantdata = global
+// CHECK-HOST-NOT: constantdata = externally_initialized global
+// CHECK-DEVICE: constantdata = externally_initialized global
__constant__ char constantdata[256];
-// CHECK-HOST-NOT: devicedata = global
-// CHECK-DEVICE: devicedata = global
+// CHECK-HOST-NOT: devicedata = externally_initialized global
+// CHECK-DEVICE: devicedata = externally_initialized global
__device__ char devicedata[256];
// CHECK-HOST-NOT: shareddata = global
More information about the cfe-commits
mailing list