r349981 - [CUDA] Treat extern global variable shadows same as regular extern vars.

Artem Belevich via cfe-commits cfe-commits at lists.llvm.org
Fri Dec 21 17:11:09 PST 2018


Author: tra
Date: Fri Dec 21 17:11:09 2018
New Revision: 349981

URL: http://llvm.org/viewvc/llvm-project?rev=349981&view=rev
Log:
[CUDA] Treat extern global variable shadows same as regular extern vars.

This fixes compiler crash when we attempted to compile this code:

extern __device__ int data;
__device__ int data = 1;

Differential Revision: https://reviews.llvm.org/D56033

Modified:
    cfe/trunk/lib/CodeGen/CodeGenModule.cpp
    cfe/trunk/test/CodeGenCUDA/device-stub.cu

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=349981&r1=349980&r2=349981&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Fri Dec 21 17:11:09 2018
@@ -2188,15 +2188,7 @@ void CodeGenModule::EmitGlobal(GlobalDec
   } else {
     const auto *VD = cast<VarDecl>(Global);
     assert(VD->isFileVarDecl() && "Cannot emit local var decl as global.");
-    // We need to emit device-side global CUDA variables even if a
-    // variable does not have a definition -- we still need to define
-    // host-side shadow for it.
-    bool MustEmitForCuda = LangOpts.CUDA && !LangOpts.CUDAIsDevice &&
-                           !VD->hasDefinition() &&
-                           (VD->hasAttr<CUDAConstantAttr>() ||
-                            VD->hasAttr<CUDADeviceAttr>());
-    if (!MustEmitForCuda &&
-        VD->isThisDeclarationADefinition() != VarDecl::Definition &&
+    if (VD->isThisDeclarationADefinition() != VarDecl::Definition &&
         !Context.isMSStaticDataMemberInlineDefinition(VD)) {
       if (LangOpts.OpenMP) {
         // Emit declaration of the must-be-emitted declare target variable.
@@ -3616,7 +3608,10 @@ void CodeGenModule::EmitGlobalVarDefinit
           Flags |= CGCUDARuntime::ExternDeviceVar;
         if (D->hasAttr<CUDAConstantAttr>())
           Flags |= CGCUDARuntime::ConstantDeviceVar;
-        getCUDARuntime().registerDeviceVar(*GV, Flags);
+        // Extern global variables will be registered in the TU where they are
+        // defined.
+        if (!D->hasExternalStorage())
+          getCUDARuntime().registerDeviceVar(*GV, Flags);
       } else if (D->hasAttr<CUDASharedAttr>())
         // __shared__ variables are odd. Shadows do get created, but
         // they are not registered with the CUDA runtime, so they

Modified: cfe/trunk/test/CodeGenCUDA/device-stub.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-stub.cu?rev=349981&r1=349980&r2=349981&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/device-stub.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/device-stub.cu Fri Dec 21 17:11:09 2018
@@ -42,13 +42,20 @@ int host_var;
 // ALL-DAG: @ext_host_var = external global i32
 extern int ext_host_var;
 
-// Shadows for external device-side variables are *definitions* of
-// those variables.
-// ALL-DAG: @ext_device_var = internal global i32
+// external device-side variables -> extern references to their shadows.
+// ALL-DAG: @ext_device_var = external global i32
 extern __device__ int ext_device_var;
-// ALL-DAG: @ext_device_var = internal global i32
+// ALL-DAG: @ext_device_var = external global i32
 extern __constant__ int ext_constant_var;
 
+// external device-side variables with definitions should generate
+// definitions for the shadows.
+// ALL-DAG: @ext_device_var_def = internal global i32 undef,
+extern __device__ int ext_device_var_def;
+__device__ int ext_device_var_def = 1;
+// ALL-DAG: @ext_device_var_def = internal global i32 undef,
+__constant__ int ext_constant_var_def = 2;
+
 void use_pointers() {
   int *p;
   p = &device_var;
@@ -114,8 +121,8 @@ void hostfunc(void) { kernelfunc<<<1, 1>
 // ALL: call{{.*}}[[PREFIX]]RegisterFunction(i8** %0, {{.*}}kernelfunc
 // ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0
 // ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0
-// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0
-// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0
+// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{.*}}i32 0, i32 4, i32 0, i32 0
+// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{.*}}i32 0, i32 4, i32 1, i32 0
 // ALL: ret void
 
 // Test that we've built a constructor.




More information about the cfe-commits mailing list