r328994 - [CUDA] Let device-side shared variables be initialized with undef

Yaxun Liu via cfe-commits cfe-commits at lists.llvm.org
Mon Apr 2 10:38:24 PDT 2018


Author: yaxunl
Date: Mon Apr  2 10:38:24 2018
New Revision: 328994

URL: http://llvm.org/viewvc/llvm-project?rev=328994&view=rev
Log:
[CUDA] Let device-side shared variables be initialized with undef

CUDA shared variables should be initialized with undef instead of a zero initializer.

Patch by Greg Rodgers.
Revised and lit test added by Yaxun Liu.

Differential Revision: https://reviews.llvm.org/D44985

Modified:
    cfe/trunk/lib/CodeGen/CGDecl.cpp
    cfe/trunk/test/CodeGenCUDA/address-spaces.cu
    cfe/trunk/test/CodeGenCUDA/device-var-init.cu

Modified: cfe/trunk/lib/CodeGen/CGDecl.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGDecl.cpp?rev=328994&r1=328993&r2=328994&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGDecl.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGDecl.cpp Mon Apr  2 10:38:24 2018
@@ -229,12 +229,14 @@ llvm::Constant *CodeGenModule::getOrCrea
   LangAS AS = GetGlobalVarAddressSpace(&D);
   unsigned TargetAS = getContext().getTargetAddressSpace(AS);
 
-  // Local address space cannot have an initializer.
+  // OpenCL variables in local address space and CUDA shared
+  // variables cannot have an initializer.
   llvm::Constant *Init = nullptr;
-  if (Ty.getAddressSpace() != LangAS::opencl_local)
-    Init = EmitNullConstant(Ty);
-  else
+  if (Ty.getAddressSpace() == LangAS::opencl_local ||
+      D.hasAttr<CUDASharedAttr>())
     Init = llvm::UndefValue::get(LTy);
+  else
+    Init = EmitNullConstant(Ty);
 
   llvm::GlobalVariable *GV = new llvm::GlobalVariable(
       getModule(), LTy, Ty.isConstant(getContext()), Linkage, Init, Name,

Modified: cfe/trunk/test/CodeGenCUDA/address-spaces.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/address-spaces.cu?rev=328994&r1=328993&r2=328994&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/address-spaces.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/address-spaces.cu Mon Apr  2 10:38:24 2018
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple amdgcn | FileCheck %s
 
 // Verifies Clang emits correct address spaces and addrspacecast instructions
 // for CUDA code.
@@ -19,11 +20,11 @@ struct MyStruct {
   int data2;
 };
 
-// CHECK: @_ZZ5func0vE1a = internal addrspace(3) global %struct.MyStruct zeroinitializer
-// CHECK: @_ZZ5func1vE1a = internal addrspace(3) global float 0.000000e+00
-// CHECK: @_ZZ5func2vE1a = internal addrspace(3) global [256 x float] zeroinitializer
-// CHECK: @_ZZ5func3vE1a = internal addrspace(3) global float 0.000000e+00
-// CHECK: @_ZZ5func4vE1a = internal addrspace(3) global float 0.000000e+00
+// CHECK: @_ZZ5func0vE1a = internal addrspace(3) global %struct.MyStruct undef
+// CHECK: @_ZZ5func1vE1a = internal addrspace(3) global float undef
+// CHECK: @_ZZ5func2vE1a = internal addrspace(3) global [256 x float] undef
+// CHECK: @_ZZ5func3vE1a = internal addrspace(3) global float undef
+// CHECK: @_ZZ5func4vE1a = internal addrspace(3) global float undef
 // CHECK: @b = addrspace(3) global float undef
 
 __device__ void foo() {
@@ -48,7 +49,7 @@ __device__ void func0() {
   ap->data2 = 2;
 }
 // CHECK: define void @_Z5func0v()
-// CHECK: store %struct.MyStruct* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct*), %struct.MyStruct** %ap
+// CHECK: store %struct.MyStruct* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct*), %struct.MyStruct** %{{.*}}
 
 __device__ void callee(float *ap) {
   *ap = 1.0f;
@@ -67,7 +68,7 @@ __device__ void func2() {
   *ap = 1.0f;
 }
 // CHECK: define void @_Z5func2v()
-// CHECK: store float* getelementptr inbounds ([256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i32 0, i32 128), float** %ap
+// CHECK: store float* getelementptr inbounds ([256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i{{32|64}} 0, i{{32|64}} 128), float** %{{.*}}
 
 __device__ void func3() {
   __shared__ float a;
@@ -75,7 +76,7 @@ __device__ void func3() {
   *ap = 1.0f;
 }
 // CHECK: define void @_Z5func3v()
-// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float*), float** %ap
+// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float*), float** %{{.*}}
 
 __device__ void func4() {
   __shared__ float a;
@@ -83,7 +84,7 @@ __device__ void func4() {
   *ap = 1.0f;
 }
 // CHECK: define void @_Z5func4v()
-// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float*), float** %ap
+// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float*), float** %{{.*}}
 
 __shared__ float b;
 

Modified: cfe/trunk/test/CodeGenCUDA/device-var-init.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-var-init.cu?rev=328994&r1=328993&r2=328994&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/device-var-init.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/device-var-init.cu Mon Apr  2 10:38:24 2018
@@ -1,10 +1,14 @@
 // REQUIRES: nvptx-registered-target
+// REQUIRES: amdgpu-registered-target
 
 // Make sure we don't allow dynamic initialization for device
 // variables, but accept empty constructors allowed by CUDA.
 
 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \
-// RUN:     -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck %s
+// RUN:     -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,NVPTX %s
+
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 \
+// RUN:     -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,AMDGCN %s
 
 #ifdef __clang__
 #include "Inputs/cuda.h"
@@ -105,50 +109,102 @@ __shared__ EC_I_EC s_ec_i_ec;
 __constant__ EC_I_EC c_ec_i_ec;
 // CHECK: @c_ec_i_ec = addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer,
 
+// CHECK: @_ZZ2dfvE4s_ec = internal addrspace(3) global %struct.EC undef
+// CHECK: @_ZZ2dfvE5s_etc = internal addrspace(3) global %struct.ETC undef
+
 // We should not emit global initializers for device-side variables.
 // CHECK-NOT: @__cxx_global_var_init
 
 // Make sure that initialization restrictions do not apply to local
 // variables.
 __device__ void df() {
+  // NVPTX:  %[[ec:.*]] = alloca %struct.EC
+  // NVPTX:  %[[ed:.*]] = alloca %struct.ED
+  // NVPTX:  %[[ecd:.*]] = alloca %struct.ECD
+  // NVPTX:  %[[etc:.*]] = alloca %struct.ETC
+  // NVPTX:  %[[uc:.*]] = alloca %struct.UC
+  // NVPTX:  %[[ud:.*]] = alloca %struct.UD
+  // NVPTX:  %[[eci:.*]] = alloca %struct.ECI
+  // NVPTX:  %[[nec:.*]] = alloca %struct.NEC
+  // NVPTX:  %[[ned:.*]] = alloca %struct.NED
+  // NVPTX:  %[[ncv:.*]] = alloca %struct.NCV
+  // NVPTX:  %[[vd:.*]] = alloca %struct.VD
+  // NVPTX:  %[[ncf:.*]] = alloca %struct.NCF
+  // NVPTX:  %[[ncfs:.*]] = alloca %struct.NCFS
+  // NVPTX:  %[[utc:.*]] = alloca %struct.UTC
+  // NVPTX:  %[[netc:.*]] = alloca %struct.NETC
+  // NVPTX:  %[[ec_i_ec:.*]] = alloca %struct.EC_I_EC
+  // NVPTX:  %[[ec_i_ec1:.*]] = alloca %struct.EC_I_EC1
+  // NVPTX:  %[[t_v_t:.*]] = alloca %struct.T_V_T
+  // NVPTX:  %[[t_b_nec:.*]] = alloca %struct.T_B_NEC
+  // NVPTX:  %[[t_f_nec:.*]] = alloca %struct.T_F_NEC
+  // NVPTX:  %[[t_fa_nec:.*]] = alloca %struct.T_FA_NEC
+  // NVPTX:  %[[t_b_ned:.*]] = alloca %struct.T_B_NED
+  // NVPTX:  %[[t_f_ned:.*]] = alloca %struct.T_F_NED
+  // NVPTX:  %[[t_fa_ned:.*]] = alloca %struct.T_FA_NED
+  // AMDGCN:  %[[ec:.*]] = addrspacecast %struct.EC addrspace(5)* %ec to %struct.EC*
+  // AMDGCN:  %[[ed:.*]] = addrspacecast %struct.ED addrspace(5)* %ed to %struct.ED*
+  // AMDGCN:  %[[ecd:.*]] = addrspacecast %struct.ECD addrspace(5)* %ecd to %struct.ECD*
+  // AMDGCN:  %[[etc:.*]] = addrspacecast %struct.ETC addrspace(5)* %etc to %struct.ETC*
+  // AMDGCN:  %[[uc:.*]] = addrspacecast %struct.UC addrspace(5)* %uc to %struct.UC*
+  // AMDGCN:  %[[ud:.*]] = addrspacecast %struct.UD addrspace(5)* %ud to %struct.UD*
+  // AMDGCN:  %[[eci:.*]] = addrspacecast %struct.ECI addrspace(5)* %eci to %struct.ECI*
+  // AMDGCN:  %[[nec:.*]] = addrspacecast %struct.NEC addrspace(5)* %nec to %struct.NEC*
+  // AMDGCN:  %[[ned:.*]] = addrspacecast %struct.NED addrspace(5)* %ned to %struct.NED*
+  // AMDGCN:  %[[ncv:.*]] = addrspacecast %struct.NCV addrspace(5)* %ncv to %struct.NCV*
+  // AMDGCN:  %[[vd:.*]] = addrspacecast %struct.VD addrspace(5)* %vd to %struct.VD*
+  // AMDGCN:  %[[ncf:.*]] = addrspacecast %struct.NCF addrspace(5)* %ncf to %struct.NCF*
+  // AMDGCN:  %[[ncfs:.*]] = addrspacecast %struct.NCFS addrspace(5)* %ncfs to %struct.NCFS*
+  // AMDGCN:  %[[utc:.*]] = addrspacecast %struct.UTC addrspace(5)* %utc to %struct.UTC*
+  // AMDGCN:  %[[netc:.*]] = addrspacecast %struct.NETC addrspace(5)* %netc to %struct.NETC*
+  // AMDGCN:  %[[ec_i_ec:.*]] = addrspacecast %struct.EC_I_EC addrspace(5)* %ec_i_ec to %struct.EC_I_EC*
+  // AMDGCN:  %[[ec_i_ec1:.*]] = addrspacecast %struct.EC_I_EC1 addrspace(5)* %ec_i_ec1 to %struct.EC_I_EC1*
+  // AMDGCN:  %[[t_v_t:.*]] = addrspacecast %struct.T_V_T addrspace(5)* %t_v_t to %struct.T_V_T*
+  // AMDGCN:  %[[t_b_nec:.*]] = addrspacecast %struct.T_B_NEC addrspace(5)* %t_b_nec to %struct.T_B_NEC*
+  // AMDGCN:  %[[t_f_nec:.*]] = addrspacecast %struct.T_F_NEC addrspace(5)* %t_f_nec to %struct.T_F_NEC*
+  // AMDGCN:  %[[t_fa_nec:.*]] = addrspacecast %struct.T_FA_NEC addrspace(5)* %t_fa_nec to %struct.T_FA_NEC*
+  // AMDGCN:  %[[t_b_ned:.*]] = addrspacecast %struct.T_B_NED addrspace(5)* %t_b_ned to %struct.T_B_NED*
+  // AMDGCN:  %[[t_f_ned:.*]] = addrspacecast %struct.T_F_NED addrspace(5)* %t_f_ned to %struct.T_F_NED*
+  // AMDGCN:  %[[t_fa_ned:.*]] = addrspacecast %struct.T_FA_NED addrspace(5)* %t_fa_ned to %struct.T_FA_NED*
+
   T t;
   // CHECK-NOT: call
   EC ec;
-  // CHECK:   call void @_ZN2ECC1Ev(%struct.EC* %ec)
+  // CHECK:  call void @_ZN2ECC1Ev(%struct.EC* %[[ec]])
   ED ed;
   // CHECK-NOT: call
   ECD ecd;
-  // CHECK:   call void @_ZN3ECDC1Ev(%struct.ECD* %ecd)
+  // CHECK:  call void @_ZN3ECDC1Ev(%struct.ECD* %[[ecd]])
   ETC etc;
-  // CHECK:   call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %etc)
+  // CHECK:  call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %[[etc]])
   UC uc;
   // undefined constructor -- not allowed
-  // CHECK:   call void @_ZN2UCC1Ev(%struct.UC* %uc)
+  // CHECK:  call void @_ZN2UCC1Ev(%struct.UC* %[[uc]])
   UD ud;
   // undefined destructor -- not allowed
   // CHECK-NOT: call
   ECI eci;
   // empty constructor w/ initializer list -- not allowed
-  // CHECK:   call void @_ZN3ECIC1Ev(%struct.ECI* %eci)
+  // CHECK:  call void @_ZN3ECIC1Ev(%struct.ECI* %[[eci]])
   NEC nec;
   // non-empty constructor -- not allowed
-  // CHECK:   call void @_ZN3NECC1Ev(%struct.NEC* %nec)
+  // CHECK:  call void @_ZN3NECC1Ev(%struct.NEC* %[[nec]])
   // non-empty destructor -- not allowed
   NED ned;
   // no-constructor,  virtual method -- not allowed
-  // CHECK:   call void @_ZN3NCVC1Ev(%struct.NCV* %ncv)
+  // CHECK:  call void @_ZN3NCVC1Ev(%struct.NCV* %[[ncv]])
   NCV ncv;
   // CHECK-NOT: call
   VD vd;
-  // CHECK:   call void @_ZN2VDC1Ev(%struct.VD* %vd)
+  // CHECK:  call void @_ZN2VDC1Ev(%struct.VD* %[[vd]])
   NCF ncf;
-  // CHECK:   call void @_ZN3NCFC1Ev(%struct.NCF* %ncf)
+  // CHECK:   call void @_ZN3NCFC1Ev(%struct.NCF* %[[ncf]])
   NCFS ncfs;
-  // CHECK:   call void @_ZN4NCFSC1Ev(%struct.NCFS* %ncfs)
+  // CHECK:  call void @_ZN4NCFSC1Ev(%struct.NCFS* %[[ncfs]])
   UTC utc;
-  // CHECK:   call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %utc)
+  // CHECK:  call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %[[utc]])
   NETC netc;
-  // CHECK:   call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc)
+  // CHECK:  call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %[[netc]])
   T_B_T t_b_t;
   // CHECK-NOT: call
   T_F_T t_f_t;
@@ -156,17 +212,17 @@ __device__ void df() {
   T_FA_T t_fa_t;
   // CHECK-NOT: call
   EC_I_EC ec_i_ec;
-  // CHECK:   call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec)
+  // CHECK:  call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %[[ec_i_ec]])
   EC_I_EC1 ec_i_ec1;
-  // CHECK:   call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1)
+  // CHECK:  call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %[[ec_i_ec1]])
   T_V_T t_v_t;
-  // CHECK:   call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t)
+  // CHECK:  call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %[[t_v_t]])
   T_B_NEC t_b_nec;
-  // CHECK:   call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec)
+  // CHECK:  call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %[[t_b_nec]])
   T_F_NEC t_f_nec;
-  // CHECK:   call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec)
+  // CHECK:  call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %[[t_f_nec]])
   T_FA_NEC t_fa_nec;
-  // CHECK:   call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec)
+  // CHECK:  call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %[[t_fa_nec]])
   T_B_NED t_b_ned;
   // CHECK-NOT: call
   T_F_NED t_f_ned;
@@ -182,14 +238,14 @@ __device__ void df() {
   df(); // CHECK: call void @_Z2dfv()
 
   // Verify that we only call non-empty destructors
-  // CHECK-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %t_fa_ned)
-  // CHECK-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %t_f_ned)
-  // CHECK-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %t_b_ned)
-  // CHECK-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %vd)
-  // CHECK-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %ned)
-  // CHECK-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %ud)
-  // CHECK-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %ecd)
-  // CHECK-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %ed)
+  // CHECK-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %[[t_fa_ned]])
+  // CHECK-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %[[t_f_ned]])
+  // CHECK-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %[[t_b_ned]])
+  // CHECK-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %[[vd]])
+  // CHECK-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %[[ned]])
+  // CHECK-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %[[ud]])
+  // CHECK-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %[[ecd]])
+  // CHECK-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %[[ed]])
 
   // CHECK-NEXT: ret void
 }




More information about the cfe-commits mailing list