r346507 - [OPENMP][NVPTX]Allow to use shared memory for the

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Fri Nov 9 08:18:04 PST 2018


Author: abataev
Date: Fri Nov  9 08:18:04 2018
New Revision: 346507

URL: http://llvm.org/viewvc/llvm-project?rev=346507&view=rev
Log:
[OPENMP][NVPTX]Allow to use shared memory for the
target|teams|distribute variables.

If the total size of the variables, declared in target|teams|distribute
regions, is less than the maximal size of shared memory available, the
buffer is allocated in the shared memory.

Modified:
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
    cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp
    cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
    cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp
    cfe/trunk/test/OpenMP/nvptx_parallel_for_codegen.cpp
    cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
    cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
    cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=346507&r1=346506&r2=346507&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Fri Nov  9 08:18:04 2018
@@ -176,6 +176,9 @@ enum MachineConfiguration : unsigned {
 
   /// Global memory alignment for performance.
   GlobalMemoryAlignment = 128,
+
+  /// Maximal size of the shared memory buffer.
+  SharedMemorySize = 128,
 };
 
 enum NamedBarrier : unsigned {
@@ -1143,13 +1146,6 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKe
   IsInTTDRegion = true;
   // Reserve place for the globalized memory.
   GlobalizedRecords.emplace_back();
-  if (!StaticGlobalized) {
-    StaticGlobalized = new llvm::GlobalVariable(
-        CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/true,
-        llvm::GlobalValue::WeakAnyLinkage, nullptr,
-        "_openmp_static_glob_rd$ptr");
-    StaticGlobalized->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-  }
   if (!KernelStaticGlobalized) {
     KernelStaticGlobalized = new llvm::GlobalVariable(
         CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
@@ -1277,13 +1273,6 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKerne
   IsInTTDRegion = true;
   // Reserve place for the globalized memory.
   GlobalizedRecords.emplace_back();
-  if (!StaticGlobalized) {
-    StaticGlobalized = new llvm::GlobalVariable(
-        CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/true,
-        llvm::GlobalValue::WeakAnyLinkage, nullptr,
-        "_openmp_static_glob_rd$ptr");
-    StaticGlobalized->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-  }
   if (!KernelStaticGlobalized) {
     KernelStaticGlobalized = new llvm::GlobalVariable(
         CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
@@ -2138,30 +2127,41 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
       GlobalizedRecords.back().Records.push_back(GlobalizedVarsRecord);
       ++GlobalizedRecords.back().RegionCounter;
       if (GlobalizedRecords.back().Records.size() == 1) {
-        assert(StaticGlobalized &&
-               "Static pointer must be initialized already.");
-        Address Buffer = CGF.EmitLoadOfPointer(
-            Address(StaticGlobalized, CGM.getPointerAlign()),
-            CGM.getContext()
-                .getPointerType(CGM.getContext().VoidPtrTy)
-                .castAs<PointerType>());
+        assert(KernelStaticGlobalized &&
+               "Kernel static pointer must be initialized already.");
+        auto *UseSharedMemory = new llvm::GlobalVariable(
+            CGM.getModule(), CGM.Int16Ty, /*isConstant=*/true,
+            llvm::GlobalValue::InternalLinkage, nullptr,
+            "_openmp_static_kernel$is_shared");
+        UseSharedMemory->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+        QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth(
+            /*DestWidth=*/16, /*Signed=*/0);
+        llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar(
+            Address(UseSharedMemory,
+                    CGM.getContext().getTypeAlignInChars(Int16Ty)),
+            /*Volatile=*/false, Int16Ty, Loc);
+        auto *StaticGlobalized = new llvm::GlobalVariable(
+            CGM.getModule(), CGM.Int8Ty, /*isConstant=*/false,
+            llvm::GlobalValue::WeakAnyLinkage, nullptr);
         auto *RecSize = new llvm::GlobalVariable(
             CGM.getModule(), CGM.SizeTy, /*isConstant=*/true,
             llvm::GlobalValue::InternalLinkage, nullptr,
             "_openmp_static_kernel$size");
         RecSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
         llvm::Value *Ld = CGF.EmitLoadOfScalar(
-            Address(RecSize, CGM.getPointerAlign()), /*Volatile=*/false,
+            Address(RecSize, CGM.getSizeAlign()), /*Volatile=*/false,
             CGM.getContext().getSizeType(), Loc);
         llvm::Value *ResAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
             KernelStaticGlobalized, CGM.VoidPtrPtrTy);
-        llvm::Value *GlobalRecordSizeArg[] = {
-            Buffer.getPointer(), Ld,
-            llvm::ConstantInt::getNullValue(CGM.Int16Ty), ResAddr};
+        llvm::Value *GlobalRecordSizeArg[] = {StaticGlobalized, Ld,
+                                              IsInSharedMemory, ResAddr};
         CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
                                 OMPRTL_NVPTX__kmpc_get_team_static_memory),
                             GlobalRecordSizeArg);
+        GlobalizedRecords.back().Buffer = StaticGlobalized;
         GlobalizedRecords.back().RecSize = RecSize;
+        GlobalizedRecords.back().UseSharedMemory = UseSharedMemory;
+        GlobalizedRecords.back().Loc = Loc;
       }
       assert(KernelStaticGlobalized && "Global address must be set already.");
       Address FrameAddr = CGF.EmitLoadOfPointer(
@@ -2336,10 +2336,16 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
         --GlobalizedRecords.back().RegionCounter;
         // Emit the restore function only in the target region.
         if (GlobalizedRecords.back().RegionCounter == 0) {
+          QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth(
+              /*DestWidth=*/16, /*Signed=*/0);
+          llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar(
+              Address(GlobalizedRecords.back().UseSharedMemory,
+                      CGM.getContext().getTypeAlignInChars(Int16Ty)),
+              /*Volatile=*/false, Int16Ty, GlobalizedRecords.back().Loc);
           CGF.EmitRuntimeCall(
               createNVPTXRuntimeFunction(
                   OMPRTL_NVPTX__kmpc_restore_team_static_memory),
-              llvm::ConstantInt::getNullValue(CGM.Int16Ty));
+              IsInSharedMemory);
         }
       } else {
         CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
@@ -4507,21 +4513,24 @@ static std::pair<unsigned, unsigned> get
 void CGOpenMPRuntimeNVPTX::clear() {
   if (!GlobalizedRecords.empty()) {
     ASTContext &C = CGM.getContext();
+    llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> GlobalRecs;
+    llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> SharedRecs;
     RecordDecl *StaticRD = C.buildImplicitRecord(
         "_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union);
     StaticRD->startDefinition();
+    RecordDecl *SharedStaticRD = C.buildImplicitRecord(
+        "_shared_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union);
+    SharedStaticRD->startDefinition();
     for (const GlobalPtrSizeRecsTy &Records : GlobalizedRecords) {
       if (Records.Records.empty())
         continue;
       unsigned Size = 0;
       unsigned RecAlignment = 0;
       for (const RecordDecl *RD : Records.Records) {
-        QualType RDTy = CGM.getContext().getRecordType(RD);
-        unsigned Alignment =
-            CGM.getContext().getTypeAlignInChars(RDTy).getQuantity();
+        QualType RDTy = C.getRecordType(RD);
+        unsigned Alignment = C.getTypeAlignInChars(RDTy).getQuantity();
         RecAlignment = std::max(RecAlignment, Alignment);
-        unsigned RecSize =
-            CGM.getContext().getTypeSizeInChars(RDTy).getQuantity();
+        unsigned RecSize = C.getTypeSizeInChars(RDTy).getQuantity();
         Size =
             llvm::alignTo(llvm::alignTo(Size, Alignment) + RecSize, Alignment);
       }
@@ -4529,32 +4538,67 @@ void CGOpenMPRuntimeNVPTX::clear() {
       llvm::APInt ArySize(/*numBits=*/64, Size);
       QualType SubTy = C.getConstantArrayType(
           C.CharTy, ArySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
-      auto *Field = FieldDecl::Create(
-          C, StaticRD, SourceLocation(), SourceLocation(), nullptr, SubTy,
-          C.getTrivialTypeSourceInfo(SubTy, SourceLocation()),
-          /*BW=*/nullptr, /*Mutable=*/false,
-          /*InitStyle=*/ICIS_NoInit);
+      const bool UseSharedMemory = Size <= SharedMemorySize;
+      auto *Field =
+          FieldDecl::Create(C, UseSharedMemory ? SharedStaticRD : StaticRD,
+                            SourceLocation(), SourceLocation(), nullptr, SubTy,
+                            C.getTrivialTypeSourceInfo(SubTy, SourceLocation()),
+                            /*BW=*/nullptr, /*Mutable=*/false,
+                            /*InitStyle=*/ICIS_NoInit);
       Field->setAccess(AS_public);
-      StaticRD->addDecl(Field);
+      if (UseSharedMemory) {
+        SharedStaticRD->addDecl(Field);
+        SharedRecs.push_back(&Records);
+      } else {
+        StaticRD->addDecl(Field);
+        GlobalRecs.push_back(&Records);
+      }
       Records.RecSize->setInitializer(llvm::ConstantInt::get(CGM.SizeTy, Size));
+      Records.UseSharedMemory->setInitializer(
+          llvm::ConstantInt::get(CGM.Int16Ty, UseSharedMemory ? 1 : 0));
+    }
+    SharedStaticRD->completeDefinition();
+    if (!SharedStaticRD->field_empty()) {
+      QualType StaticTy = C.getRecordType(SharedStaticRD);
+      llvm::Type *LLVMStaticTy = CGM.getTypes().ConvertTypeForMem(StaticTy);
+      auto *GV = new llvm::GlobalVariable(
+          CGM.getModule(), LLVMStaticTy,
+          /*isConstant=*/false, llvm::GlobalValue::WeakAnyLinkage,
+          llvm::Constant::getNullValue(LLVMStaticTy),
+          "_openmp_shared_static_glob_rd_$_", /*InsertBefore=*/nullptr,
+          llvm::GlobalValue::NotThreadLocal,
+          C.getTargetAddressSpace(LangAS::cuda_shared));
+      auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+          GV, CGM.VoidPtrTy);
+      for (const GlobalPtrSizeRecsTy *Rec : SharedRecs) {
+        Rec->Buffer->replaceAllUsesWith(Replacement);
+        Rec->Buffer->eraseFromParent();
+      }
     }
     StaticRD->completeDefinition();
-    QualType StaticTy = C.getRecordType(StaticRD);
-    std::pair<unsigned, unsigned> SMsBlockPerSM = getSMsBlocksPerSM(CGM);
-    llvm::APInt Size1(32, SMsBlockPerSM.second);
-    QualType Arr1Ty = C.getConstantArrayType(StaticTy, Size1, ArrayType::Normal,
-                                             /*IndexTypeQuals=*/0);
-    llvm::APInt Size2(32, SMsBlockPerSM.first);
-    QualType Arr2Ty = C.getConstantArrayType(Arr1Ty, Size2, ArrayType::Normal,
-                                             /*IndexTypeQuals=*/0);
-    llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty);
-    auto *GV = new llvm::GlobalVariable(
-        CGM.getModule(), LLVMArr2Ty,
-        /*isConstant=*/false, llvm::GlobalValue::WeakAnyLinkage,
-        llvm::Constant::getNullValue(LLVMArr2Ty), "_openmp_static_glob_rd_$_");
-    StaticGlobalized->setInitializer(
-        llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV,
-                                                             CGM.VoidPtrTy));
+    if (!StaticRD->field_empty()) {
+      QualType StaticTy = C.getRecordType(StaticRD);
+      std::pair<unsigned, unsigned> SMsBlockPerSM = getSMsBlocksPerSM(CGM);
+      llvm::APInt Size1(32, SMsBlockPerSM.second);
+      QualType Arr1Ty =
+          C.getConstantArrayType(StaticTy, Size1, ArrayType::Normal,
+                                 /*IndexTypeQuals=*/0);
+      llvm::APInt Size2(32, SMsBlockPerSM.first);
+      QualType Arr2Ty = C.getConstantArrayType(Arr1Ty, Size2, ArrayType::Normal,
+                                               /*IndexTypeQuals=*/0);
+      llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty);
+      auto *GV = new llvm::GlobalVariable(
+          CGM.getModule(), LLVMArr2Ty,
+          /*isConstant=*/false, llvm::GlobalValue::WeakAnyLinkage,
+          llvm::Constant::getNullValue(LLVMArr2Ty),
+          "_openmp_static_glob_rd_$_");
+      auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+          GV, CGM.VoidPtrTy);
+      for (const GlobalPtrSizeRecsTy *Rec : GlobalRecs) {
+        Rec->Buffer->replaceAllUsesWith(Replacement);
+        Rec->Buffer->eraseFromParent();
+      }
+    }
   }
   CGOpenMPRuntime::clear();
 }

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h?rev=346507&r1=346506&r2=346507&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h Fri Nov  9 08:18:04 2018
@@ -420,14 +420,14 @@ private:
   /// union. This resulting union (one per CU) is the entry point for the static
   /// memory management runtime functions.
   struct GlobalPtrSizeRecsTy {
+    llvm::GlobalVariable *UseSharedMemory = nullptr;
     llvm::GlobalVariable *RecSize = nullptr;
+    llvm::GlobalVariable *Buffer = nullptr;
+    SourceLocation Loc;
     llvm::SmallVector<const RecordDecl *, 2> Records;
     unsigned RegionCounter = 0;
   };
   llvm::SmallVector<GlobalPtrSizeRecsTy, 8> GlobalizedRecords;
-  /// Global variable used for staticlly allocated global memoryused for
-  /// globalization in target/teams/distribute regions.
-  llvm::GlobalVariable *StaticGlobalized = nullptr;
   /// Shared pointer for the global memory in the global memory buffer used for
   /// the given kernel.
   llvm::GlobalVariable *KernelStaticGlobalized = nullptr;

Modified: cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp?rev=346507&r1=346506&r2=346507&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp Fri Nov  9 08:18:04 2018
@@ -27,10 +27,10 @@ void test_ds(){
   }
 }
 // CK1: [[MEM_TY:%.+]] = type { [8 x i8] }
-// CK1-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer
-// CK1-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0)
+// CK1-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer
 // CK1-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
 // CK1-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i64 8
+// CK1-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
 /// ========= In the worker function ========= ///
 // CK1: {{.*}}define internal void @__omp_offloading{{.*}}test_ds{{.*}}_worker()
@@ -44,9 +44,9 @@ void test_ds(){
 // CK1: [[SHAREDARGS2:%.+]] = alloca i8**
 // CK1: call void @__kmpc_kernel_init
 // CK1: call void @__kmpc_data_sharing_init_stack
-// CK1: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
+// CK1: [[SHARED_MEM_FLAG:%.+]] = load i16, i16* [[KERNEL_SHARED]],
 // CK1: [[SIZE:%.+]] = load i64, i64* [[KERNEL_SIZE]],
-// CK1: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i64 [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CK1: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i64 [[SIZE]], i16 [[SHARED_MEM_FLAG]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CK1: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CK1: [[GLOBALSTACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i64 0
 // CK1: [[GLOBALSTACK2:%.+]] = bitcast i8* [[GLOBALSTACK]] to %struct._globalized_locals_ty*
@@ -75,7 +75,8 @@ void test_ds(){
 // CK1: call void @llvm.nvvm.barrier0()
 // CK1: call void @llvm.nvvm.barrier0()
 // CK1: call void @__kmpc_end_sharing_variables()
-// CK1: call void @__kmpc_restore_team_static_memory(i16 0)
+// CK1: [[SHARED_MEM_FLAG:%.+]] = load i16, i16* [[KERNEL_SHARED]],
+// CK1: call void @__kmpc_restore_team_static_memory(i16 [[SHARED_MEM_FLAG]])
 // CK1: call void @__kmpc_kernel_deinit(i16 1)
 
 /// ========= In the data sharing wrapper function ========= ///

Modified: cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp?rev=346507&r1=346506&r2=346507&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp Fri Nov  9 08:18:04 2018
@@ -22,8 +22,7 @@ int main(int argc, char **argv) {
 }
 
 // CHECK: [[MEM_TY:%.+]] = type { [84 x i8] }
-// CHECK-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer
-// CHECK-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0)
+// CHECK-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer
 // CHECK-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
 // CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 84
 // CHECK-DAG: @__omp_offloading_{{.*}}_main_l17_exec_mode = weak constant i8 1
@@ -31,8 +30,7 @@ int main(int argc, char **argv) {
 // CHECK-LABEL: define internal void @__omp_offloading_{{.*}}_main_l17_worker(
 
 // CHECK: define weak void @__omp_offloading_{{.*}}_main_l17([10 x i32]* dereferenceable(40) %{{.+}}, [10 x i32]* dereferenceable(40) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i{{64|32}} %{{.+}}, [10 x i32]* dereferenceable(40) %{{.+}})
-// CHECK: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
-// CHECK: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} 84, i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 84, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CHECK: [[PTR:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CHECK: [[STACK:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty*
 // CHECK: [[ARGC:%.+]] = load i32, i32* %{{.+}}, align
@@ -48,7 +46,7 @@ int main(int argc, char **argv) {
 
 // CHECK: call void @__kmpc_for_static_fini(%struct.ident_t* @
 
-// CHECK: call void @__kmpc_restore_team_static_memory(i16 0)
+// CHECK: call void @__kmpc_restore_team_static_memory(i16 1)
 
 // CHECK: define internal void [[PARALLEL]](
 // CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(

Modified: cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp?rev=346507&r1=346506&r2=346507&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp Fri Nov  9 08:18:04 2018
@@ -72,10 +72,10 @@ int bar(int n){
 }
 
 // CHECK: [[MEM_TY:%.+]] = type { [4 x i8] }
-// CHECK-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer
-// CHECK-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0)
+// CHECK-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer
 // CHECK-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
 // CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
+// CHECK-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
 // CHECK-NOT: define {{.*}}void {{@__omp_offloading_.+template.+l17}}_worker()
 
@@ -324,9 +324,9 @@ int bar(int n){
 // CHECK-32: [[A_ADDR:%.+]] = alloca i32,
 // CHECK-64: [[A_ADDR:%.+]] = alloca i64,
 // CHECK-64: [[CONV:%.+]] = bitcast i64* [[A_ADDR]] to i32*
-// CHECK: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
+// CHECK: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]],
 // CHECK: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE]],
-// CHECK: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[IS_SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CHECK: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CHECK: [[STACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0
 // CHECK: [[BC:%.+]] = bitcast i8* [[STACK]] to %struct._globalized_locals_ty*
@@ -334,7 +334,8 @@ int bar(int n){
 // CHECK-64: [[A:%.+]] = load i32, i32* [[CONV]],
 // CHECK: [[GLOBAL_A_ADDR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: store i32 [[A]], i32* [[GLOBAL_A_ADDR]],
-// CHECK: call void @__kmpc_restore_team_static_memory(i16 0)
+// CHECK: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]],
+// CHECK: call void @__kmpc_restore_team_static_memory(i16 [[IS_SHARED]])
 
 // CHECK-LABEL: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.*}})
 // CHECK:  [[CC:%.+]] = alloca i32,

Modified: cfe/trunk/test/OpenMP/nvptx_parallel_for_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_parallel_for_codegen.cpp?rev=346507&r1=346506&r2=346507&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_parallel_for_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_parallel_for_codegen.cpp Fri Nov  9 08:18:04 2018
@@ -31,10 +31,10 @@ int bar(int n){
 }
 
 // CHECK: [[MEM_TY:%.+]] = type { [4 x i8] }
-// CHECK-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer
-// CHECK-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0)
+// CHECK-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer
 // CHECK-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
 // CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
+// CHECK-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l12}}_worker()
 // CHECK: call void @llvm.nvvm.barrier0()
@@ -45,9 +45,9 @@ int bar(int n){
 // CHECK: call void @__omp_offloading_{{.*}}l12_worker()
 // CHECK: call void @__kmpc_kernel_init(
 // CHECK: call void @__kmpc_data_sharing_init_stack()
-// CHECK: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
+// CHECK: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]],
 // CHECK: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE]],
-// CHECK: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i64 %7, i16 %6, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CHECK: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CHECK: [[STACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0
 // CHECK: call void @__kmpc_kernel_prepare_parallel(
@@ -55,7 +55,8 @@ int bar(int n){
 // CHECK: call void @llvm.nvvm.barrier0()
 // CHECK: call void @llvm.nvvm.barrier0()
 // CHECK: call void @__kmpc_end_sharing_variables()
-// CHECK: call void @__kmpc_restore_team_static_memory(i16 0)
+// CHECK: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]],
+// CHECK: call void @__kmpc_restore_team_static_memory(i16 [[IS_SHARED]])
 // CHECK: call void @__kmpc_kernel_deinit(i16 1)
 
 // CHECK: define internal void @__omp_outlined__(

Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp?rev=346507&r1=346506&r2=346507&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp Fri Nov  9 08:18:04 2018
@@ -68,23 +68,22 @@ int bar(int n){
 }
 
 // CHECK-DAG: [[MEM_TY:%.+]] = type { [4 x i8] }
-// CHECK-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer
-// CHECK-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0)
+// CHECK-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer
 // CHECK-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
 // CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
+// CHECK-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l32(
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 1)
-// CHECK: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
-// CHECK: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} 4, i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CHECK: [[TEAM_ALLOC:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]*
 // CHECK: getelementptr inbounds [[REC]], [[REC]]* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
 // CHECK: {{call|invoke}} void [[OUTL1:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_restore_team_static_memory(i16 0)
+// CHECK: call void @__kmpc_restore_team_static_memory(i16 1)
 // CHECK: call void @__kmpc_spmd_kernel_deinit()
 // CHECK: ret void
 

Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp?rev=346507&r1=346506&r2=346507&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp Fri Nov  9 08:18:04 2018
@@ -63,23 +63,22 @@ int bar(int n){
 }
 
 // CHECK-DAG: [[MEM_TY:%.+]] = type { [4 x i8] }
-// CHECK-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer
-// CHECK-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0)
+// CHECK-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer
 // CHECK-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
 // CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
+// CHECK-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l30(
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 1)
-// CHECK: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
-// CHECK: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} 4, i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CHECK: [[TEAM_ALLOC:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]*
 // CHECK: getelementptr inbounds [[REC]], [[REC]]* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
 // CHECK: {{call|invoke}} void [[OUTL1:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_restore_team_static_memory(i16 0)
+// CHECK: call void @__kmpc_restore_team_static_memory(i16 1)
 // CHECK: call void @__kmpc_spmd_kernel_deinit()
 // CHECK: ret void
 

Modified: cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp?rev=346507&r1=346506&r2=346507&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp Fri Nov  9 08:18:04 2018
@@ -28,11 +28,12 @@ int main (int argc, char **argv) {
 }
 
 // CK1: [[MEM_TY:%.+]] = type { [{{4|8}} x i8] }
-// CK1-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer
-// CK1-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0)
+// CK1-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer
 // CK1-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
 // CK1-DAG: [[KERNEL_SIZE1:@.+]] = internal unnamed_addr constant i{{64|32}} 4
 // CK1-DAG: [[KERNEL_SIZE2:@.+]] = internal unnamed_addr constant i{{64|32}} {{8|4}}
+// CK1-DAG: [[KERNEL_SHARED1:@.+]] = internal unnamed_addr constant i16 1
+// CK1-DAG: [[KERNEL_SHARED2:@.+]] = internal unnamed_addr constant i16 1
 
 // only nvptx side: do not outline teams region and do not call fork_teams
 // CK1:  define {{.*}}void @{{[^,]+}}(i{{[0-9]+}} [[ARGC:%.+]])
@@ -43,9 +44,9 @@ int main (int argc, char **argv) {
 // CK1:  store {{.+}} 0, {{.+}},
 // CK1:  store i{{[0-9]+}} [[ARGC]], i{{[0-9]+}}* [[ARGCADDR]],
 // CK1-64:  [[CONV:%.+]] = bitcast i{{[0-9]+}}* [[ARGCADDR]] to i{{[0-9]+}}*
-// CK1: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
+// CK1: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED1]],
 // CK1: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE1]],
-// CK1: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CK1: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[IS_SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CK1: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CK1: [[GLOBALSTACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0
 // CK1-64:  [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]]
@@ -64,9 +65,9 @@ int main (int argc, char **argv) {
 // CK1: [[ARGCADDR_PTR:%.+]] = alloca i{{.+}}***,
 // CK1: [[ARGCADDR:%.+]] = alloca i{{.+}}**,
 // CK1: store i{{.+}}** [[ARGC]], i{{.+}}*** [[ARGCADDR]]
-// CK1: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
+// CK1: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED2]],
 // CK1: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE2]],
-// CK1: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CK1: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[IS_SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CK1: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CK1: [[GLOBALSTACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0
 // CK1: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]]
@@ -114,11 +115,12 @@ int main (int argc, char **argv) {
 }
 
 // CK2: [[MEM_TY:%.+]] = type { [{{4|8}} x i8] }
-// CK2-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer
-// CK2-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0)
+// CK2-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer
 // CK2-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
 // CK2-DAG: [[KERNEL_SIZE1:@.+]] = internal unnamed_addr constant i{{64|32}} 4
 // CK2-DAG: [[KERNEL_SIZE2:@.+]] = internal unnamed_addr constant i{{64|32}} {{8|4}}
+// CK2-DAG: [[KERNEL_SHARED1:@.+]] = internal unnamed_addr constant i16 1
+// CK2-DAG: [[KERNEL_SHARED2:@.+]] = internal unnamed_addr constant i16 1
 
 // CK2: define {{.*}}void @{{[^,]+}}(i{{[0-9]+}} [[A_IN:%.+]], i{{[0-9]+}} [[B_IN:%.+]], i{{[0-9]+}} [[ARGC_IN:.+]])
 // CK2: {{.}} = alloca i{{[0-9]+}}*,
@@ -133,9 +135,9 @@ int main (int argc, char **argv) {
 // CK2-64: [[ACONV:%.+]] = bitcast i64* [[AADDR]] to i32*
 // CK2-64: [[BCONV:%.+]] = bitcast i64* [[BADDR]] to i32*
 // CK2-64: [[CONV:%.+]] = bitcast i64* [[ARGCADDR]] to i32*
-// CK2: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
+// CK2: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED1]],
 // CK2: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE1]],
-// CK2: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CK2: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[IS_SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CK2: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CK2: [[GLOBALSTACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0
 // CK2-64:  [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]]
@@ -158,9 +160,9 @@ int main (int argc, char **argv) {
 // CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]],
 // CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]],
 // CK2: store i{{[0-9]+}}** [[ARGC]], i{{[0-9]+}}*** [[ARGCADDR]],
-// CK2: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]],
+// CK2: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED2]],
 // CK2: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE2]],
-// CK2: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CK2: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[IS_SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CK2: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CK2: [[GLOBALSTACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0
 // CK2: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]]




More information about the cfe-commits mailing list