r336567 - [OPENMP, NVPTX] Do not globalize local variables in parallel regions.

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Mon Jul 9 10:43:58 PDT 2018


Author: abataev
Date: Mon Jul  9 10:43:58 2018
New Revision: 336567

URL: http://llvm.org/viewvc/llvm-project?rev=336567&view=rev
Log:
[OPENMP, NVPTX] Do not globalize local variables in parallel regions.

In generic data-sharing mode we are allowed to not globalize local
variables that escape their declaration context iff they are declared
inside of the parallel region. We can do this because L2 parallel
regions are executed sequentially and, thus, we do not need to put
shared local variables in the global memory.

Modified:
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
    cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp
    cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp
    cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
    cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=336567&r1=336566&r2=336567&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Mon Jul  9 10:43:58 2018
@@ -1554,29 +1554,22 @@ void CGOpenMPRuntimeNVPTX::emitNumTeamsC
 llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
-  SourceLocation Loc = D.getLocStart();
-
   // Emit target region as a standalone region.
   class NVPTXPrePostActionTy : public PrePostActionTy {
-    SourceLocation &Loc;
     bool &IsInParallelRegion;
     bool PrevIsInParallelRegion;
 
   public:
-    NVPTXPrePostActionTy(SourceLocation &Loc, bool &IsInParallelRegion)
-        : Loc(Loc), IsInParallelRegion(IsInParallelRegion) {}
+    NVPTXPrePostActionTy(bool &IsInParallelRegion)
+        : IsInParallelRegion(IsInParallelRegion) {}
     void Enter(CodeGenFunction &CGF) override {
-      static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
-          .emitGenericVarsProlog(CGF, Loc);
       PrevIsInParallelRegion = IsInParallelRegion;
       IsInParallelRegion = true;
     }
     void Exit(CodeGenFunction &CGF) override {
       IsInParallelRegion = PrevIsInParallelRegion;
-      static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
-          .emitGenericVarsEpilog(CGF);
     }
-  } Action(Loc, IsInParallelRegion);
+  } Action(IsInParallelRegion);
   CodeGen.setAction(Action);
   bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
   IsInTargetMasterThreadRegion = false;

Modified: cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp?rev=336567&r1=336566&r2=336567&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp (original)
+++ cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp Mon Jul  9 10:43:58 2018
@@ -24,12 +24,11 @@ int maini1() {
 
 // parallel region
 // CHECK: define {{.*}}void @{{.*}}(i32* noalias {{.*}}, i32* noalias {{.*}}, i32* dereferenceable{{.*}})
-// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
-// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%struct[.].*]]*
-// CHECK: [[B_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
+// CHECK: [[B_ADDR:%.+]] = alloca i32,
 // CHECK: call {{.*}}[[FOO:@.*foo.*]](i32* dereferenceable{{.*}} [[B_ADDR]])
 // CHECK: call {{.*}}[[BAR:@.*bar.*]]()
-// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
+// CHECK-NOT: call void @__kmpc_data_sharing_pop_stack(
 // CHECK: ret void
 
 // CHECK: define {{.*}}[[FOO]](i32* dereferenceable{{.*}})

Modified: cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp?rev=336567&r1=336566&r2=336567&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp Mon Jul  9 10:43:58 2018
@@ -83,11 +83,10 @@ void test_ds(){
 /// outlined function for the second parallel region ///
 
 // CK1: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.+}}, i32* dereferenceable{{.+}})
-// CK1: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
-// CK1: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_TY:%.+]]*
-// CK1: [[C_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_TY]], [[GLOBAL_TY]]* [[GLOBALS]], i32 0, i32 0
+// CK1-NOT: call i8* @__kmpc_data_sharing_push_stack(
+// CK1: [[C_ADDR:%.+]] = alloca i32,
 // CK1: store i32* [[C_ADDR]], i32** %
-// CK1: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
+// CK1i-NOT: call void @__kmpc_data_sharing_pop_stack(
 
 /// ========= In the data sharing wrapper function ========= ///
 

Modified: cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp?rev=336567&r1=336566&r2=336567&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp Mon Jul  9 10:43:58 2018
@@ -44,8 +44,8 @@ int main(int argc, char **argv) {
 // CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[PTR]])
 
 // CHECK: define internal void [[PARALLEL]](
-// CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0)
+// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
 
-// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[PTR]])
+// CHECK-NOT: call void @__kmpc_data_sharing_pop_stack(
 
 #endif

Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp?rev=336567&r1=336566&r2=336567&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp Mon Jul  9 10:43:58 2018
@@ -34,20 +34,33 @@ int main(int argc, char **argv) {
 
 // CHECK: call void @__kmpc_spmd_kernel_deinit()
 
-// CHECK: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} %{{.+}}, i32* dereferenceable{{.*}})
-// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 8, i16 0)
-// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_TY:%.+]]*
-// CHECK: [[I:%.+]] = getelementptr inbounds [[GLOBAL_TY]], [[GLOBAL_TY]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[ARGC_VAL:%.+]] = load i32, i32* %
-// CHECK: [[ARGC:%.+]] = getelementptr inbounds [[GLOBAL_TY]], [[GLOBAL_TY]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-// CHECK: store i32 [[ARGC_VAL]], i32* [[ARGC]],
+// CHECK: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} [[ARGC:%.+]], i32* dereferenceable{{.*}})
+// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[ARGC_ADDR:%.+]] = alloca i{{32|64}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[I:%.+]] = alloca i32,
+// CHECK-32: store i32 [[ARGC]], i32* [[ARGC_ADDR]],
+// CHECK-64: store i{{64|32}} [[ARGC]], i{{64|32}}* [[ARGC_ADDR]],
+// CHECK-64: [[ARGC:%.+]] = bitcast i64* [[ARGC_ADDR]] to i32*
 
 // CHECK: call void @__kmpc_for_static_init_4(
 // CHECK: call i32 [[FOO:@.+foo.+]](i32* [[I]])
 // CHECK: call i32 [[FOO]](i32* %{{.+}})
-// CHECK: call i32 [[FOO]](i32* [[ARGC]])
+// CHECK-32: call i32 [[FOO]](i32* [[ARGC_ADDR]])
+// CHECK-64: call i32 [[FOO]](i32* [[ARGC]])
 // CHECK: call void @__kmpc_for_static_fini(
 
-// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
+// CHECK-NOT: call void @__kmpc_data_sharing_pop_stack(
 
 #endif




More information about the cfe-commits mailing list