r375017 - [OPENMP]Use different addresses for zeroed thread_id/bound_id.

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Wed Oct 16 09:59:01 PDT 2019


Author: abataev
Date: Wed Oct 16 09:59:01 2019
New Revision: 375017

URL: http://llvm.org/viewvc/llvm-project?rev=375017&view=rev
Log:
[OPENMP]Use different addresses for zeroed thread_id/bound_id.

When the parallel region is called directly in the sequential region,
the zeroed tid/bound id are used. But they must point to the different
memory locations as the parameters are marked as noalias.

Modified:
    cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
    cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp
    cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp
    cfe/trunk/test/OpenMP/parallel_if_codegen.cpp

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=375017&r1=375016&r2=375017&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Wed Oct 16 09:59:01 2019
@@ -3075,14 +3075,18 @@ void CGOpenMPRuntime::emitParallelCall(C
     CGF.EmitRuntimeCall(
         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
 
-    // OutlinedFn(&GTid, &zero, CapturedStruct);
+    // OutlinedFn(&zero, &zero_bound, CapturedStruct);
     Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
-                                                        /*Name*/ ".zero.addr");
+                                                        /*Name=*/".zero.addr");
     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+    Address ZeroAddrBound =
+        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+                                         /*Name=*/".bound.zero.addr");
+    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
     // ThreadId for serialized parallels is 0.
     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
-    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
 

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=375017&r1=375016&r2=375017&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Wed Oct 16 09:59:01 2019
@@ -2459,9 +2459,8 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall
   if (!CGF.HaveInsertPoint())
     return;
 
-  Address ZeroAddr = CGF.CreateMemTemp(
-      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
-      /*Name*/ ".zero.addr");
+  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+                                                      /*Name=*/".zero.addr");
   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
   llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
   OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
@@ -2490,16 +2489,19 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDPa
   // Force inline this outlined function at its call site.
   Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
 
-  Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
-                                           /*DestWidth=*/32, /*Signed=*/1),
-                                       ".zero.addr");
+  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+                                                      /*Name=*/".zero.addr");
   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
   // ThreadId for serialized parallels is 0.
   Address ThreadIDAddr = ZeroAddr;
-  auto &&CodeGen = [this, Fn, CapturedVars, Loc, ZeroAddr, &ThreadIDAddr](
+  auto &&CodeGen = [this, Fn, CapturedVars, Loc, &ThreadIDAddr](
                        CodeGenFunction &CGF, PrePostActionTy &Action) {
     Action.Enter(CGF);
 
+    Address ZeroAddr =
+        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+                                         /*Name=*/".bound.zero.addr");
+    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
@@ -2656,17 +2658,19 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParal
   //
   llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
 
-  Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
-                                           /*DestWidth=*/32, /*Signed=*/1),
-                                       ".zero.addr");
+  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+                                                      /*Name=*/".zero.addr");
   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
   // ThreadId for serialized parallels is 0.
   Address ThreadIDAddr = ZeroAddr;
-  auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, ZeroAddr,
-                    &ThreadIDAddr](CodeGenFunction &CGF,
-                                   PrePostActionTy &Action) {
+  auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, &ThreadIDAddr](
+                       CodeGenFunction &CGF, PrePostActionTy &Action) {
     Action.Enter(CGF);
 
+    Address ZeroAddr =
+        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+                                         /*Name=*/".bound.zero.addr");
+    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
@@ -4567,9 +4571,8 @@ llvm::Function *CGOpenMPRuntimeNVPTX::cr
   const auto *RD = CS.getCapturedRecordDecl();
   auto CurField = RD->field_begin();
 
-  Address ZeroAddr = CGF.CreateMemTemp(
-      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
-      /*Name*/ ".zero.addr");
+  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+                                                      /*Name=*/".zero.addr");
   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
   // Get the array of arguments.
   SmallVector<llvm::Value *, 8> Args;

Modified: cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp?rev=375017&r1=375016&r2=375017&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp Wed Oct 16 09:59:01 2019
@@ -577,6 +577,8 @@ int baz(int f, double &a) {
   // CHECK: alloca i32,
   // CHECK: [[LOCAL_F_PTR:%.+]] = alloca i32,
   // CHECK: [[ZERO_ADDR:%.+]] = alloca i32,
+  // CHECK: [[BND_ZERO_ADDR:%.+]] = alloca i32,
+  // CHECK: store i32 0, i32* [[BND_ZERO_ADDR]]
   // CHECK: store i32 0, i32* [[ZERO_ADDR]]
   // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[UNKNOWN]]
   // CHECK: [[PAR_LEVEL:%.+]] = call i16 @__kmpc_parallel_level(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]])
@@ -609,7 +611,7 @@ int baz(int f, double &a) {
   // CHECK: br i1
 
   // CHECK: call void @__kmpc_serialized_parallel(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]])
-  // CHECK: call void [[OUTLINED:@.+]](i32* [[ZERO_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
+  // CHECK: call void [[OUTLINED:@.+]](i32* [[ZERO_ADDR]], i32* [[BND_ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
   // CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]])
   // CHECK: br label
 

Modified: cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp?rev=375017&r1=375016&r2=375017&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp Wed Oct 16 09:59:01 2019
@@ -712,6 +712,7 @@ int bar(int n){
 
   // CHECK-NOT: call void @__kmpc_get_team_static_memory
   // CHECK: store i32 0,
+  // CHECK: store i32 0,
   // CHECK: store i32 0, i32* [[A_ADDR:%.+]], align
   // CHECK: store i16 -32768, i16* [[B_ADDR:%.+]], align
   // CHECK: call void [[OUTLINED:@.+]](i32* {{.+}}, i32* {{.+}}, i32* [[A_ADDR]], i16* [[B_ADDR]])

Modified: cfe/trunk/test/OpenMP/parallel_if_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/parallel_if_codegen.cpp?rev=375017&r1=375016&r2=375017&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/parallel_if_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/parallel_if_codegen.cpp Wed Oct 16 09:59:01 2019
@@ -29,12 +29,13 @@ void gtid_test() {
 }
 
 // CHECK: define internal {{.*}}void [[GTID_TEST_REGION1]](i{{.+}}* noalias [[GTID_PARAM:%.+]], i32* noalias
+// CHECK: store i32 0, i32* [[BND_ZERO_ADDR:%.+]],
 // CHECK: store i32 0, i32* [[ZERO_ADDR:%.+]],
 // CHECK: store i{{[0-9]+}}* [[GTID_PARAM]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]],
 // CHECK: [[GTID_ADDR:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
 // CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_ADDR]]
 // CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]])
-// CHECK: call void [[GTID_TEST_REGION2:@.+]](i{{[0-9]+}}* [[ZERO_ADDR]]
+// CHECK: call void [[GTID_TEST_REGION2:@.+]](i{{[0-9]+}}* [[ZERO_ADDR]], i{{[0-9]+}}* [[BND_ZERO_ADDR]]
 // CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]])
 // CHECK: ret void
 
@@ -55,14 +56,16 @@ int tmain(T Arg) {
 
 // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
 int main() {
+// CHECK: store i32 0, i32* [[BND_ZERO_ADDR2:%.+]],
 // CHECK: store i32 0, i32* [[ZERO_ADDR2:%.+]],
+// CHECK: store i32 0, i32* [[BND_ZERO_ADDR1:%.+]],
 // CHECK: store i32 0, i32* [[ZERO_ADDR1:%.+]],
 // CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num(
 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 0, void {{.+}}* [[CAP_FN4:@.+]] to void
 #pragma omp parallel if (true)
   fn4();
 // CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
-// CHECK: call void [[CAP_FN5:@.+]](i32* [[ZERO_ADDR1]], i32* [[ZERO_ADDR1]])
+// CHECK: call void [[CAP_FN5:@.+]](i32* [[ZERO_ADDR1]], i32* [[BND_ZERO_ADDR1]])
 // CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
 #pragma omp parallel if (false)
   fn5();
@@ -73,7 +76,7 @@ int main() {
 // CHECK: br label %[[OMP_END:.+]]
 // CHECK: [[OMP_ELSE]]
 // CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
-// CHECK: call void [[CAP_FN6]](i32* [[ZERO_ADDR2]], i32* [[ZERO_ADDR2]])
+// CHECK: call void [[CAP_FN6]](i32* [[ZERO_ADDR2]], i32* [[BND_ZERO_ADDR2]])
 // CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
 // CHECK: br label %[[OMP_END]]
 // CHECK: [[OMP_END]]
@@ -96,12 +99,14 @@ int main() {
 // CHECK: ret void
 
 // CHECK-LABEL: define {{.+}} @{{.+}}tmain
+// CHECK: store i32 0, i32* [[BND_ZERO_ADDR2:%.+]],
 // CHECK: store i32 0, i32* [[ZERO_ADDR2:%.+]],
+// CHECK: store i32 0, i32* [[BND_ZERO_ADDR1:%.+]],
 // CHECK: store i32 0, i32* [[ZERO_ADDR1:%.+]],
 // CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num(
 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 0, void {{.+}}* [[CAP_FN1:@.+]] to void
 // CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
-// CHECK: call void [[CAP_FN2:@.+]](i32* [[ZERO_ADDR1]], i32* [[ZERO_ADDR1]])
+// CHECK: call void [[CAP_FN2:@.+]](i32* [[ZERO_ADDR1]], i32* [[BND_ZERO_ADDR1]])
 // CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
 // CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]]
 // CHECK: [[OMP_THEN]]
@@ -109,7 +114,7 @@ int main() {
 // CHECK: br label %[[OMP_END:.+]]
 // CHECK: [[OMP_ELSE]]
 // CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
-// CHECK: call void [[CAP_FN3]](i32* [[ZERO_ADDR2]], i32* [[ZERO_ADDR2]])
+// CHECK: call void [[CAP_FN3]](i32* [[ZERO_ADDR2]], i32* [[BND_ZERO_ADDR2]])
 // CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
 // CHECK: br label %[[OMP_END]]
 // CHECK: [[OMP_END]]




More information about the cfe-commits mailing list