r337957 - [OPENMP] ThreadId in serialized parallel regions is 0.

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Wed Jul 25 13:03:01 PDT 2018


Author: abataev
Date: Wed Jul 25 13:03:01 2018
New Revision: 337957

URL: http://llvm.org/viewvc/llvm-project?rev=337957&view=rev
Log:
[OPENMP] ThreadId in serialized parallel regions is 0.

The first argument for the parallel outlined functions, called as
serialized parallel regions, should be a pointer to the global thread id
that always is 0.

Modified:
    cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
    cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp
    cfe/trunk/test/OpenMP/parallel_if_codegen.cpp

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=337957&r1=337956&r2=337957&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Wed Jul 25 13:03:01 2018
@@ -2839,12 +2839,12 @@ void CGOpenMPRuntime::emitParallelCall(C
         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
 
     // OutlinedFn(&GTid, &zero, CapturedStruct);
-    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
     Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                                         /*Name*/ ".zero.addr");
     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
-    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
+    // ThreadId for serialized parallels is 0.
+    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=337957&r1=337956&r2=337957&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Wed Jul 25 13:03:01 2018
@@ -1784,8 +1784,9 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDPa
                                            /*DestWidth=*/32, /*Signed=*/1),
                                        ".zero.addr");
   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
-  Address ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
-  auto &&CodeGen = [this, Fn, CapturedVars, Loc, ZeroAddr, ThreadIDAddr](
+  // ThreadId for serialized parallels is 0.
+  Address ThreadIDAddr = ZeroAddr;
+  auto &&CodeGen = [this, Fn, CapturedVars, Loc, ZeroAddr, &ThreadIDAddr](
                        CodeGenFunction &CGF, PrePostActionTy &Action) {
     Action.Enter(CGF);
 
@@ -1883,8 +1884,9 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDPa
     Work.emplace_back(WFn);
   };
 
-  auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen, &CodeGen](
-                             CodeGenFunction &CGF, PrePostActionTy &Action) {
+  auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen, &CodeGen,
+                          &ThreadIDAddr](CodeGenFunction &CGF,
+                                         PrePostActionTy &Action) {
     RegionCodeGenTy RCG(CodeGen);
     if (IsInParallelRegion) {
       SeqGen(CGF, Action);
@@ -1936,6 +1938,8 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDPa
       // There is no need to emit line number for unconditional branch.
       (void)ApplyDebugLocation::CreateEmpty(CGF);
       CGF.EmitBlock(ElseBlock);
+      // In the worker need to use the real thread id.
+      ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
       RCG(CGF);
       // There is no need to emit line number for unconditional branch.
       (void)ApplyDebugLocation::CreateEmpty(CGF);
@@ -1965,10 +1969,11 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParal
                                            /*DestWidth=*/32, /*Signed=*/1),
                                        ".zero.addr");
   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
-  Address ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
+  // ThreadId for serialized parallels is 0.
+  Address ThreadIDAddr = ZeroAddr;
   auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, ZeroAddr,
-                    ThreadIDAddr](CodeGenFunction &CGF,
-                                  PrePostActionTy &Action) {
+                    &ThreadIDAddr](CodeGenFunction &CGF,
+                                   PrePostActionTy &Action) {
     Action.Enter(CGF);
 
     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
@@ -1995,6 +2000,8 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParal
   };
 
   if (IsInTargetMasterThreadRegion) {
+    // In the worker need to use the real thread id.
+    ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
     RegionCodeGenTy RCG(CodeGen);
     RCG(CGF);
   } else {

Modified: cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp?rev=337957&r1=337956&r2=337957&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp Wed Jul 25 13:03:01 2018
@@ -562,7 +562,6 @@ int baz(int f, double &a) {
   // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty*
   // CHECK: [[F_PTR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[REC_ADDR]], i32 0, i32 0
   // CHECK: store i32 %{{.+}}, i32* [[F_PTR]],
-  // CHECK: store i32 [[GTID]], i32* [[GTID_ADDR]],
 
   // CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
   // CHECK: icmp ne i8 [[RES]], 0
@@ -573,7 +572,7 @@ int baz(int f, double &a) {
   // CHECK: br i1
 
   // CHECK: call void @__kmpc_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]])
-  // CHECK: call void [[OUTLINED:@.+]](i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
+  // CHECK: call void [[OUTLINED:@.+]](i32* [[ZERO_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
   // CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]])
   // CHECK: br label
 
@@ -591,6 +590,7 @@ int baz(int f, double &a) {
   // CHECK: call void @__kmpc_end_sharing_variables()
   // CHECK: br label
 
+  // CHECK: store i32 [[GTID]], i32* [[GTID_ADDR]],
   // CHECK: call void [[OUTLINED]](i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
   // CHECK: br label
 

Modified: cfe/trunk/test/OpenMP/parallel_if_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/parallel_if_codegen.cpp?rev=337957&r1=337956&r2=337957&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/parallel_if_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/parallel_if_codegen.cpp Wed Jul 25 13:03:01 2018
@@ -29,12 +29,12 @@ void gtid_test() {
 }
 
 // CHECK: define internal {{.*}}void [[GTID_TEST_REGION1]](i{{.+}}* noalias [[GTID_PARAM:%.+]], i32* noalias
+// CHECK: store i32 0, i32* [[ZERO_ADDR:%.+]],
 // CHECK: store i{{[0-9]+}}* [[GTID_PARAM]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]],
 // CHECK: [[GTID_ADDR:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
 // CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_ADDR]]
 // CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]])
-// CHECK: [[GTID_ADDR:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
-// CHECK: call void [[GTID_TEST_REGION2:@.+]](i{{[0-9]+}}* [[GTID_ADDR]]
+// CHECK: call void [[GTID_TEST_REGION2:@.+]](i{{[0-9]+}}* [[ZERO_ADDR]]
 // CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]])
 // CHECK: ret void
 
@@ -56,12 +56,13 @@ int tmain(T Arg) {
 // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
 int main() {
 // CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num(
+// CHECK: store i32 0, i32* [[ZERO_ADDR2:%.+]],
+// CHECK: store i32 0, i32* [[ZERO_ADDR1:%.+]],
 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 0, void {{.+}}* [[CAP_FN4:@.+]] to void
 #pragma omp parallel if (true)
   fn4();
 // CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
-// CHECK: store i32 [[GTID]], i32* [[GTID_ADDR:%.+]],
-// CHECK: call void [[CAP_FN5:@.+]](i32* [[GTID_ADDR]],
+// CHECK: call void [[CAP_FN5:@.+]](i32* [[ZERO_ADDR1]], i32* [[ZERO_ADDR1]])
 // CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
 #pragma omp parallel if (false)
   fn5();
@@ -72,8 +73,7 @@ int main() {
 // CHECK: br label %[[OMP_END:.+]]
 // CHECK: [[OMP_ELSE]]
 // CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
-// CHECK: store i32 [[GTID]], i32* [[GTID_ADDR:%.+]],
-// CHECK: call void [[CAP_FN6]](i32* [[GTID_ADDR]],
+// CHECK: call void [[CAP_FN6]](i32* [[ZERO_ADDR2]], i32* [[ZERO_ADDR2]])
 // CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
 // CHECK: br label %[[OMP_END]]
 // CHECK: [[OMP_END]]
@@ -97,10 +97,11 @@ int main() {
 
 // CHECK-LABEL: define {{.+}} @{{.+}}tmain
 // CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num(
+// CHECK: store i32 0, i32* [[ZERO_ADDR2:%.+]],
+// CHECK: store i32 0, i32* [[ZERO_ADDR1:%.+]],
 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 0, void {{.+}}* [[CAP_FN1:@.+]] to void
 // CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
-// CHECK: store i32 [[GTID]], i32* [[GTID_ADDR:%.+]],
-// CHECK: call void [[CAP_FN2:@.+]](i32* [[GTID_ADDR]],
+// CHECK: call void [[CAP_FN2:@.+]](i32* [[ZERO_ADDR1]], i32* [[ZERO_ADDR1]])
 // CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
 // CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]]
 // CHECK: [[OMP_THEN]]
@@ -108,8 +109,7 @@ int main() {
 // CHECK: br label %[[OMP_END:.+]]
 // CHECK: [[OMP_ELSE]]
 // CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
-// CHECK: store i32 [[GTID]], i32* [[GTID_ADDR:%.+]],
-// CHECK: call void [[CAP_FN3]](i32* [[GTID_ADDR]],
+// CHECK: call void [[CAP_FN3]](i32* [[ZERO_ADDR2]], i32* [[ZERO_ADDR2]])
 // CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
 // CHECK: br label %[[OMP_END]]
 // CHECK: [[OMP_END]]




More information about the cfe-commits mailing list