[clang] [llvm] [openmp] [OpenMP] OpenMP 6.0 taskgraph support (WIP) (PR #188765)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Mar 26 08:09:47 PDT 2026
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff origin/main HEAD --extensions h,cpp -- clang/test/OpenMP/taskgraph_ast_print.cpp clang/test/OpenMP/taskgraph_codegen.cpp openmp/runtime/test/taskgraph/taskgraph_deps_1.cpp openmp/runtime/test/taskgraph/taskgraph_deps_10.cpp openmp/runtime/test/taskgraph/taskgraph_deps_11.cpp openmp/runtime/test/taskgraph/taskgraph_deps_12.cpp openmp/runtime/test/taskgraph/taskgraph_deps_13.cpp openmp/runtime/test/taskgraph/taskgraph_deps_14.cpp openmp/runtime/test/taskgraph/taskgraph_deps_15.cpp openmp/runtime/test/taskgraph/taskgraph_deps_16.cpp openmp/runtime/test/taskgraph/taskgraph_deps_17.cpp openmp/runtime/test/taskgraph/taskgraph_deps_18.cpp openmp/runtime/test/taskgraph/taskgraph_deps_19.cpp openmp/runtime/test/taskgraph/taskgraph_deps_2.cpp openmp/runtime/test/taskgraph/taskgraph_deps_20.cpp openmp/runtime/test/taskgraph/taskgraph_deps_21.cpp openmp/runtime/test/taskgraph/taskgraph_deps_22.cpp openmp/runtime/test/taskgraph/taskgraph_deps_23.cpp openmp/runtime/test/taskgraph/taskgraph_deps_24.cpp openmp/runtime/test/taskgraph/taskgraph_deps_25.cpp openmp/runtime/test/taskgraph/taskgraph_deps_26.cpp openmp/runtime/test/taskgraph/taskgraph_deps_27.cpp openmp/runtime/test/taskgraph/taskgraph_deps_3.cpp openmp/runtime/test/taskgraph/taskgraph_deps_4.cpp openmp/runtime/test/taskgraph/taskgraph_deps_5.cpp openmp/runtime/test/taskgraph/taskgraph_deps_6.cpp openmp/runtime/test/taskgraph/taskgraph_deps_7.cpp openmp/runtime/test/taskgraph/taskgraph_deps_8.cpp openmp/runtime/test/taskgraph/taskgraph_deps_9.cpp openmp/runtime/test/tasking/omp_record_replay_random_id.cpp openmp/runtime/test/tasking/omp_record_replay_reset.cpp openmp/runtime/test/tasking/omp_taskgraph.cpp openmp/runtime/test/tasking/omp_taskgraph_deps.cpp openmp/runtime/test/tasking/omp_taskgraph_multiTDGs.cpp openmp/runtime/test/tasking/omp_taskgraph_taskloop.cpp clang/include/clang-c/Index.h clang/include/clang/AST/OpenMPClause.h clang/include/clang/AST/RecursiveASTVisitor.h 
clang/include/clang/AST/StmtOpenMP.h clang/include/clang/Sema/SemaOpenMP.h clang/include/clang/Serialization/ASTBitCodes.h clang/lib/AST/OpenMPClause.cpp clang/lib/AST/StmtOpenMP.cpp clang/lib/AST/StmtPrinter.cpp clang/lib/AST/StmtProfile.cpp clang/lib/Basic/OpenMPKinds.cpp clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/CodeGen/CGOpenMPRuntime.h clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp clang/lib/CodeGen/CGStmt.cpp clang/lib/CodeGen/CGStmtOpenMP.cpp clang/lib/CodeGen/CodeGenFunction.h clang/lib/Parse/ParseOpenMP.cpp clang/lib/Sema/SemaExceptionSpec.cpp clang/lib/Sema/SemaOpenMP.cpp clang/lib/Sema/TreeTransform.h clang/lib/Serialization/ASTReader.cpp clang/lib/Serialization/ASTReaderStmt.cpp clang/lib/Serialization/ASTWriter.cpp clang/lib/Serialization/ASTWriterStmt.cpp clang/lib/StaticAnalyzer/Core/ExprEngine.cpp clang/tools/libclang/CIndex.cpp clang/tools/libclang/CXCursor.cpp openmp/runtime/src/kmp.h openmp/runtime/src/kmp_debug.h openmp/runtime/src/kmp_global.cpp openmp/runtime/src/kmp_settings.cpp openmp/runtime/src/kmp_taskdeps.cpp openmp/runtime/src/kmp_taskdeps.h openmp/runtime/src/kmp_tasking.cpp --diff_from_common_commit
``````````
:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
:warning:
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index c327617c2..872ab0e18 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -3635,7 +3635,8 @@ bool RecursiveASTVisitor<Derived>::VisitOMPNowaitClause(OMPNowaitClause *C) {
}
template <typename Derived>
-bool RecursiveASTVisitor<Derived>::VisitOMPReplayableClause(OMPReplayableClause *C) {
+bool RecursiveASTVisitor<Derived>::VisitOMPReplayableClause(
+ OMPReplayableClause *C) {
TRY_TO(TraverseStmt(C->getCondition()));
return true;
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 85342658e..00353ea12 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -2251,8 +2251,7 @@ void CGOpenMPRuntime::emitTaskgraphCall(CodeGenFunction &CGF,
return;
// The nogroup clause doesn't support an argument yet. FIXME.
- const OMPNogroupClause *NoGroupClause =
- D.getSingleClause<OMPNogroupClause>();
+ const OMPNogroupClause *NoGroupClause = D.getSingleClause<OMPNogroupClause>();
llvm::Value *NoGroup;
if (NoGroupClause) {
NoGroup = CGF.Builder.getInt32(1);
@@ -2267,7 +2266,7 @@ void CGOpenMPRuntime::emitTaskgraphCall(CodeGenFunction &CGF,
const Expr *Cond = GraphResetClause->getCondition();
llvm::Value *CondVal = CGF.EvaluateExprAsBool(Cond);
GraphReset =
- CGF.Builder.CreateIntCast(CondVal, CGF.IntTy, /*isSigned=*/true);
+ CGF.Builder.CreateIntCast(CondVal, CGF.IntTy, /*isSigned=*/true);
} else {
GraphReset = CGF.Builder.getInt32(0);
}
@@ -2278,7 +2277,7 @@ void CGOpenMPRuntime::emitTaskgraphCall(CodeGenFunction &CGF,
const auto *E = GraphIdClause->getCondition();
auto *GraphIdVal = CGF.EmitScalarExpr(E);
GraphId =
- CGF.Builder.CreateIntCast(GraphIdVal, CGM.Int32Ty, /*isSigned=*/false);
+ CGF.Builder.CreateIntCast(GraphIdVal, CGM.Int32Ty, /*isSigned=*/false);
}
CodeGenFunction OutlinedCGF(CGM, /*suppressNewContext=*/true);
@@ -2299,12 +2298,10 @@ void CGOpenMPRuntime::emitTaskgraphCall(CodeGenFunction &CGF,
// Create an internal-linkage global variable to hold the taskgraph handle.
std::string GraphHandleName = getName({"omp", "taskgraph", "handle"});
- auto *GraphHandle =
- new llvm::GlobalVariable(CGM.getModule(), CGM.VoidPtrTy,
- /*IsConstant=*/false,
- llvm::GlobalValue::InternalLinkage,
- llvm::Constant::getNullValue(CGM.VoidPtrTy),
- GraphHandleName);
+ auto *GraphHandle = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.VoidPtrTy,
+ /*IsConstant=*/false, llvm::GlobalValue::InternalLinkage,
+ llvm::Constant::getNullValue(CGM.VoidPtrTy), GraphHandleName);
std::array<llvm::Value *, 8> Args{
emitUpdateLocation(CGF, Loc),
@@ -2319,19 +2316,19 @@ void CGOpenMPRuntime::emitTaskgraphCall(CodeGenFunction &CGF,
auto &&ThenGen = [&CGF, this, &Args](CodeGenFunction &, PrePostActionTy &) {
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_taskgraph),
- Args);
+ CGM.getModule(), OMPRTL___kmpc_taskgraph),
+ Args);
};
- auto &&ElseGen = [&CGF, this, &FnT, &CapStruct, &Loc, &OutlinedCGF]
- (CodeGenFunction &, PrePostActionTy &) {
- llvm::Value *CapturedArgsPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CapStruct.getPointer(OutlinedCGF), CGM.VoidPtrTy);
+ auto &&ElseGen = [&CGF, this, &FnT, &CapStruct, &Loc,
+ &OutlinedCGF](CodeGenFunction &, PrePostActionTy &) {
+ llvm::Value *CapturedArgsPtr =
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CapStruct.getPointer(OutlinedCGF), CGM.VoidPtrTy);
auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
- CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc,
- FnT,
- CapturedArgsPtr);
+ CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, FnT,
+ CapturedArgsPtr);
};
RegionCodeGenTy RCG(CodeGen);
RCG(CGF);
@@ -3878,13 +3875,11 @@ static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
}
}
-CGOpenMPRuntime::TaskResultTy
-CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
- const OMPExecutableDirective &D,
- llvm::Function *TaskFunction, QualType SharedsTy,
- Address Shareds, const OMPTaskDataTy &Data,
- bool ForTaskgraph,
- std::array<llvm::Value *, 3> &TaskAllocArgs) {
+CGOpenMPRuntime::TaskResultTy CGOpenMPRuntime::emitTaskInit(
+ CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
+ llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
+ const OMPTaskDataTy &Data, bool ForTaskgraph,
+ std::array<llvm::Value *, 3> &TaskAllocArgs) {
ASTContext &C = CGM.getContext();
llvm::SmallVector<PrivateDataTy, 4> Privates;
// Aggregate privates and sort them by the alignment.
@@ -4784,35 +4779,31 @@ void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
-void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
- const OMPExecutableDirective &D,
- llvm::Function *TaskFunction,
- QualType SharedsTy, Address Shareds,
- const Expr *IfCond,
- const Expr *ReplayableCond,
- const OMPTaskDataTy &Data) {
+void CGOpenMPRuntime::emitTaskCall(
+ CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
+ llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
+ const Expr *IfCond, const Expr *ReplayableCond, const OMPTaskDataTy &Data) {
if (!CGF.HaveInsertPoint())
return;
- auto &&TaskgraphTaskCodeGen =
- [this, &Loc, &D, TaskFunction, &SharedsTy, &Shareds, &Data]
- (CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&TaskgraphTaskCodeGen = [this, &Loc, &D, TaskFunction, &SharedsTy,
+ &Shareds, &Data](CodeGenFunction &CGF,
+ PrePostActionTy &) {
llvm::Value *ThreadId = getThreadID(CGF, Loc);
llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
std::array<llvm::Value *, 9> TGTaskArgs;
std::array<llvm::Value *, 3> TaskAllocArgs;
- TaskResultTy Result =
- emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data, true,
- TaskAllocArgs);
+ TaskResultTy Result = emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy,
+ Shareds, Data, true, TaskAllocArgs);
Address DependenciesArray = Address::invalid();
llvm::Value *NumOfElements;
std::tie(NumOfElements, DependenciesArray) =
emitDependClause(CGF, Data.Dependences, Loc);
- //llvm::dbgs() << "SharedsTy:\n";
+ // llvm::dbgs() << "SharedsTy:\n";
TGTaskArgs[0] = UpLoc;
TGTaskArgs[1] = ThreadId;
TGTaskArgs[2] = Result.NewTask;
- //TGTaskArgs[2] = TaskgraphRegion->getTaskgraphValue();
+ // TGTaskArgs[2] = TaskgraphRegion->getTaskgraphValue();
TGTaskArgs[3] = TaskAllocArgs[0]; // TaskFlags
TGTaskArgs[4] = TaskAllocArgs[1]; // KmpTaskTWithPrivatesTySize
TGTaskArgs[5] = Shareds.emitRawPointer(CGF);
@@ -4832,16 +4823,17 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
TGTaskArgs[8] = DependenciesArray.emitRawPointer(CGF);
}
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_taskgraph_task),
- TGTaskArgs);
+ CGM.getModule(), OMPRTL___kmpc_taskgraph_task),
+ TGTaskArgs);
};
- auto &&NonTaskgraphTaskCodeGen =
- [this, &Loc, &D, TaskFunction, &SharedsTy, &Shareds, IfCond, &Data]
- (CodeGenFunction &CGF, PrePostActionTy &) {
- std::array<llvm::Value*, 3> DummyArray;
- TaskResultTy Result =
- emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data, false, DummyArray);
+ auto &&NonTaskgraphTaskCodeGen = [this, &Loc, &D, TaskFunction, &SharedsTy,
+ &Shareds, IfCond,
+ &Data](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ std::array<llvm::Value *, 3> DummyArray;
+ TaskResultTy Result = emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy,
+ Shareds, Data, false, DummyArray);
llvm::Value *NewTask = Result.NewTask;
llvm::Function *TaskEntry = Result.TaskEntry;
llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
@@ -4853,15 +4845,15 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
std::tie(NumOfElements, DependenciesArray) =
emitDependClause(CGF, Data.Dependences, Loc);
- // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
- // libcall.
- // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
- // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
- // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
- // list is not empty
+ // NOTE: routine and part_id fields are initialized by
+ // __kmpc_omp_task_alloc() libcall. Build kmp_int32
+ // __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, kmp_task_t
+ // *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
+ // ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence list is
+ // not empty
llvm::Value *ThreadID = getThreadID(CGF, Loc);
llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
- llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
+ llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
llvm::Value *DepTaskArgs[7];
if (!Data.Dependences.empty()) {
DepTaskArgs[0] = UpLoc;
@@ -4873,7 +4865,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
}
auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
- &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
+ &DepTaskArgs](CodeGenFunction &CGF,
+ PrePostActionTy &) {
if (!Data.Tied) {
auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
@@ -4969,13 +4962,10 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
}
}
-void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
- const OMPLoopDirective &D,
- llvm::Function *TaskFunction,
- QualType SharedsTy, Address Shareds,
- const Expr *IfCond,
- const Expr *ReplayableCond,
- const OMPTaskDataTy &Data) {
+void CGOpenMPRuntime::emitTaskLoopCall(
+ CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
+ llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
+ const Expr *IfCond, const Expr *ReplayableCond, const OMPTaskDataTy &Data) {
if (!CGF.HaveInsertPoint())
return;
@@ -4989,16 +4979,16 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
- auto &&TaskgraphTaskloopCodeGen =
- [this, &Loc, &D, TaskFunction, &SharedsTy, &Shareds, IfVal, &Data]
- (CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&TaskgraphTaskloopCodeGen = [this, &Loc, &D, TaskFunction, &SharedsTy,
+ &Shareds, IfVal,
+ &Data](CodeGenFunction &CGF,
+ PrePostActionTy &) {
llvm::Value *ThreadId = getThreadID(CGF, Loc);
llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
std::array<llvm::Value *, 16> TGTaskLoopArgs;
std::array<llvm::Value *, 3> TaskAllocArgs;
- TaskResultTy Result =
- emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data, true,
- TaskAllocArgs);
+ TaskResultTy Result = emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy,
+ Shareds, Data, true, TaskAllocArgs);
// This is all copy/pasted from below. Refactor!
LValue LBLVal = CGF.EmitLValueForField(
@@ -5006,22 +4996,25 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
const auto *LBVar =
cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
- CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
- /*IsInitializer=*/true);
+ CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(),
+ LBLVal.getQuals(),
+ /*IsInitializer=*/true);
LValue UBLVal = CGF.EmitLValueForField(
Result.TDBase,
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
const auto *UBVar =
cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
- CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
- /*IsInitializer=*/true);
+ CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(),
+ UBLVal.getQuals(),
+ /*IsInitializer=*/true);
LValue StLVal = CGF.EmitLValueForField(
Result.TDBase,
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
const auto *StVar =
cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
- CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
- /*IsInitializer=*/true);
+ CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(),
+ StLVal.getQuals(),
+ /*IsInitializer=*/true);
// Store reductions address.
LValue RedLVal = CGF.EmitLValueForField(
Result.TDBase,
@@ -5030,7 +5023,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
} else {
CGF.EmitNullInitialization(RedLVal.getAddress(),
- CGF.getContext().VoidPtrTy);
+ CGF.getContext().VoidPtrTy);
}
TGTaskLoopArgs[0] = UpLoc;
@@ -5044,34 +5037,40 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
TGTaskLoopArgs[8] = LBLVal.getPointer(CGF);
TGTaskLoopArgs[9] = UBLVal.getPointer(CGF);
TGTaskLoopArgs[10] = CGF.EmitLoadOfScalar(StLVal, Loc);
- TGTaskLoopArgs[11] = llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0);
- TGTaskLoopArgs[12] = llvm::ConstantInt::getSigned(CGF.IntTy, Data.Schedule.getPointer()
- ? Data.Schedule.getInt() ? NumTasks : Grainsize
- : NoSchedule);
- TGTaskLoopArgs[13] = Data.Schedule.getPointer()
- ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, /*isSigned=*/false)
- : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0);
- TGTaskLoopArgs[14] = llvm::ConstantInt::getSigned(CGF.IntTy, Data.HasModifier ? 1 : 0);
+ TGTaskLoopArgs[11] =
+ llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0);
+ TGTaskLoopArgs[12] = llvm::ConstantInt::getSigned(
+ CGF.IntTy, Data.Schedule.getPointer()
+ ? Data.Schedule.getInt() ? NumTasks : Grainsize
+ : NoSchedule);
+ TGTaskLoopArgs[13] =
+ Data.Schedule.getPointer()
+ ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
+ /*isSigned=*/false)
+ : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0);
+ TGTaskLoopArgs[14] =
+ llvm::ConstantInt::getSigned(CGF.IntTy, Data.HasModifier ? 1 : 0);
TGTaskLoopArgs[15] = Result.TaskDupFn
- ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- Result.TaskDupFn, CGF.VoidPtrTy)
- : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+ ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Result.TaskDupFn, CGF.VoidPtrTy)
+ : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_taskgraph_taskloop),
- TGTaskLoopArgs);
+ CGM.getModule(), OMPRTL___kmpc_taskgraph_taskloop),
+ TGTaskLoopArgs);
};
- auto &&NonTaskgraphTaskloopCodeGen =
- [this, &Loc, &D, TaskFunction, &SharedsTy, &Shareds, IfVal, &Data]
- (CodeGenFunction &CGF, PrePostActionTy &) {
- std::array<llvm::Value*, 3> DummyArray;
- TaskResultTy Result =
- emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data, false, DummyArray);
- // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
- // libcall.
- // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
- // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
- // sched, kmp_uint64 grainsize, void *task_dup);
+ auto &&NonTaskgraphTaskloopCodeGen = [this, &Loc, &D, TaskFunction,
+ &SharedsTy, &Shareds, IfVal,
+ &Data](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ std::array<llvm::Value *, 3> DummyArray;
+ TaskResultTy Result = emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy,
+ Shareds, Data, false, DummyArray);
+ // NOTE: routine and part_id fields are initialized by
+ // __kmpc_omp_task_alloc() libcall. Call to void __kmpc_taskloop(ident_t
+ // *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64
+ // *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void
+ // *task_dup);
llvm::Value *ThreadID = getThreadID(CGF, Loc);
llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
@@ -5080,22 +5079,25 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
const auto *LBVar =
cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
- CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
- /*IsInitializer=*/true);
+ CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(),
+ LBLVal.getQuals(),
+ /*IsInitializer=*/true);
LValue UBLVal = CGF.EmitLValueForField(
Result.TDBase,
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
const auto *UBVar =
cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
- CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
- /*IsInitializer=*/true);
+ CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(),
+ UBLVal.getQuals(),
+ /*IsInitializer=*/true);
LValue StLVal = CGF.EmitLValueForField(
Result.TDBase,
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
const auto *StVar =
cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
- CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
- /*IsInitializer=*/true);
+ CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(),
+ StLVal.getQuals(),
+ /*IsInitializer=*/true);
// Store reductions address.
LValue RedLVal = CGF.EmitLValueForField(
Result.TDBase,
@@ -5104,7 +5106,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
} else {
CGF.EmitNullInitialization(RedLVal.getAddress(),
- CGF.getContext().VoidPtrTy);
+ CGF.getContext().VoidPtrTy);
}
llvm::SmallVector<llvm::Value *, 12> TaskArgs{
UpLoc,
@@ -5118,8 +5120,8 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
llvm::ConstantInt::getSigned(
CGF.IntTy, Data.Schedule.getPointer()
- ? Data.Schedule.getInt() ? NumTasks : Grainsize
- : NoSchedule),
+ ? Data.Schedule.getInt() ? NumTasks : Grainsize
+ : NoSchedule),
Data.Schedule.getPointer()
? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
/*isSigned=*/false)
@@ -5128,13 +5130,13 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
TaskArgs.push_back(Result.TaskDupFn
- ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- Result.TaskDupFn, CGF.VoidPtrTy)
- : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
+ ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Result.TaskDupFn, CGF.VoidPtrTy)
+ : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), Data.HasModifier
- ? OMPRTL___kmpc_taskloop_5
- : OMPRTL___kmpc_taskloop),
+ ? OMPRTL___kmpc_taskloop_5
+ : OMPRTL___kmpc_taskloop),
TaskArgs);
};
@@ -6290,8 +6292,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
Args);
else
return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_taskred_init),
- Args);
+ CGM.getModule(), OMPRTL___kmpc_taskred_init),
+ Args);
}
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
@@ -6368,57 +6370,59 @@ void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
emitDependClause(CGF, Data.Dependences, Loc);
auto &&TaskgraphTaskwaitCodeGen =
- [this, UpLoc, ThreadID, NumOfElements, &DependenciesArray, &Data]
- (CodeGenFunction &CGF, PrePostActionTy &) {
- llvm::Value *TGTaskWaitArgs[5];
- TGTaskWaitArgs[0] = UpLoc;
- TGTaskWaitArgs[1] = ThreadID;
- TGTaskWaitArgs[2] = NumOfElements;
- if (Data.Dependences.empty()) {
- // This should be a proper error
- fprintf(stderr, "*** Taskwait inside taskgraph with no depend clause is not task-generating\n");
- exit(1);
- }
- TGTaskWaitArgs[3] = DependenciesArray.emitRawPointer(CGF);
- TGTaskWaitArgs[4] =
- llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
- CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_taskgraph_taskwait),
- TGTaskWaitArgs);
- };
+ [this, UpLoc, ThreadID, NumOfElements, &DependenciesArray,
+ &Data](CodeGenFunction &CGF, PrePostActionTy &) {
+ llvm::Value *TGTaskWaitArgs[5];
+ TGTaskWaitArgs[0] = UpLoc;
+ TGTaskWaitArgs[1] = ThreadID;
+ TGTaskWaitArgs[2] = NumOfElements;
+ if (Data.Dependences.empty()) {
+ // This should be a proper error
+ fprintf(stderr, "*** Taskwait inside taskgraph with no depend "
+ "clause is not task-generating\n");
+ exit(1);
+ }
+ TGTaskWaitArgs[3] = DependenciesArray.emitRawPointer(CGF);
+ TGTaskWaitArgs[4] =
+ llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
+ CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_taskgraph_taskwait),
+ TGTaskWaitArgs);
+ };
auto &&NonTaskgraphTaskwaitCodeGen =
- [this, UpLoc, ThreadID, NumOfElements, &DependenciesArray, &M, &Data]
- (CodeGenFunction &CGF, PrePostActionTy &) {
- if (!Data.Dependences.empty()) {
- llvm::Value *DepWaitTaskArgs[7];
- DepWaitTaskArgs[0] = UpLoc;
- DepWaitTaskArgs[1] = ThreadID;
- DepWaitTaskArgs[2] = NumOfElements;
- DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
- DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
- DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
- DepWaitTaskArgs[6] =
- llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
-
- CodeGenFunction::RunCleanupsScope LocalScope(CGF);
-
- // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
- // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
- // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
- // kmp_int32 has_no_wait); if dependence info is specified.
- CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- M, OMPRTL___kmpc_omp_taskwait_deps_51),
- DepWaitTaskArgs);
- } else {
- // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
- // global_tid);
- llvm::Value *Args[] = {UpLoc, ThreadID};
- // Ignore return result until untied tasks are supported.
- CGF.EmitRuntimeCall(
- OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
- Args);
- }
- };
+ [this, UpLoc, ThreadID, NumOfElements, &DependenciesArray, &M,
+ &Data](CodeGenFunction &CGF, PrePostActionTy &) {
+ if (!Data.Dependences.empty()) {
+ llvm::Value *DepWaitTaskArgs[7];
+ DepWaitTaskArgs[0] = UpLoc;
+ DepWaitTaskArgs[1] = ThreadID;
+ DepWaitTaskArgs[2] = NumOfElements;
+ DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
+ DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
+ DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+ DepWaitTaskArgs[6] =
+ llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
+
+ CodeGenFunction::RunCleanupsScope LocalScope(CGF);
+
+ // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
+ // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
+ // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
+ // kmp_int32 has_no_wait); if dependence info is specified.
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_omp_taskwait_deps_51),
+ DepWaitTaskArgs);
+ } else {
+ // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
+ // global_tid);
+ llvm::Value *Args[] = {UpLoc, ThreadID};
+ // Ignore return result until untied tasks are supported.
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_omp_taskwait),
+ Args);
+ }
+ };
if (CGF.getOMPWithinTaskgraph()) {
// Lexically within taskgraph, always replayable.
@@ -13587,13 +13591,10 @@ void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
llvm_unreachable("Not supported in SIMD-only mode");
}
-void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
- const OMPExecutableDirective &D,
- llvm::Function *TaskFunction,
- QualType SharedsTy, Address Shareds,
- const Expr *IfCond,
- const Expr *ReplayableCond,
- const OMPTaskDataTy &Data) {
+void CGOpenMPSIMDRuntime::emitTaskCall(
+ CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
+ llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
+ const Expr *IfCond, const Expr *ReplayableCond, const OMPTaskDataTy &Data) {
llvm_unreachable("Not supported in SIMD-only mode");
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 7ac06547a..abcc86fcb 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -585,7 +585,7 @@ protected:
llvm::Function *TaskFunction, QualType SharedsTy,
Address Shareds, const OMPTaskDataTy &Data,
bool ForTaskgraph,
- std::array<llvm::Value*, 3> &TaskAllocArgs);
+ std::array<llvm::Value *, 3> &TaskAllocArgs);
/// Emit update for lastprivate conditional data.
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal,
@@ -1388,7 +1388,8 @@ public:
/// Emit code for 'taskgraph' directive.
virtual void emitTaskgraphCall(CodeGenFunction &CGF, SourceLocation Loc,
- const OMPExecutableDirective &D, const Expr *IfCond);
+ const OMPExecutableDirective &D,
+ const Expr *IfCond);
/// Emit code for 'cancellation point' construct.
/// \param CancelRegion Region kind for which the cancellation point must be
@@ -2097,8 +2098,8 @@ public:
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
const OMPLoopDirective &D, llvm::Function *TaskFunction,
QualType SharedsTy, Address Shareds, const Expr *IfCond,
- const Expr *ReplayableCond, const OMPTaskDataTy &Data)
- override;
+ const Expr *ReplayableCond,
+ const OMPTaskDataTy &Data) override;
/// Emit a code for reduction clause. Next code should be emitted for
/// reduction:
@@ -2218,16 +2219,15 @@ public:
/// Emit code for 'taskwait' directive.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
- const Expr *ReplayableCond, const OMPTaskDataTy &Data)
- override;
+ const Expr *ReplayableCond,
+ const OMPTaskDataTy &Data) override;
/// Emit code for 'taskgraph' directive.
/// \param IfCond Expression evaluated in if clause associated with the target
/// \param D Directive to emit.
void emitTaskgraphCall(CodeGenFunction &CGF, SourceLocation Loc,
const OMPExecutableDirective &D,
- const Expr *IfCond
- ) override;
+ const Expr *IfCond) override;
/// Emit code for 'cancellation point' construct.
/// \param CancelRegion Region kind for which the cancellation point must be
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 30728e5ad..98eed360f 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5707,9 +5707,9 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
IntegerLiteral IfCond(getContext(), TrueOrFalse,
getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
SourceLocation());
- IntegerLiteral ReplayableCond(getContext(), llvm::APInt(32, 1),
- getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
- SourceLocation());
+ IntegerLiteral ReplayableCond(
+ getContext(), llvm::APInt(32, 1),
+ getContext().getIntTypeForBitwidth(32, /*Signed=*/0), SourceLocation());
CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
SharedsTy, CapturedStruct, &IfCond,
&ReplayableCond, Data);
@@ -5825,20 +5825,18 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
if (auto *RC = S.getSingleClause<OMPReplayableClause>()) {
ReplayableCond = RC->getCondition();
if (!ReplayableCond) {
- ReplayableCond =
- IntegerLiteral::Create(
- getContext(), llvm::APInt(32, 1),
- getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
- SourceLocation());
+ ReplayableCond = IntegerLiteral::Create(
+ getContext(), llvm::APInt(32, 1),
+ getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
+ SourceLocation());
}
}
auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitStmt(CS->getCapturedStmt());
};
- auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
- IfCond, ReplayableCond](CodeGenFunction &CGF,
- llvm::Function *OutlinedFn,
- const OMPTaskDataTy &Data) {
+ auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond, ReplayableCond](
+ CodeGenFunction &CGF, llvm::Function *OutlinedFn,
+ const OMPTaskDataTy &Data) {
CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
SharedsTy, CapturedStruct, IfCond,
ReplayableCond, Data);
@@ -5876,11 +5874,10 @@ void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
if (auto *RC = S.getSingleClause<OMPReplayableClause>()) {
ReplayableCond = RC->getCondition();
if (!ReplayableCond) {
- ReplayableCond =
- IntegerLiteral::Create(
- getContext(), llvm::APInt(32, 1),
- getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
- SourceLocation());
+ ReplayableCond = IntegerLiteral::Create(
+ getContext(), llvm::APInt(32, 1),
+ getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
+ SourceLocation());
}
}
CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(),
@@ -8243,11 +8240,10 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
if (auto *RC = S.getSingleClause<OMPReplayableClause>()) {
ReplayableCond = RC->getCondition();
if (!ReplayableCond) {
- ReplayableCond =
- IntegerLiteral::Create(
- getContext(), llvm::APInt(32, 1),
- getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
- SourceLocation());
+ ReplayableCond = IntegerLiteral::Create(
+ getContext(), llvm::APInt(32, 1),
+ getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
+ SourceLocation());
}
}
@@ -8370,18 +8366,16 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
(*LIP)->getType(), S.getBeginLoc()));
});
};
- auto &&TaskGen =
- [&S, SharedsTy, CapturedStruct, IfCond, ReplayableCond]
- (CodeGenFunction &CGF, llvm::Function *OutlinedFn,
- const OMPTaskDataTy &Data) {
- auto &&CodeGen =
- [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, ReplayableCond,
- &Data](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond, ReplayableCond](
+ CodeGenFunction &CGF, llvm::Function *OutlinedFn,
+ const OMPTaskDataTy &Data) {
+ auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
+ ReplayableCond,
+ &Data](CodeGenFunction &CGF, PrePostActionTy &) {
OMPLoopScope PreInitScope(CGF, S);
- CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
- OutlinedFn, SharedsTy,
- CapturedStruct, IfCond,
- ReplayableCond, Data);
+ CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(
+ CGF, S.getBeginLoc(), S, OutlinedFn, SharedsTy, CapturedStruct,
+ IfCond, ReplayableCond, Data);
};
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
CodeGen);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 0ac35ebe3..b199a1355 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -819,13 +819,12 @@ public:
class OMPWithinTaskgraphRAII {
CodeGenFunction &CGF;
+
public:
OMPWithinTaskgraphRAII(CodeGenFunction &CGF_) : CGF(CGF_) {
CGF.setOMPWithinTaskgraph(true);
}
- ~OMPWithinTaskgraphRAII() {
- CGF.setOMPWithinTaskgraph(false);
- }
+ ~OMPWithinTaskgraphRAII() { CGF.setOMPWithinTaskgraph(false); }
};
template <class T>
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index da1555f02..f98f29fe0 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -3343,8 +3343,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
}
if ((CKind == OMPC_nowait || CKind == OMPC_replayable) &&
- PP.LookAhead(/*N=*/0).is(tok::l_paren) &&
- getLangOpts().OpenMP >= 60)
+ PP.LookAhead(/*N=*/0).is(tok::l_paren) && getLangOpts().OpenMP >= 60)
Clause = ParseOpenMPSingleExprClause(CKind, WrongDirective);
else
Clause = ParseOpenMPClause(CKind, WrongDirective);
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 19dc278e1..81f77c080 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -10843,9 +10843,8 @@ TreeTransform<Derived>::TransformOMPReplayableClause(OMPReplayableClause *C) {
if (Cond.isInvalid())
return nullptr;
}
- return getDerived().RebuildOMPReplayableClause(Cond.get(), C->getBeginLoc(),
- C->getLParenLoc(),
- C->getEndLoc());
+ return getDerived().RebuildOMPReplayableClause(
+ Cond.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 7c0a7ad58..0643df1b2 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -3127,7 +3127,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
kmp_uint8 th_task_state; // alternating 0/1 for task team identification
kmp_uint32 th_reap_state; // Non-zero indicates thread is not
// tasking, thus safe to reap
- //kmp_taskgraph_record_t *th_taskgraph_recording;
+ // kmp_taskgraph_record_t *th_taskgraph_recording;
/* More stuff for keeping track of active/sleeping threads (this part is
written by the worker thread) */
@@ -4486,37 +4486,25 @@ KMP_EXPORT void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
uintptr_t hint);
#if OMP_TASKGRAPH_EXPERIMENTAL
KMP_EXPORT void __kmpc_taskgraph(ident_t *loc_ref, kmp_int32 gtid,
- std::atomic<void*> *tdg_handle,
+ std::atomic<void *> *tdg_handle,
kmp_uint32 graph_id, kmp_int32 graph_reset,
kmp_int32 nogroup, void (*entry)(void *),
void *args);
-KMP_EXPORT kmp_uint32 __kmpc_taskgraph_task(ident_t *loc_ref, kmp_int32 gtid,
- kmp_task_t *new_task,
- kmp_int32 flags,
- size_t sizeof_kmp_task_t,
- void* shareds,
- size_t sizeof_shareds,
- kmp_int32 ndeps,
- kmp_depend_info_t *dep_list);
-KMP_EXPORT kmp_uint32 __kmpc_taskgraph_taskloop(ident_t *loc_ref,
- kmp_int32 gtid,
- kmp_task_t *new_task,
- kmp_int32 flags,
- size_t sizeof_kmp_task_t,
- void *shareds,
- size_t sizeof_shareds,
- kmp_int32 if_val,
- kmp_uint64 *lb, kmp_uint64 *ub,
- kmp_int64 st, kmp_int32 nogroup,
- kmp_int32 sched,
- kmp_uint64 grainsize,
- kmp_int32 modifier,
- void *task_dup);
+KMP_EXPORT kmp_uint32 __kmpc_taskgraph_task(
+ ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 flags,
+ size_t sizeof_kmp_task_t, void *shareds, size_t sizeof_shareds,
+ kmp_int32 ndeps, kmp_depend_info_t *dep_list);
+KMP_EXPORT kmp_uint32 __kmpc_taskgraph_taskloop(
+ ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 flags,
+ size_t sizeof_kmp_task_t, void *shareds, size_t sizeof_shareds,
+ kmp_int32 if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+ kmp_int32 nogroup, kmp_int32 sched, kmp_uint64 grainsize,
+ kmp_int32 modifier, void *task_dup);
KMP_EXPORT void __kmpc_taskgraph_taskwait(ident_t *loc_ref, kmp_int32 gtid,
kmp_int32 ndeps,
kmp_depend_info_t *dep_list,
kmp_int32 has_no_wait);
-KMP_EXPORT void* __kmpc_taskgraph_taskred_init(kmp_int32 gtid, kmp_int32 num,
+KMP_EXPORT void *__kmpc_taskgraph_taskred_init(kmp_int32 gtid, kmp_int32 num,
void *data);
#endif
/* Interface to fast scalable reduce methods routines */
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 1f28f747c..0113e002f 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -299,7 +299,7 @@ __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
if (!dep->dn.successors || dep->dn.successors->node != node) {
__kmp_track_dependence(gtid, dep, node, task);
dep->dn.successors =
- __kmp_add_node<true>(thread, dep->dn.successors, node);
+ __kmp_add_node<true>(thread, dep->dn.successors, node);
KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
"%p\n",
gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
@@ -328,7 +328,8 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
if (sink->dn.task) {
if (!sink->dn.successors || sink->dn.successors->node != source) {
__kmp_track_dependence(gtid, sink, source, task);
- sink->dn.successors = __kmp_add_node<true>(thread, sink->dn.successors, source);
+ sink->dn.successors =
+ __kmp_add_node<true>(thread, sink->dn.successors, source);
KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
"%p\n",
gtid, KMP_TASK_TO_TASKDATA(sink->dn.task),
@@ -341,21 +342,23 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
return npredecessors;
}
-kmp_taskgraph_region_dep_t *__kmp_region_deplist_add(kmp_info_t *thread,
- kmp_taskgraph_region_dep_t **recycled_deps, kmp_taskgraph_region_t *region,
- kmp_taskgraph_region_dep_t *list) {
+kmp_taskgraph_region_dep_t *__kmp_region_deplist_add(
+ kmp_info_t *thread, kmp_taskgraph_region_dep_t **recycled_deps,
+ kmp_taskgraph_region_t *region, kmp_taskgraph_region_dep_t *list) {
kmp_taskgraph_region_dep_t *head;
if (*recycled_deps) {
head = *recycled_deps;
*recycled_deps = (*recycled_deps)->next;
} else
- head = (kmp_taskgraph_region_dep_t *)__kmp_fast_allocate(thread, sizeof(kmp_taskgraph_region_dep_t));
+ head = (kmp_taskgraph_region_dep_t *)__kmp_fast_allocate(
+ thread, sizeof(kmp_taskgraph_region_dep_t));
head->region = region;
head->next = list;
return head;
}
-kmp_taskgraph_region_t *__kmp_region_worklist_reverse(kmp_taskgraph_region_t *list) {
+kmp_taskgraph_region_t *
+__kmp_region_worklist_reverse(kmp_taskgraph_region_t *list) {
kmp_taskgraph_region_t *last = nullptr;
while (list) {
kmp_taskgraph_region_t *next = list->next;
@@ -366,7 +369,8 @@ kmp_taskgraph_region_t *__kmp_region_worklist_reverse(kmp_taskgraph_region_t *li
return last;
}
-static kmp_depnode_t *__kmp_find_in_depnode_list(kmp_depnode_t *node, kmp_depnode_list_t *list) {
+static kmp_depnode_t *__kmp_find_in_depnode_list(kmp_depnode_t *node,
+ kmp_depnode_list_t *list) {
for (; list; list = list->next)
if (list->node == node)
return list->node;
@@ -381,38 +385,36 @@ typedef struct kmp_bitset {
kmp_size_t num_chunks;
} kmp_bitset_t;
-static kmp_bitset_t *
-__kmp_bitset_alloc(kmp_info_t *thread, kmp_size_t bitsize) {
+static kmp_bitset_t *__kmp_bitset_alloc(kmp_info_t *thread,
+ kmp_size_t bitsize) {
kmp_size_t bytesize = (bitsize + 7) / 8;
- kmp_size_t num_chunks = (bytesize + sizeof(kmp_uint64) - 1) / sizeof(kmp_uint64);
- kmp_bitset_t *bitset = (kmp_bitset_t *) __kmp_fast_allocate(thread, sizeof(kmp_bitset_t) + sizeof(kmp_uint64) * num_chunks);
- bitset->bits = (kmp_uint64*) &bitset[1];
+ kmp_size_t num_chunks =
+ (bytesize + sizeof(kmp_uint64) - 1) / sizeof(kmp_uint64);
+ kmp_bitset_t *bitset = (kmp_bitset_t *)__kmp_fast_allocate(
+ thread, sizeof(kmp_bitset_t) + sizeof(kmp_uint64) * num_chunks);
+ bitset->bits = (kmp_uint64 *)&bitset[1];
memset(bitset->bits, 0, sizeof(kmp_uint64) * num_chunks);
bitset->bitsize = bitsize;
bitset->num_chunks = num_chunks;
return bitset;
}
-static void
-__kmp_bitset_free(kmp_info_t *thread, kmp_bitset_t *bitset) {
+static void __kmp_bitset_free(kmp_info_t *thread, kmp_bitset_t *bitset) {
__kmp_fast_free(thread, bitset);
}
-static void
-__kmp_bitset_set(kmp_bitset_t *bitset, kmp_size_t bitnum) {
+static void __kmp_bitset_set(kmp_bitset_t *bitset, kmp_size_t bitnum) {
kmp_size_t chunk = bitnum / (8 * sizeof(kmp_uint64));
if (bitnum < bitset->bitsize)
bitset->bits[chunk] |= (kmp_uint64)1 << (bitnum & 63);
}
-static void
-__kmp_bitset_clearall(kmp_bitset_t *bitset) {
+static void __kmp_bitset_clearall(kmp_bitset_t *bitset) {
if (bitset)
memset(bitset->bits, 0, sizeof(kmp_int64) * bitset->num_chunks);
}
-static void
-__kmp_bitset_setall(kmp_bitset_t *bitset) {
+static void __kmp_bitset_setall(kmp_bitset_t *bitset) {
for (kmp_int32 chunk = 0; chunk < bitset->num_chunks - 1; chunk++)
bitset->bits[chunk] = ~(kmp_uint64)0;
kmp_int32 last_chunk_numbits = bitset->bitsize & 63;
@@ -422,8 +424,7 @@ __kmp_bitset_setall(kmp_bitset_t *bitset) {
}
}
-static void
-__kmp_bitset_copy(kmp_bitset_t *dst, const kmp_bitset_t *src) {
+static void __kmp_bitset_copy(kmp_bitset_t *dst, const kmp_bitset_t *src) {
assert(dst->num_chunks == src->num_chunks);
assert(dst->bitsize == src->bitsize);
memcpy(dst->bits, src->bits, sizeof(kmp_uint64) * dst->num_chunks);
@@ -431,8 +432,8 @@ __kmp_bitset_copy(kmp_bitset_t *dst, const kmp_bitset_t *src) {
/// Return TRUE if \c b is a subset of \c a.
-static bool
-__kmp_bitset_subset_p(const kmp_bitset_t *a, const kmp_bitset_t *b) {
+static bool __kmp_bitset_subset_p(const kmp_bitset_t *a,
+ const kmp_bitset_t *b) {
if (!b)
return true;
kmp_size_t chunk_max = std::max(a->num_chunks, b->num_chunks);
@@ -445,8 +446,8 @@ __kmp_bitset_subset_p(const kmp_bitset_t *a, const kmp_bitset_t *b) {
return true;
}
-static void
-__kmp_bitset_and(kmp_bitset_t *a, kmp_bitset_t *b, kmp_bitset_t *c) {
+static void __kmp_bitset_and(kmp_bitset_t *a, kmp_bitset_t *b,
+ kmp_bitset_t *c) {
kmp_size_t chunk_max = std::max(b->num_chunks, c->num_chunks);
for (kmp_size_t chunk = 0; chunk < chunk_max; chunk++) {
kmp_uint64 b_bits = chunk < b->num_chunks ? b->bits[chunk] : 0;
@@ -455,8 +456,8 @@ __kmp_bitset_and(kmp_bitset_t *a, kmp_bitset_t *b, kmp_bitset_t *c) {
}
}
-static void
-__kmp_bitset_and_not(kmp_bitset_t *a, kmp_bitset_t *b, kmp_bitset_t *c) {
+static void __kmp_bitset_and_not(kmp_bitset_t *a, kmp_bitset_t *b,
+ kmp_bitset_t *c) {
if (!c)
__kmp_bitset_copy(a, b);
else {
@@ -469,8 +470,7 @@ __kmp_bitset_and_not(kmp_bitset_t *a, kmp_bitset_t *b, kmp_bitset_t *c) {
}
}
-static void
-__kmp_bitset_or(kmp_bitset_t *a, kmp_bitset_t *b, kmp_bitset_t *c) {
+static void __kmp_bitset_or(kmp_bitset_t *a, kmp_bitset_t *b, kmp_bitset_t *c) {
if (!b && !c)
__kmp_bitset_clearall(a);
else if (!b)
@@ -487,8 +487,7 @@ __kmp_bitset_or(kmp_bitset_t *a, kmp_bitset_t *b, kmp_bitset_t *c) {
}
}
-static bool
-__kmp_bitset_empty_p(kmp_bitset_t *bitset) {
+static bool __kmp_bitset_empty_p(kmp_bitset_t *bitset) {
if (!bitset)
return true;
for (kmp_size_t chunk = 0; chunk < bitset->num_chunks; chunk++) {
@@ -501,8 +500,7 @@ __kmp_bitset_empty_p(kmp_bitset_t *bitset) {
/// Test two bitsets for equality. Note that any unused bits at the end of the
/// last chunk are kept as zero.
-static bool
-__kmp_bitset_equal(kmp_bitset_t *a, kmp_bitset_t *b) {
+static bool __kmp_bitset_equal(kmp_bitset_t *a, kmp_bitset_t *b) {
if (!b)
return __kmp_bitset_empty_p(a);
kmp_size_t chunk_max = std::max(a->num_chunks, b->num_chunks);
@@ -515,8 +513,7 @@ __kmp_bitset_equal(kmp_bitset_t *a, kmp_bitset_t *b) {
return true;
}
-static bool
-__kmp_bitset_intersect_p(kmp_bitset_t *a, kmp_bitset_t *b) {
+static bool __kmp_bitset_intersect_p(kmp_bitset_t *a, kmp_bitset_t *b) {
if (!a || !b)
return false;
kmp_size_t chunk_max = std::max(a->num_chunks, b->num_chunks);
@@ -529,8 +526,7 @@ __kmp_bitset_intersect_p(kmp_bitset_t *a, kmp_bitset_t *b) {
return false;
}
-static kmp_int32
-__kmp_bitset_popcount(kmp_bitset_t *bitset) {
+static kmp_int32 __kmp_bitset_popcount(kmp_bitset_t *bitset) {
if (!bitset)
return 0;
kmp_int32 accum = 0;
@@ -546,9 +542,10 @@ static kmp_int32 __kmp_taskgraph_add_dep(kmp_info_t *thread,
kmp_int32 npredecessors = 0;
for (; plist; plist = plist->next) {
kmp_depnode_t *dep = plist->node;
- if (!dep->dn.successors || !__kmp_find_in_depnode_list(node, dep->dn.successors)) {
+ if (!dep->dn.successors ||
+ !__kmp_find_in_depnode_list(node, dep->dn.successors)) {
dep->dn.successors =
- __kmp_add_node<false>(thread, dep->dn.successors, node);
+ __kmp_add_node<false>(thread, dep->dn.successors, node);
npredecessors++;
}
}
@@ -563,21 +560,21 @@ static kmp_int32 __kmp_taskgraph_add_dep(kmp_info_t *thread,
kmp_int32 npredecessors = 0;
if (!sink->dn.successors || sink->dn.successors->node != source) {
if (!__kmp_find_in_depnode_list(source, sink->dn.successors)) {
- sink->dn.successors = __kmp_add_node<false>(thread, sink->dn.successors,
- source);
+ sink->dn.successors =
+ __kmp_add_node<false>(thread, sink->dn.successors, source);
npredecessors++;
}
}
return npredecessors;
}
-template<typename T>
+template <typename T>
static inline kmp_int32
__kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *h,
bool dep_barrier, kmp_task_t *task) {
KA_TRACE(30, ("__kmp_process_dep_all<%s>: T#%d processing dep_all, "
- "dep_barrier = %d\n", T::name,
- gtid, dep_barrier));
+ "dep_barrier = %d\n",
+ T::name, gtid, dep_barrier));
kmp_info_t *thread = __kmp_threads[gtid];
kmp_int32 npredecessors = 0;
@@ -622,19 +619,19 @@ __kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *h,
}
}
KA_TRACE(30, ("__kmp_process_dep_all<%s>: T#%d found %d predecessors\n",
- T::name, gtid, npredecessors));
+ T::name, gtid, npredecessors));
return npredecessors;
}
-template<typename T>
+template <typename T>
static inline kmp_int32
__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
bool dep_barrier, kmp_int32 ndeps,
kmp_depend_info_t *dep_list, kmp_task_t *task,
kmp_int32 &next_mutex_set, bool filter = true) {
KA_TRACE(30, ("__kmp_process_deps<%s>: T#%d processing %d dependences : "
- "dep_barrier = %d, filter = %d\n", T::name,
- gtid, ndeps, dep_barrier, filter));
+ "dep_barrier = %d, filter = %d\n",
+ T::name, gtid, ndeps, dep_barrier, filter));
kmp_info_t *thread = __kmp_threads[gtid];
kmp_int32 npredecessors = 0;
@@ -713,8 +710,9 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
}
}
}
- KA_TRACE(30, ("__kmp_process_deps<%s>: T#%d found %d predecessors (filter: %d)\n",
- T::name, gtid, npredecessors, filter));
+ KA_TRACE(30,
+ ("__kmp_process_deps<%s>: T#%d found %d predecessors (filter: %d)\n",
+ T::name, gtid, npredecessors, filter));
return npredecessors;
}
@@ -789,7 +787,7 @@ struct taskgraph_deps {
kmp_task_t *task, kmp_depnode_t *node,
kmp_depnode_list_t *plist);
static kmp_depnode_t *ref(kmp_depnode_t *node) { return node; }
- static void deref(kmp_info_t *thread, kmp_depnode_t *node) { }
+ static void deref(kmp_info_t *thread, kmp_depnode_t *node) {}
static void mutex_dep(kmp_info_t *thread, kmp_dephash_entry_t *info,
kmp_depnode_t *node, kmp_int32 &next_mutex_set);
};
@@ -877,15 +875,13 @@ static size_t __kmp_round_up_to_val(size_t size, size_t val) {
} // __kmp_round_up_to_val
// FIXME: C++-ify this.
-static kmp_taskgraph_region_t *
-__kmp_taskgraph_region_alloc(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- kmp_taskgraph_node_t *node,
- kmp_taskgraph_region_t *parent) {
+static kmp_taskgraph_region_t *__kmp_taskgraph_region_alloc(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain, kmp_taskgraph_node_t *node,
+ kmp_taskgraph_region_t *parent) {
kmp_taskgraph_region_t *region =
- (kmp_taskgraph_region_t *)__kmp_fast_allocate(thread,
- sizeof(kmp_taskgraph_region_t));
+ (kmp_taskgraph_region_t *)__kmp_fast_allocate(
+ thread, sizeof(kmp_taskgraph_region_t));
region->owner = taskgraph;
region->type = node ? TASKGRAPH_REGION_NODE : TASKGRAPH_REGION_WAIT;
region->task.node = node;
@@ -905,22 +901,18 @@ __kmp_taskgraph_region_alloc(kmp_info_t *thread,
}
// FIXME: This too.
-static kmp_taskgraph_region_t *
-__kmp_taskgraph_region_alloc(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- enum kmp_taskgraph_region_type type,
- kmp_int32 num_nodes,
- kmp_taskgraph_region_t *parent) {
- kmp_size_t size =
- sizeof(kmp_taskgraph_region_t) +
- num_nodes * sizeof(kmp_taskgraph_region_t *);
+static kmp_taskgraph_region_t *__kmp_taskgraph_region_alloc(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain, enum kmp_taskgraph_region_type type,
+ kmp_int32 num_nodes, kmp_taskgraph_region_t *parent) {
+ kmp_size_t size = sizeof(kmp_taskgraph_region_t) +
+ num_nodes * sizeof(kmp_taskgraph_region_t *);
size = __kmp_round_up_to_val(size, sizeof(kmp_taskgraph_region_t *));
kmp_taskgraph_region_t *region =
- (kmp_taskgraph_region_t *)__kmp_fast_allocate(thread, size);
+ (kmp_taskgraph_region_t *)__kmp_fast_allocate(thread, size);
region->owner = taskgraph;
region->type = type;
- region->inner.children = (kmp_taskgraph_region**)&region[1];
+ region->inner.children = (kmp_taskgraph_region **)&region[1];
region->inner.num_children = num_nodes;
region->mark = TASKGRAPH_UNMARKED;
region->level = -1;
@@ -937,40 +929,36 @@ __kmp_taskgraph_region_alloc(kmp_info_t *thread,
return region;
}
-// This makes a mostly-deep copy of a region. The region itself and children nodes are
-// created new, but node pointers are shared.
-static kmp_taskgraph_region_t *
-__kmp_taskgraph_region_clone(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- kmp_taskgraph_region_t *from,
- kmp_taskgraph_region_t *parent,
- kmp_int32 indent = 0) {
+// This makes a mostly-deep copy of a region. The region itself and children
+// nodes are created new, but node pointers are shared.
+static kmp_taskgraph_region_t *__kmp_taskgraph_region_clone(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain, kmp_taskgraph_region_t *from,
+ kmp_taskgraph_region_t *parent, kmp_int32 indent = 0) {
kmp_taskgraph_region_t *clone = nullptr;
switch (from->type) {
- case TASKGRAPH_REGION_ENTRY:
- case TASKGRAPH_REGION_EXIT:
- clone = __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
- nullptr, parent);
- clone->type = from->type;
- break;
- case TASKGRAPH_REGION_NODE:
- case TASKGRAPH_REGION_WAIT:
- clone = __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
- from->task.node, parent);
- break;
- default: {
- clone = __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
- from->type, from->inner.num_children,
- parent);
- for (kmp_int32 n = 0; n < from->inner.num_children; n++) {
- clone->inner.children[n] =
- __kmp_taskgraph_region_clone(thread, taskgraph, alloc_chain,
- from->inner.children[n], clone,
- indent + 2);
- }
+ case TASKGRAPH_REGION_ENTRY:
+ case TASKGRAPH_REGION_EXIT:
+ clone = __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
+ nullptr, parent);
+ clone->type = from->type;
+ break;
+ case TASKGRAPH_REGION_NODE:
+ case TASKGRAPH_REGION_WAIT:
+ clone = __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
+ from->task.node, parent);
+ break;
+ default: {
+ clone =
+ __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain, from->type,
+ from->inner.num_children, parent);
+ for (kmp_int32 n = 0; n < from->inner.num_children; n++) {
+ clone->inner.children[n] = __kmp_taskgraph_region_clone(
+ thread, taskgraph, alloc_chain, from->inner.children[n], clone,
+ indent + 2);
}
}
+ }
TGDBG("%*scloned region %p from region %p\n", indent, "", clone, from);
return clone;
}
@@ -987,11 +975,9 @@ __kmp_taskgraph_topological_order(kmp_taskgraph_region_t *region,
region->mark = TASKGRAPH_TEMP_MARK;
kmp_int32 max_level = -1;
- for (kmp_taskgraph_region_dep_t *s = region->predecessors;
- s;
- s = s->next) {
+ for (kmp_taskgraph_region_dep_t *s = region->predecessors; s; s = s->next) {
kmp_int32 pred_level =
- __kmp_taskgraph_topological_order(s->region, order_out, outidx);
+ __kmp_taskgraph_topological_order(s->region, order_out, outidx);
max_level = pred_level > max_level ? pred_level : max_level;
}
@@ -1038,9 +1024,8 @@ static kmp_int32 __kmp_region_deplist_len(kmp_taskgraph_region_dep_t *list) {
return len;
}
-static void
-__kmp_region_deplist_free(kmp_info_t *thread,
- kmp_taskgraph_region_dep_t *list) {
+static void __kmp_region_deplist_free(kmp_info_t *thread,
+ kmp_taskgraph_region_dep_t *list) {
while (list) {
kmp_taskgraph_region_dep_t *next = list->next;
__kmp_fast_free(thread, list);
@@ -1063,13 +1048,10 @@ static void __kmp_region_deplist_recycle(kmp_taskgraph_region_dep_t **recycled,
}
}
-static bool
-__kmp_taskgraph_collapse_sequence(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- kmp_taskgraph_region_t **region_p,
- kmp_taskgraph_region_t *parent,
- kmp_int32 &stamp) {
+static bool __kmp_taskgraph_collapse_sequence(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain, kmp_taskgraph_region_t **region_p,
+ kmp_taskgraph_region_t *parent, kmp_int32 &stamp) {
kmp_taskgraph_region_t *region = *region_p;
kmp_taskgraph_region_t *chain_start = region;
kmp_taskgraph_region_t *chain_end = region;
@@ -1094,10 +1076,9 @@ __kmp_taskgraph_collapse_sequence(kmp_info_t *thread,
if (chain_len <= 1)
return false;
- kmp_taskgraph_region_t *seq_region =
- __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
- TASKGRAPH_REGION_SEQUENTIAL, chain_len,
- parent);
+ kmp_taskgraph_region_t *seq_region = __kmp_taskgraph_region_alloc(
+ thread, taskgraph, alloc_chain, TASKGRAPH_REGION_SEQUENTIAL, chain_len,
+ parent);
TGDBG("allocated new seq region: %p (length %d)\n", seq_region, chain_len);
kmp_taskgraph_region_t **worklist_p = region_p;
*worklist_p = seq_region;
@@ -1121,7 +1102,7 @@ __kmp_taskgraph_collapse_sequence(kmp_info_t *thread,
seq_region->level = level;
seq_region->predecessors = seq_region->inner.children[0]->predecessors;
seq_region->successors =
- seq_region->inner.children[chain_len - 1]->successors;
+ seq_region->inner.children[chain_len - 1]->successors;
seq_region->inner.children[0]->predecessors = nullptr;
seq_region->inner.children[chain_len - 1]->successors = nullptr;
@@ -1150,21 +1131,20 @@ __kmp_taskgraph_collapse_sequence(kmp_info_t *thread,
return true;
}
-static const char*
+static const char *
__kmp_taskgraph_region_type_name(kmp_taskgraph_region_type type);
-static void
-__kmp_taskgraph_region_dfs(kmp_taskgraph_region_t *region,
- kmp_taskgraph_region_t **order,
- kmp_int32 &idx, bool use_preds) {
+static void __kmp_taskgraph_region_dfs(kmp_taskgraph_region_t *region,
+ kmp_taskgraph_region_t **order,
+ kmp_int32 &idx, bool use_preds) {
if (order) {
region->timestamp = --idx;
order[idx] = region;
}
region->mark = TASKGRAPH_TEMP_MARK;
for (kmp_taskgraph_region_dep_t *reg = use_preds ? region->predecessors
- : region->successors; reg;
- reg = reg->next) {
+ : region->successors;
+ reg; reg = reg->next) {
if (reg->region->mark == TASKGRAPH_UNMARKED)
__kmp_taskgraph_region_dfs(reg->region, order, idx, use_preds);
}
@@ -1172,12 +1152,10 @@ __kmp_taskgraph_region_dfs(kmp_taskgraph_region_t *region,
#if defined(DEBUG_TASKGRAPH) && defined(CHECK_WORKLIST)
-static void
-__kmp_taskgraph_region_gather_deps(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t *region,
- kmp_taskgraph_region_dep_t **deplist,
- bool &ok) {
+static void __kmp_taskgraph_region_gather_deps(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t *region, kmp_taskgraph_region_dep_t **deplist,
+ bool &ok) {
for (kmp_taskgraph_region_dep_t *dep = *deplist; dep; dep = dep->next) {
if (dep->region == region)
return;
@@ -1187,14 +1165,14 @@ __kmp_taskgraph_region_gather_deps(kmp_info_t *thread,
*deplist);
for (kmp_taskgraph_region_dep_t *pred = region->predecessors; pred;
- pred = pred->next) {
+ pred = pred->next) {
if (pred->region->mark == TASKGRAPH_DELETED) {
fprintf(stderr, "*** Region %p's predecessor %p is a deleted node\n",
region, pred->region);
ok = false;
}
- __kmp_taskgraph_region_gather_deps(thread, taskgraph, pred->region,
- deplist, ok);
+ __kmp_taskgraph_region_gather_deps(thread, taskgraph, pred->region, deplist,
+ ok);
}
for (kmp_taskgraph_region_dep_t *succ = region->successors; succ;
@@ -1204,16 +1182,14 @@ __kmp_taskgraph_region_gather_deps(kmp_info_t *thread,
region, succ->region);
ok = false;
}
- __kmp_taskgraph_region_gather_deps(thread, taskgraph, succ->region,
- deplist, ok);
+ __kmp_taskgraph_region_gather_deps(thread, taskgraph, succ->region, deplist,
+ ok);
}
}
-static bool
-__kmp_taskgraph_region_worklist_check(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t *region,
- const char *where) {
+static bool __kmp_taskgraph_region_worklist_check(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t *region, const char *where) {
kmp_taskgraph_region_dep_t *collected_nodes = nullptr;
bool ok = true;
__kmp_taskgraph_region_gather_deps(thread, taskgraph, region,
@@ -1246,8 +1222,8 @@ __kmp_taskgraph_region_worklist_check(kmp_info_t *thread,
}
if (!in_list) {
fprintf(stderr,
- "*** Region %p is in worklist but not dependency graph (%s)\n",
- r, where);
+ "*** Region %p is in worklist but not dependency graph (%s)\n", r,
+ where);
ok = false;
}
}
@@ -1257,20 +1233,16 @@ __kmp_taskgraph_region_worklist_check(kmp_info_t *thread,
return ok;
}
#else
-static bool
-__kmp_taskgraph_region_worklist_check(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t *region,
- const char *where) {
+static bool __kmp_taskgraph_region_worklist_check(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t *region, const char *where) {
return true;
}
#endif
-static kmp_taskgraph_region_t *
-__kmp_taskgraph_region_dom_intersect(kmp_taskgraph_region_t **order,
- kmp_taskgraph_region_t **doms,
- kmp_taskgraph_region_t *b1,
- kmp_taskgraph_region_t *b2) {
+static kmp_taskgraph_region_t *__kmp_taskgraph_region_dom_intersect(
+ kmp_taskgraph_region_t **order, kmp_taskgraph_region_t **doms,
+ kmp_taskgraph_region_t *b1, kmp_taskgraph_region_t *b2) {
kmp_int32 finger1 = b1->timestamp;
kmp_int32 finger2 = b2->timestamp;
while (finger1 != finger2) {
@@ -1282,10 +1254,10 @@ __kmp_taskgraph_region_dom_intersect(kmp_taskgraph_region_t **order,
return order[finger1];
}
-static void
-__kmp_taskgraph_region_doms(kmp_taskgraph_region_t **order,
- kmp_taskgraph_region_t **doms,
- kmp_int32 worklist_length, bool postdom) {
+static void __kmp_taskgraph_region_doms(kmp_taskgraph_region_t **order,
+ kmp_taskgraph_region_t **doms,
+ kmp_int32 worklist_length,
+ bool postdom) {
bool changed = true;
// Set doms[start_node] <- start_node
doms[worklist_length - 1] = order[worklist_length - 1];
@@ -1296,22 +1268,21 @@ __kmp_taskgraph_region_doms(kmp_taskgraph_region_t **order,
kmp_taskgraph_region_t *b = order[n];
kmp_taskgraph_region_t *new_idom = nullptr;
for (kmp_taskgraph_region_dep_t *pred = postdom ? b->successors
- : b->predecessors; pred;
- pred = pred->next) {
+ : b->predecessors;
+ pred; pred = pred->next) {
if (pred->region->mark == TASKGRAPH_PERMANENT_MARK) {
new_idom = pred->region;
break;
}
}
for (kmp_taskgraph_region_dep_t *pred = postdom ? b->successors
- : b->predecessors; pred;
- pred = pred->next) {
+ : b->predecessors;
+ pred; pred = pred->next) {
if (pred->region == new_idom)
continue;
if (doms[pred->region->timestamp]) {
- new_idom =
- __kmp_taskgraph_region_dom_intersect(order, doms, pred->region,
- new_idom);
+ new_idom = __kmp_taskgraph_region_dom_intersect(
+ order, doms, pred->region, new_idom);
}
}
if (doms[b->timestamp] != new_idom) {
@@ -1323,8 +1294,7 @@ __kmp_taskgraph_region_doms(kmp_taskgraph_region_t **order,
}
}
-static bool
-__kmp_taskgraph_region_mutex_p(kmp_taskgraph_region_t *reg) {
+static bool __kmp_taskgraph_region_mutex_p(kmp_taskgraph_region_t *reg) {
if (reg->type == TASKGRAPH_REGION_NODE)
return reg->mutexset != nullptr;
return false;
@@ -1349,19 +1319,16 @@ __kmp_taskgraph_region_mutex_p(kmp_taskgraph_region_t *reg) {
// We choose the pp with the highest level ("furthest down the graph"), and
// collapse the subgraph into a parallel region.
-static bool
-__kmp_taskgraph_collapse_par_exclusive(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- kmp_taskgraph_region_t **region_p,
- kmp_taskgraph_region_t *parent,
- kmp_int32 &stamp) {
+static bool __kmp_taskgraph_collapse_par_exclusive(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain, kmp_taskgraph_region_t **region_p,
+ kmp_taskgraph_region_t *parent, kmp_int32 &stamp) {
kmp_taskgraph_region_t *region = *region_p;
kmp_int32 num_predecessors = __kmp_region_deplist_len(region->predecessors);
TGDBG("predecessors %d, successors %d\n",
- __kmp_region_deplist_len(region->predecessors),
- __kmp_region_deplist_len(region->successors));
+ __kmp_region_deplist_len(region->predecessors),
+ __kmp_region_deplist_len(region->successors));
if (num_predecessors <= 1)
return false;
@@ -1371,7 +1338,7 @@ __kmp_taskgraph_collapse_par_exclusive(kmp_info_t *thread,
kmp_int32 highest_level = -1;
for (kmp_taskgraph_region_dep_t *pred = region->predecessors; pred;
- pred = pred->next) {
+ pred = pred->next) {
TGDBG("consider predecessor: %p\n", pred->region);
TGDBG("-- successors %d, predecessors %d\n",
__kmp_region_deplist_len(pred->region->successors),
@@ -1385,7 +1352,8 @@ __kmp_taskgraph_collapse_par_exclusive(kmp_info_t *thread,
continue;
bool in_list = false;
TGDBG("pp region: %p (%s)\n", pred_region->predecessors->region,
- __kmp_taskgraph_region_type_name(pred_region->predecessors->region->type));
+ __kmp_taskgraph_region_type_name(
+ pred_region->predecessors->region->type));
kmp_taskgraph_region_t *pp_region = pred_region->predecessors->region;
for (kmp_taskgraph_region_dep_t *pp = pred_preds; pp; pp = pp->next) {
if (pp->region == pp_region) {
@@ -1440,10 +1408,9 @@ __kmp_taskgraph_collapse_par_exclusive(kmp_info_t *thread,
if (preds_for_pp < 2)
continue;
kmp_taskgraph_region_type region_type =
- any_mutex_p ? TASKGRAPH_REGION_EXCLUSIVE : TASKGRAPH_REGION_PARALLEL;
- kmp_taskgraph_region_t *par_region =
- __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain, region_type,
- preds_for_pp, parent);
+ any_mutex_p ? TASKGRAPH_REGION_EXCLUSIVE : TASKGRAPH_REGION_PARALLEL;
+ kmp_taskgraph_region_t *par_region = __kmp_taskgraph_region_alloc(
+ thread, taskgraph, alloc_chain, region_type, preds_for_pp, parent);
changed = true;
TGDBG("allocated %s region: %p\n",
region_type == TASKGRAPH_REGION_EXCLUSIVE ? "exclusive" : "parallel",
@@ -1467,7 +1434,7 @@ __kmp_taskgraph_collapse_par_exclusive(kmp_info_t *thread,
TGDBG("bailing (non-unit pred/succ list length)\n");
continue;
}
- TGDBG("process region %p (%d/%d), level %d\n", pred->region, i+1,
+ TGDBG("process region %p (%d/%d), level %d\n", pred->region, i + 1,
preds_for_pp, pred_region->level);
par_region->inner.children[i] = pred_region;
pred_region->mark = TASKGRAPH_COMBINED;
@@ -1506,8 +1473,7 @@ __kmp_taskgraph_collapse_par_exclusive(kmp_info_t *thread,
par_region->predecessors = par_preds;
par_region->successors = par_succs;
- if (region->type == TASKGRAPH_REGION_WAIT &&
- !found_reduction_data) {
+ if (region->type == TASKGRAPH_REGION_WAIT && !found_reduction_data) {
// If we have no reduction data, we will not create a taskgroup for this
// parallel region at replay time, so we don't need to terminate/discard
// that region when we're done. Clear the taskloop_task flag.
@@ -1579,8 +1545,8 @@ __kmp_taskgraph_collapse_par_exclusive(kmp_info_t *thread,
return changed;
}
-static void
-__kmp_taskgraph_region_dot(kmp_taskgraph_region_t *region, const char *name) {
+static void __kmp_taskgraph_region_dot(kmp_taskgraph_region_t *region,
+ const char *name) {
fprintf(stderr, "digraph %s {\n", name);
for (kmp_taskgraph_region_t *r = region; r; r = r->next) {
if (r->mark == TASKGRAPH_DELETED) {
@@ -1677,16 +1643,13 @@ __kmp_taskgraph_count_edges_to_dominator(kmp_taskgraph_region_t *reg,
// critical point is what it means to clone a task node in this way: that is
// discussed in the commentary of __kmp_taskgraph_rewrite_irreducible.
-static void
-__kmp_taskgraph_clone_subgraph(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- kmp_taskgraph_region_t *cloned_nodes[],
- kmp_taskgraph_region_t *orig_region,
- kmp_taskgraph_region_t *doms[],
- kmp_taskgraph_region_dep_t *preds_with_dom,
- kmp_taskgraph_region_t *region_dom,
- kmp_taskgraph_region_t ***added_worklist) {
+static void __kmp_taskgraph_clone_subgraph(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain,
+ kmp_taskgraph_region_t *cloned_nodes[], kmp_taskgraph_region_t *orig_region,
+ kmp_taskgraph_region_t *doms[], kmp_taskgraph_region_dep_t *preds_with_dom,
+ kmp_taskgraph_region_t *region_dom,
+ kmp_taskgraph_region_t ***added_worklist) {
for (kmp_taskgraph_region_dep_t *pred = preds_with_dom; pred;
pred = pred->next) {
kmp_taskgraph_region_t *pred_region = pred->region;
@@ -1700,9 +1663,8 @@ __kmp_taskgraph_clone_subgraph(kmp_info_t *thread,
pred->region = cloned_nodes[pred_region->timestamp];
continue;
}
- kmp_taskgraph_region_t *cloned_region =
- __kmp_taskgraph_region_clone(thread, taskgraph, alloc_chain,
- pred_region, nullptr);
+ kmp_taskgraph_region_t *cloned_region = __kmp_taskgraph_region_clone(
+ thread, taskgraph, alloc_chain, pred_region, nullptr);
cloned_nodes[pred_region->timestamp] = cloned_region;
**added_worklist = cloned_region;
@@ -1713,9 +1675,8 @@ __kmp_taskgraph_clone_subgraph(kmp_info_t *thread,
kmp_taskgraph_region_dep_t *cloned_preds = nullptr;
for (kmp_taskgraph_region_dep_t *p = pred_region->predecessors; p;
p = p->next) {
- cloned_preds =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- p->region, cloned_preds);
+ cloned_preds = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, p->region, cloned_preds);
}
cloned_region->predecessors = cloned_preds;
// Note pred_region is the original predecessor region here, not the
@@ -1785,12 +1746,10 @@ __kmp_taskgraph_clone_subgraph(kmp_info_t *thread,
//
// For host execution, this is handled by __kmp_exec_descr_link_instances, etc.
-static bool
-__kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **alloc_chain,
- kmp_taskgraph_region_t **region_p,
- kmp_taskgraph_region_t *exitregion) {
+static bool __kmp_taskgraph_rewrite_irreducible(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **alloc_chain, kmp_taskgraph_region_t **region_p,
+ kmp_taskgraph_region_t *exitregion) {
kmp_taskgraph_region_t *entryregion = *region_p;
bool changed = false;
@@ -1811,11 +1770,11 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
#endif
kmp_taskgraph_region_t **order =
- (kmp_taskgraph_region_t **)__kmp_fast_allocate(thread,
- worklist_length * sizeof(kmp_taskgraph_region_t *));
+ (kmp_taskgraph_region_t **)__kmp_fast_allocate(
+ thread, worklist_length * sizeof(kmp_taskgraph_region_t *));
kmp_taskgraph_region_t **doms =
- (kmp_taskgraph_region_t **)__kmp_fast_allocate(thread,
- worklist_length * sizeof(kmp_taskgraph_region_t *));
+ (kmp_taskgraph_region_t **)__kmp_fast_allocate(
+ thread, worklist_length * sizeof(kmp_taskgraph_region_t *));
memset(doms, 0, worklist_length * sizeof(kmp_taskgraph_region_t *));
kmp_int32 cursor = worklist_length;
assert(entryregion->type == TASKGRAPH_REGION_ENTRY);
@@ -1879,8 +1838,8 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
if (passes_pred) {
// We can drop this predecessor.
- TGDBG("dropping pred %p from region %p, dom %p\n",
- pred->region, region, doms[pred->region->timestamp]);
+ TGDBG("dropping pred %p from region %p, dom %p\n", pred->region, region,
+ doms[pred->region->timestamp]);
kmp_taskgraph_region_dep_t *next = pred->next;
kmp_taskgraph_region_dep_t **succp = &pred->region->successors;
while (*succp) {
@@ -1925,7 +1884,7 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
kmp_taskgraph_region_t *this_dom = doms[pred_region->timestamp];
#ifdef DEBUG_TASKGRAPH
kmp_int32 edges_to_dom =
- __kmp_taskgraph_count_edges_to_dominator(pred_region, this_dom);
+ __kmp_taskgraph_count_edges_to_dominator(pred_region, this_dom);
TGDBG("this pred: %p, edges_to_dom=%d\n", pred_region, edges_to_dom);
#endif
bool found = false;
@@ -1947,10 +1906,10 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
TGDBG("region %p: all predecessors have a single dominator\n", region);
if (!pred_bitsets) {
- pred_bitsets = (kmp_bitset_t **) __kmp_fast_allocate(thread,
- sizeof(kmp_bitset_t *) * worklist_length);
- succ_bitsets = (kmp_bitset_t **) __kmp_fast_allocate(thread,
- sizeof(kmp_bitset_t *) * worklist_length);
+ pred_bitsets = (kmp_bitset_t **)__kmp_fast_allocate(
+ thread, sizeof(kmp_bitset_t *) * worklist_length);
+ succ_bitsets = (kmp_bitset_t **)__kmp_fast_allocate(
+ thread, sizeof(kmp_bitset_t *) * worklist_length);
for (kmp_int32 i = 0; i < worklist_length; i++) {
pred_bitsets[i] = __kmp_bitset_alloc(thread, worklist_length);
@@ -1985,21 +1944,19 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
TGDBG("regions %p and %p share all predecessors/successors\n",
order[i], order[j]);
same_preds_and_succs++;
- equal_deps_chain =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- order[j], equal_deps_chain);
+ equal_deps_chain = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, order[j], equal_deps_chain);
if (__kmp_taskgraph_region_mutex_p(order[j]))
any_mutex_p = true;
}
}
if (same_preds_and_succs > 1) {
kmp_taskgraph_region_type region_type =
- any_mutex_p ? TASKGRAPH_REGION_EXCLUSIVE
- : TASKGRAPH_REGION_PARALLEL;
- kmp_taskgraph_region_t *par_region =
- __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
- region_type, same_preds_and_succs,
- nullptr);
+ any_mutex_p ? TASKGRAPH_REGION_EXCLUSIVE
+ : TASKGRAPH_REGION_PARALLEL;
+ kmp_taskgraph_region_t *par_region = __kmp_taskgraph_region_alloc(
+ thread, taskgraph, alloc_chain, region_type, same_preds_and_succs,
+ nullptr);
par_region->inner.children[0] = region;
region->mark = TASKGRAPH_COMBINED;
region->parent = par_region;
@@ -2013,7 +1970,7 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
equal_deps_chain = next;
}
par_region->predecessors =
- par_region->inner.children[0]->predecessors;
+ par_region->inner.children[0]->predecessors;
par_region->inner.children[0]->predecessors = nullptr;
par_region->successors = par_region->inner.children[0]->successors;
par_region->inner.children[0]->successors = nullptr;
@@ -2084,11 +2041,10 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
if (regions_combined_p)
continue;
- assert (num_groups >= 1);
+ assert(num_groups >= 1);
TGDBG("should split region %p (%d)\n", region, region->timestamp);
- TGDBG("clone graph to dominator: %p (%d, %s)\n",
- doms[region->timestamp],
+ TGDBG("clone graph to dominator: %p (%d, %s)\n", doms[region->timestamp],
doms[region->timestamp]->timestamp,
__kmp_taskgraph_region_type_name(doms[region->timestamp]->type));
kmp_taskgraph_region_t *region_dom = doms[region->timestamp];
@@ -2134,9 +2090,8 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
region->predecessors = preds_with_dom;
for (kmp_taskgraph_region_dep_t **rp = ®ion->predecessors; *rp;
rp = &(*rp)->next) {
- kmp_int32 count =
- __kmp_taskgraph_count_edges_to_dominator((*rp)->region,
- dom_groups[grp].dom);
+ kmp_int32 count = __kmp_taskgraph_count_edges_to_dominator(
+ (*rp)->region, dom_groups[grp].dom);
TGDBG("for pred %p, outgoing edges to dom = %d\n", (*rp)->region,
count);
if (count > highest) {
@@ -2166,8 +2121,8 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
__kmp_region_dep_recycle(&taskgraph->recycled_deps, succ);
TGDBG("unlinking successor %p -> %p\n", pred->region, region);
unlinked_successors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- pred->region, unlinked_successors);
+ __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
+ pred->region, unlinked_successors);
*succp = next;
} else {
succp = &succ->next;
@@ -2194,8 +2149,8 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
pred; pred = pred->next) {
kmp_taskgraph_region_t *pred_region = pred->region;
pred_region->successors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- cloned_region, pred_region->successors);
+ __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
+ cloned_region, pred_region->successors);
}
}
@@ -2228,13 +2183,12 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
// the region.
for (kmp_taskgraph_region_dep_t *succ = unlinked_successors; succ;) {
kmp_taskgraph_region_t *cloned_reg =
- cloned_nodes[succ->region->timestamp];
+ cloned_nodes[succ->region->timestamp];
kmp_taskgraph_region_dep_t *next = succ->next;
__kmp_region_dep_recycle(&taskgraph->recycled_deps, succ);
TGDBG("add successor to cloned region: %p -> %p\n", cloned_reg, region);
- cloned_reg->successors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps, region,
- cloned_reg->successors);
+ cloned_reg->successors = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, region, cloned_reg->successors);
succ = next;
}
@@ -2330,7 +2284,7 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
for (kmp_int32 i = 0; i < worklist_length; i++, r = r->next) {
if (r->mark == TASKGRAPH_UNMARKED) {
kmp_int32 level =
- __kmp_taskgraph_topological_order(r, order_out, &outidx);
+ __kmp_taskgraph_topological_order(r, order_out, &outidx);
max_level = level > max_level ? level : max_level;
}
}
@@ -2367,12 +2321,10 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
// much of the heavier processing involved in step (2), so the common case
// should be relatively fast.
-static kmp_taskgraph_region_t *
-__kmp_taskgraph_build_regions(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- kmp_taskgraph_region_t *entryregion,
- kmp_taskgraph_region_t *exitregion) {
+static kmp_taskgraph_region_t *__kmp_taskgraph_build_regions(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain, kmp_taskgraph_region_t *entryregion,
+ kmp_taskgraph_region_t *exitregion) {
bool changed;
kmp_int32 phase = 0;
@@ -2387,17 +2339,17 @@ __kmp_taskgraph_build_regions(kmp_info_t *thread,
changed = false;
TGDBG("starting seq pass\n");
for (kmp_taskgraph_region_t **seq_head = &entryregion; *seq_head;
- seq_head = &(*seq_head)->next) {
+ seq_head = &(*seq_head)->next) {
TGDBG("consider %s region: %p\n",
__kmp_taskgraph_region_type_name((*seq_head)->type), *seq_head);
if ((*seq_head)->mark == TASKGRAPH_COMBINED) {
TGDBG("already combined\n");
continue;
}
- changed |=
- __kmp_taskgraph_collapse_sequence(thread, taskgraph, alloc_chain, seq_head,
- /*parent=*/nullptr, phase);
- TGDBG("changed: %s\n", changed ? "true" : "false");
+ changed |= __kmp_taskgraph_collapse_sequence(thread, taskgraph,
+ alloc_chain, seq_head,
+ /*parent=*/nullptr, phase);
+ TGDBG("changed: %s\n", changed ? "true" : "false");
}
++phase;
__kmp_taskgraph_region_chain_prune(&entryregion);
@@ -2405,17 +2357,16 @@ __kmp_taskgraph_build_regions(kmp_info_t *thread,
"after seq collapse");
TGDBG("starting par/unordered pass\n");
for (kmp_taskgraph_region_t **par_head = &entryregion; *par_head;
- par_head = &(*par_head)->next) {
+ par_head = &(*par_head)->next) {
TGDBG("consider %s region: %p\n",
__kmp_taskgraph_region_type_name((*par_head)->type), *par_head);
if ((*par_head)->mark == TASKGRAPH_COMBINED) {
TGDBG("already combined\n");
continue;
}
- changed |=
- __kmp_taskgraph_collapse_par_exclusive(thread, taskgraph, alloc_chain,
- par_head, /*parent=*/nullptr,
- phase);
+ changed |= __kmp_taskgraph_collapse_par_exclusive(
+ thread, taskgraph, alloc_chain, par_head, /*parent=*/nullptr,
+ phase);
TGDBG("changed: %s\n", changed ? "true" : "false");
}
++phase;
@@ -2440,9 +2391,8 @@ __kmp_taskgraph_build_regions(kmp_info_t *thread,
TGDBG("attempting to collapse irreducible regions\n");
- changed |=
- __kmp_taskgraph_rewrite_irreducible(thread, taskgraph, alloc_chain,
- &entryregion, exitregion);
+ changed |= __kmp_taskgraph_rewrite_irreducible(
+ thread, taskgraph, alloc_chain, &entryregion, exitregion);
if (!changed) {
fprintf(stderr, "FIXME: Failed to transform irreducible graph\n");
@@ -2453,114 +2403,109 @@ __kmp_taskgraph_build_regions(kmp_info_t *thread,
return entryregion;
}
-static void
-__kmp_taskgraph_count_nodes(kmp_taskgraph_region_t *region) {
+static void __kmp_taskgraph_count_nodes(kmp_taskgraph_region_t *region) {
switch (region->type) {
- case TASKGRAPH_REGION_ENTRY:
- case TASKGRAPH_REGION_EXIT:
- return;
- case TASKGRAPH_REGION_NODE:
- case TASKGRAPH_REGION_WAIT: {
- TGDBG("process region %p\n", region);
- region->task.node->u.resolved.count++;
- kmp_taskgraph_region_t *last_region =
+ case TASKGRAPH_REGION_ENTRY:
+ case TASKGRAPH_REGION_EXIT:
+ return;
+ case TASKGRAPH_REGION_NODE:
+ case TASKGRAPH_REGION_WAIT: {
+ TGDBG("process region %p\n", region);
+ region->task.node->u.resolved.count++;
+ kmp_taskgraph_region_t *last_region =
region->task.node->u.resolved.last_region;
- TGDBG("last region: %p\n", last_region);
- if (last_region) {
- kmp_taskgraph_region_t *next = last_region->task.next_instance;
- TGDBG("next: %p\n", next);
- last_region->task.next_instance = region;
- region->task.next_instance = next;
- }
- region->task.node->u.resolved.last_region = region;
- return;
+ TGDBG("last region: %p\n", last_region);
+ if (last_region) {
+ kmp_taskgraph_region_t *next = last_region->task.next_instance;
+ TGDBG("next: %p\n", next);
+ last_region->task.next_instance = region;
+ region->task.next_instance = next;
+ }
+ region->task.node->u.resolved.last_region = region;
+ return;
+ }
+ default:
+ for (kmp_int32 n = 0; n < region->inner.num_children; n++) {
+ __kmp_taskgraph_count_nodes(region->inner.children[n]);
}
- default:
- for (kmp_int32 n = 0; n < region->inner.num_children; n++) {
- __kmp_taskgraph_count_nodes(region->inner.children[n]);
- }
}
}
-static void
-__kmp_taskgraph_gather_mutex_sets(kmp_info_t *thread,
- kmp_taskgraph_region_t *region,
- const kmp_bitset_t *held) {
+static void __kmp_taskgraph_gather_mutex_sets(kmp_info_t *thread,
+ kmp_taskgraph_region_t *region,
+ const kmp_bitset_t *held) {
switch (region->type) {
- case TASKGRAPH_REGION_ENTRY:
- case TASKGRAPH_REGION_EXIT:
- case TASKGRAPH_REGION_WAIT:
- return;
- case TASKGRAPH_REGION_NODE: {
+ case TASKGRAPH_REGION_ENTRY:
+ case TASKGRAPH_REGION_EXIT:
+ case TASKGRAPH_REGION_WAIT:
+ return;
+ case TASKGRAPH_REGION_NODE: {
#ifdef DEBUG_TASKGRAPH
- if (region->mutexset && __kmp_bitset_subset_p(held, region->mutexset)) {
- TGDBG("node is mutually exclusive with held: 0x%llx <: 0x%llx\n",
- (unsigned long long)region->mutexset->bits[0],
- (unsigned long long)held->bits[0]);
- }
-#endif
- return;
+ if (region->mutexset && __kmp_bitset_subset_p(held, region->mutexset)) {
+ TGDBG("node is mutually exclusive with held: 0x%llx <: 0x%llx\n",
+ (unsigned long long)region->mutexset->bits[0],
+ (unsigned long long)held->bits[0]);
}
- case TASKGRAPH_REGION_SEQUENTIAL: {
- kmp_bitset_t *seq_held = __kmp_bitset_alloc(thread, held->bitsize);
- __kmp_bitset_clearall(seq_held);
+#endif
+ return;
+ }
+ case TASKGRAPH_REGION_SEQUENTIAL: {
+ kmp_bitset_t *seq_held = __kmp_bitset_alloc(thread, held->bitsize);
+ __kmp_bitset_clearall(seq_held);
+ for (kmp_int32 child = 0; child < region->inner.num_children; child++) {
+ __kmp_taskgraph_gather_mutex_sets(thread, region->inner.children[child],
+ held);
+ if (region->inner.children[child]->mutexset)
+ __kmp_bitset_or(seq_held, seq_held,
+ region->inner.children[child]->mutexset);
+ }
+ region->mutexset = seq_held;
+ return;
+ }
+ case TASKGRAPH_REGION_PARALLEL:
+ case TASKGRAPH_REGION_EXCLUSIVE: {
+ kmp_bitset_t *par_held = __kmp_bitset_alloc(thread, held->bitsize);
+ kmp_bitset_t *conflicts = __kmp_bitset_alloc(thread, held->bitsize);
+ while (true) {
+ __kmp_bitset_clearall(par_held);
for (kmp_int32 child = 0; child < region->inner.num_children; child++) {
+ __kmp_bitset_clearall(conflicts);
+ for (kmp_int32 other = 0; other < region->inner.num_children; other++) {
+ if (other != child) {
+ if (!region->inner.children[other]->mutexset)
+ __kmp_taskgraph_gather_mutex_sets(
+ thread, region->inner.children[other], held);
+ if (region->inner.children[other]->mutexset)
+ __kmp_bitset_or(conflicts, conflicts,
+ region->inner.children[other]->mutexset);
+ }
+ }
__kmp_taskgraph_gather_mutex_sets(thread, region->inner.children[child],
- held);
+ conflicts);
if (region->inner.children[child]->mutexset)
- __kmp_bitset_or(seq_held, seq_held,
+ __kmp_bitset_or(par_held, par_held,
region->inner.children[child]->mutexset);
}
- region->mutexset = seq_held;
- return;
- }
- case TASKGRAPH_REGION_PARALLEL:
- case TASKGRAPH_REGION_EXCLUSIVE: {
- kmp_bitset_t *par_held = __kmp_bitset_alloc(thread, held->bitsize);
- kmp_bitset_t *conflicts = __kmp_bitset_alloc(thread, held->bitsize);
- while (true) {
- __kmp_bitset_clearall(par_held);
- for (kmp_int32 child = 0; child < region->inner.num_children; child++) {
- __kmp_bitset_clearall(conflicts);
- for (kmp_int32 other = 0; other < region->inner.num_children; other++) {
- if (other != child) {
- if (!region->inner.children[other]->mutexset)
- __kmp_taskgraph_gather_mutex_sets(thread,
- region->inner.children[other],
- held);
- if (region->inner.children[other]->mutexset)
- __kmp_bitset_or(conflicts, conflicts,
- region->inner.children[other]->mutexset);
- }
- }
- __kmp_taskgraph_gather_mutex_sets(thread,
- region->inner.children[child],
- conflicts);
- if (region->inner.children[child]->mutexset)
- __kmp_bitset_or(par_held, par_held,
- region->inner.children[child]->mutexset);
- }
- if (!region->mutexset) {
- region->mutexset = par_held;
- } else if (__kmp_bitset_equal(region->mutexset, par_held)) {
- TGDBG("par mutexes stabilized, exiting loop\n");
- break;
- } else {
- TGDBG("par mutexes not stable, iterating\n");
- __kmp_bitset_copy(region->mutexset, par_held);
- __kmp_bitset_free(thread, par_held);
- }
+ if (!region->mutexset) {
+ region->mutexset = par_held;
+ } else if (__kmp_bitset_equal(region->mutexset, par_held)) {
+ TGDBG("par mutexes stabilized, exiting loop\n");
+ break;
+ } else {
+ TGDBG("par mutexes not stable, iterating\n");
+ __kmp_bitset_copy(region->mutexset, par_held);
+ __kmp_bitset_free(thread, par_held);
}
- __kmp_bitset_free(thread, conflicts);
- return;
}
+ __kmp_bitset_free(thread, conflicts);
+ return;
+ }
}
}
-static int
-__kmp_popcount_cmp(const void *a, const void *b) {
- const kmp_taskgraph_region_t *reg_a = *(kmp_taskgraph_region_t **) a;
- const kmp_taskgraph_region_t *reg_b = *(kmp_taskgraph_region_t **) b;
+static int __kmp_popcount_cmp(const void *a, const void *b) {
+ const kmp_taskgraph_region_t *reg_a = *(kmp_taskgraph_region_t **)a;
+ const kmp_taskgraph_region_t *reg_b = *(kmp_taskgraph_region_t **)b;
kmp_int32 popc_a = 0, popc_b = 0;
if (reg_a->mutexset)
popc_a = __kmp_bitset_popcount(reg_a->mutexset);
@@ -2576,179 +2521,172 @@ __kmp_popcount_cmp(const void *a, const void *b) {
/// Find "mutexinoutset" regions that can be represented without explicit
// mutexes, i.e. using "TASKGRAPH_REGION_EXCLUSIVE".
-static void
-__kmp_taskgraph_find_exclusive_regions(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- kmp_taskgraph_region_t **region_p) {
+static void __kmp_taskgraph_find_exclusive_regions(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain, kmp_taskgraph_region_t **region_p) {
kmp_taskgraph_region_t *region = *region_p;
switch (region->type) {
- case TASKGRAPH_REGION_ENTRY:
- case TASKGRAPH_REGION_EXIT:
- case TASKGRAPH_REGION_NODE:
- case TASKGRAPH_REGION_WAIT:
- break;
- case TASKGRAPH_REGION_SEQUENTIAL:
- case TASKGRAPH_REGION_PARALLEL: {
- for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
- __kmp_taskgraph_find_exclusive_regions(thread, taskgraph, alloc_chain,
- ®ion->inner.children[c]);
- }
- break;
- }
- case TASKGRAPH_REGION_EXCLUSIVE: {
- qsort(region->inner.children, region->inner.num_children,
- sizeof(kmp_taskgraph_region_t *), __kmp_popcount_cmp);
- for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
- TGDBG("building tree: region mutexset = 0x%llx\n",
- (unsigned long long) region->inner.children[c]->mutexset
- ? region->inner.children[c]->mutexset->bits[0] : 0);
- region->inner.children[c]->mark = TASKGRAPH_UNMARKED;
- }
- kmp_bitset_t *conflicts =
+ case TASKGRAPH_REGION_ENTRY:
+ case TASKGRAPH_REGION_EXIT:
+ case TASKGRAPH_REGION_NODE:
+ case TASKGRAPH_REGION_WAIT:
+ break;
+ case TASKGRAPH_REGION_SEQUENTIAL:
+ case TASKGRAPH_REGION_PARALLEL: {
+ for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
+ __kmp_taskgraph_find_exclusive_regions(thread, taskgraph, alloc_chain,
+ ®ion->inner.children[c]);
+ }
+ break;
+ }
+ case TASKGRAPH_REGION_EXCLUSIVE: {
+ qsort(region->inner.children, region->inner.num_children,
+ sizeof(kmp_taskgraph_region_t *), __kmp_popcount_cmp);
+ for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
+ TGDBG("building tree: region mutexset = 0x%llx\n",
+ (unsigned long long)region->inner.children[c]->mutexset
+ ? region->inner.children[c]->mutexset->bits[0]
+ : 0);
+ region->inner.children[c]->mark = TASKGRAPH_UNMARKED;
+ }
+ kmp_bitset_t *conflicts =
__kmp_bitset_alloc(thread, region->mutexset->bitsize);
- kmp_bitset_t *subsets_cover =
+ kmp_bitset_t *subsets_cover =
__kmp_bitset_alloc(thread, region->mutexset->bitsize);
- __kmp_bitset_copy(conflicts, region->mutexset);
- bool irregular = false;
- kmp_int32 combined_children = 0;
- for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
- kmp_bitset_t *candidate = region->inner.children[c]->mutexset;
- if (__kmp_bitset_empty_p(candidate))
- continue;
- __kmp_bitset_clearall(subsets_cover);
- bool found_subset = false;
- bool other_overlaps = false;
- for (kmp_int32 d = c + 1; d < region->inner.num_children; d++) {
- // This could test for a subset in some cases, but that adds
- // complication for later processing. Maybe revisit later if it
- // seems worthwhile.
- // E.g. if we have deps like this:
- //
- // #pragma omp task depend(mutexinoutset: deps[0], deps[1]) { /*a*/ }
- // #pragma omp task depend(mutexinoutset: deps[0]) { /*b*/ }
- // #pragma omp task depend(mutexinoutset: deps[1]) { /*c*/ }
- //
- // This could be represented as:
- //
- // exclusive {
- // node: a
- // parallel {
- // node: b
- // node: c
- // }
- // }
- //
- // We're not doing that yet though.
- if (__kmp_bitset_equal(candidate,
- region->inner.children[d]->mutexset)) {
- found_subset = true;
- __kmp_bitset_or(subsets_cover, subsets_cover,
- region->inner.children[d]->mutexset);
- } else if (__kmp_bitset_intersect_p(
+ __kmp_bitset_copy(conflicts, region->mutexset);
+ bool irregular = false;
+ kmp_int32 combined_children = 0;
+ for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
+ kmp_bitset_t *candidate = region->inner.children[c]->mutexset;
+ if (__kmp_bitset_empty_p(candidate))
+ continue;
+ __kmp_bitset_clearall(subsets_cover);
+ bool found_subset = false;
+ bool other_overlaps = false;
+ for (kmp_int32 d = c + 1; d < region->inner.num_children; d++) {
+ // This could test for a subset in some cases, but that adds
+ // complication for later processing. Maybe revisit later if it
+ // seems worthwhile.
+ // E.g. if we have deps like this:
+ //
+ // #pragma omp task depend(mutexinoutset: deps[0], deps[1]) { /*a*/ }
+ // #pragma omp task depend(mutexinoutset: deps[0]) { /*b*/ }
+ // #pragma omp task depend(mutexinoutset: deps[1]) { /*c*/ }
+ //
+ // This could be represented as:
+ //
+ // exclusive {
+ // node: a
+ // parallel {
+ // node: b
+ // node: c
+ // }
+ // }
+ //
+ // We're not doing that yet though.
+ if (__kmp_bitset_equal(candidate,
+ region->inner.children[d]->mutexset)) {
+ found_subset = true;
+ __kmp_bitset_or(subsets_cover, subsets_cover,
+ region->inner.children[d]->mutexset);
+ } else if (__kmp_bitset_intersect_p(
candidate, region->inner.children[d]->mutexset)) {
- other_overlaps = true;
- break;
- }
- }
- if (!found_subset || other_overlaps)
- continue;
- if (!__kmp_bitset_equal(subsets_cover, candidate)) {
- TGDBG("subsets cover: 0x%llx, candidate: 0x%llx\n",
- (unsigned long long)subsets_cover->bits[0],
- (unsigned long long)candidate->bits[0]);
- irregular = true;
+ other_overlaps = true;
break;
}
- for (kmp_int32 d = c + 1; d < region->inner.num_children; d++) {
- if (region->inner.children[d]->mutexset_parent)
- continue;
- // As above wrt. subsets.
- if (__kmp_bitset_equal(candidate,
- region->inner.children[d]->mutexset)) {
- TGDBG("set index %d's parent to index %d\n", d, c);
- region->inner.children[d]->mutexset_parent =
+ }
+ if (!found_subset || other_overlaps)
+ continue;
+ if (!__kmp_bitset_equal(subsets_cover, candidate)) {
+ TGDBG("subsets cover: 0x%llx, candidate: 0x%llx\n",
+ (unsigned long long)subsets_cover->bits[0],
+ (unsigned long long)candidate->bits[0]);
+ irregular = true;
+ break;
+ }
+ for (kmp_int32 d = c + 1; d < region->inner.num_children; d++) {
+ if (region->inner.children[d]->mutexset_parent)
+ continue;
+ // As above wrt. subsets.
+ if (__kmp_bitset_equal(candidate,
+ region->inner.children[d]->mutexset)) {
+ TGDBG("set index %d's parent to index %d\n", d, c);
+ region->inner.children[d]->mutexset_parent =
region->inner.children[c];
- combined_children++;
- __kmp_bitset_and_not(conflicts, conflicts, candidate);
- }
+ combined_children++;
+ __kmp_bitset_and_not(conflicts, conflicts, candidate);
}
}
- TGDBG("irregular: %s\n", irregular ? "true" : "false");
- TGDBG("final conflicts: 0x%llx\n",
- (unsigned long long)conflicts->bits[0]);
- __kmp_bitset_free(thread, subsets_cover);
- region->type = TASKGRAPH_REGION_PARALLEL;
- if (!irregular && __kmp_bitset_empty_p(conflicts)) {
- TGDBG("transforming exclusive region %p\n", region);
- TGDBG("orig region children: %d\n", region->inner.num_children);
- TGDBG("combined children: %d\n", combined_children);
- if (region->inner.num_children == combined_children + 1) {
- region->type = TASKGRAPH_REGION_EXCLUSIVE;
- } else {
- kmp_taskgraph_region_t *new_par =
- __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
- TASKGRAPH_REGION_PARALLEL,
- region->inner.num_children -
- combined_children,
- nullptr);
- for (kmp_int32 c = region->inner.num_children - 1; c >= 0; c--) {
- kmp_taskgraph_region_t *child = region->inner.children[c];
- // Make mutex set into a circular list.
- if (child->mutexset_parent && child->mark != TASKGRAPH_TEMP_MARK) {
- if (!child->mutexset_parent->mutexset_parent) {
- // child <-> parent
- child->mutexset_parent->mutexset_parent = child;
- child->mutexset_parent->mark = TASKGRAPH_TEMP_MARK;
- } else {
- kmp_taskgraph_region_t *parent = child->mutexset_parent;
- child->mutexset_parent = parent->mutexset_parent;
- parent->mutexset_parent = child;
- parent->mark = TASKGRAPH_TEMP_MARK;
- }
+ }
+ TGDBG("irregular: %s\n", irregular ? "true" : "false");
+ TGDBG("final conflicts: 0x%llx\n", (unsigned long long)conflicts->bits[0]);
+ __kmp_bitset_free(thread, subsets_cover);
+ region->type = TASKGRAPH_REGION_PARALLEL;
+ if (!irregular && __kmp_bitset_empty_p(conflicts)) {
+ TGDBG("transforming exclusive region %p\n", region);
+ TGDBG("orig region children: %d\n", region->inner.num_children);
+ TGDBG("combined children: %d\n", combined_children);
+ if (region->inner.num_children == combined_children + 1) {
+ region->type = TASKGRAPH_REGION_EXCLUSIVE;
+ } else {
+ kmp_taskgraph_region_t *new_par = __kmp_taskgraph_region_alloc(
+ thread, taskgraph, alloc_chain, TASKGRAPH_REGION_PARALLEL,
+ region->inner.num_children - combined_children, nullptr);
+ for (kmp_int32 c = region->inner.num_children - 1; c >= 0; c--) {
+ kmp_taskgraph_region_t *child = region->inner.children[c];
+ // Make mutex set into a circular list.
+ if (child->mutexset_parent && child->mark != TASKGRAPH_TEMP_MARK) {
+ if (!child->mutexset_parent->mutexset_parent) {
+ // child <-> parent
+ child->mutexset_parent->mutexset_parent = child;
+ child->mutexset_parent->mark = TASKGRAPH_TEMP_MARK;
+ } else {
+ kmp_taskgraph_region_t *parent = child->mutexset_parent;
+ child->mutexset_parent = parent->mutexset_parent;
+ parent->mutexset_parent = child;
+ parent->mark = TASKGRAPH_TEMP_MARK;
}
}
- kmp_int32 idx = 0;
- for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
- kmp_taskgraph_region_t *child = region->inner.children[c];
- TGDBG("process child: %p\n", child);
- if (child->mutexset_parent && child->mark != TASKGRAPH_COMBINED) {
- kmp_int32 elems = 0;
- kmp_taskgraph_region_t *next = child;
- do {
- elems++;
- next = next->mutexset_parent;
- } while (next != child);
- TGDBG("make exclusive region with %d children\n", elems);
- kmp_taskgraph_region_t *excl_region =
- __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
- TASKGRAPH_REGION_EXCLUSIVE, elems,
- nullptr);
- kmp_int32 excl_child = 0;
- next = child;
- do {
- excl_region->inner.children[excl_child++] = next;
- next->mark = TASKGRAPH_COMBINED;
- next = next->mutexset_parent;
- } while (next != child);
- assert(excl_child == excl_region->inner.num_children);
- new_par->inner.children[idx++] = excl_region;
- } else if (!child->mutexset_parent) {
- new_par->inner.children[idx++] = child;
- }
+ }
+ kmp_int32 idx = 0;
+ for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
+ kmp_taskgraph_region_t *child = region->inner.children[c];
+ TGDBG("process child: %p\n", child);
+ if (child->mutexset_parent && child->mark != TASKGRAPH_COMBINED) {
+ kmp_int32 elems = 0;
+ kmp_taskgraph_region_t *next = child;
+ do {
+ elems++;
+ next = next->mutexset_parent;
+ } while (next != child);
+ TGDBG("make exclusive region with %d children\n", elems);
+ kmp_taskgraph_region_t *excl_region = __kmp_taskgraph_region_alloc(
+ thread, taskgraph, alloc_chain, TASKGRAPH_REGION_EXCLUSIVE,
+ elems, nullptr);
+ kmp_int32 excl_child = 0;
+ next = child;
+ do {
+ excl_region->inner.children[excl_child++] = next;
+ next->mark = TASKGRAPH_COMBINED;
+ next = next->mutexset_parent;
+ } while (next != child);
+ assert(excl_child == excl_region->inner.num_children);
+ new_par->inner.children[idx++] = excl_region;
+ } else if (!child->mutexset_parent) {
+ new_par->inner.children[idx++] = child;
}
- TGDBG("idx=%d, supposed to be %d\n", idx,
- new_par->inner.num_children);
- assert(idx == new_par->inner.num_children);
- *region_p = new_par;
- region->mark = TASKGRAPH_DELETED;
}
+ TGDBG("idx=%d, supposed to be %d\n", idx, new_par->inner.num_children);
+ assert(idx == new_par->inner.num_children);
+ *region_p = new_par;
+ region->mark = TASKGRAPH_DELETED;
}
- __kmp_bitset_free(thread, conflicts);
- break;
}
- default:
- assert(false && "unreachable");
+ __kmp_bitset_free(thread, conflicts);
+ break;
+ }
+ default:
+ assert(false && "unreachable");
}
}
@@ -2760,59 +2698,55 @@ __kmp_taskgraph_strip_mutex_sets(kmp_info_t *thread,
bool in_exclusive = false) {
kmp_int32 mutexes_needed = 0;
switch (region->type) {
- case TASKGRAPH_REGION_ENTRY:
- case TASKGRAPH_REGION_EXIT:
- case TASKGRAPH_REGION_WAIT:
- assert(!region->mutexset);
- break;
- case TASKGRAPH_REGION_NODE:
- if (region->mutexset) {
- if (in_exclusive) {
- __kmp_bitset_free(thread, region->mutexset);
- region->mutexset = nullptr;
- } else {
- // FIXME: This might be pessimistic -- the remaining mutex sets might
- // have holes or duplicates. We could compact them.
- kmp_int32 m = region->mutexset->bitsize;
- mutexes_needed = std::max(mutexes_needed, m);
- }
- }
- break;
- case TASKGRAPH_REGION_EXCLUSIVE: {
- if (region->mutexset) {
+ case TASKGRAPH_REGION_ENTRY:
+ case TASKGRAPH_REGION_EXIT:
+ case TASKGRAPH_REGION_WAIT:
+ assert(!region->mutexset);
+ break;
+ case TASKGRAPH_REGION_NODE:
+ if (region->mutexset) {
+ if (in_exclusive) {
__kmp_bitset_free(thread, region->mutexset);
region->mutexset = nullptr;
- }
- for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
- kmp_int32 m =
- __kmp_taskgraph_strip_mutex_sets(thread, region->inner.children[c],
- true);
+ } else {
+ // FIXME: This might be pessimistic -- the remaining mutex sets might
+ // have holes or duplicates. We could compact them.
+ kmp_int32 m = region->mutexset->bitsize;
mutexes_needed = std::max(mutexes_needed, m);
}
- break;
}
- default: {
- if (region->mutexset) {
- __kmp_bitset_free(thread, region->mutexset);
- region->mutexset = nullptr;
- }
- for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
- kmp_int32 m =
- __kmp_taskgraph_strip_mutex_sets(thread, region->inner.children[c],
- in_exclusive);
- mutexes_needed = std::max(mutexes_needed, m);
- }
+ break;
+ case TASKGRAPH_REGION_EXCLUSIVE: {
+ if (region->mutexset) {
+ __kmp_bitset_free(thread, region->mutexset);
+ region->mutexset = nullptr;
+ }
+ for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
+ kmp_int32 m = __kmp_taskgraph_strip_mutex_sets(
+ thread, region->inner.children[c], true);
+ mutexes_needed = std::max(mutexes_needed, m);
+ }
+ break;
+ }
+ default: {
+ if (region->mutexset) {
+ __kmp_bitset_free(thread, region->mutexset);
+ region->mutexset = nullptr;
}
+ for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
+ kmp_int32 m = __kmp_taskgraph_strip_mutex_sets(
+ thread, region->inner.children[c], in_exclusive);
+ mutexes_needed = std::max(mutexes_needed, m);
+ }
+ }
}
return mutexes_needed;
}
-static void
-__kmp_taskgraph_exclusive_regions(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- kmp_taskgraph_region_t **region_p,
- kmp_int32 max_mutex) {
+static void __kmp_taskgraph_exclusive_regions(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain, kmp_taskgraph_region_t **region_p,
+ kmp_int32 max_mutex) {
kmp_bitset_t *top = __kmp_bitset_alloc(thread, max_mutex);
__kmp_bitset_clearall(top);
__kmp_taskgraph_gather_mutex_sets(thread, *region_p, top);
@@ -2822,64 +2756,72 @@ __kmp_taskgraph_exclusive_regions(kmp_info_t *thread,
taskgraph->num_mutexes = num_mutexes;
}
-static const char*
+static const char *
__kmp_taskgraph_region_type_name(kmp_taskgraph_region_type type) {
switch (type) {
- case TASKGRAPH_REGION_ENTRY: return "entry";
- case TASKGRAPH_REGION_EXIT: return "exit";
- case TASKGRAPH_REGION_NODE: return "node";
- case TASKGRAPH_REGION_WAIT: return "wait";
- case TASKGRAPH_REGION_PARALLEL: return "parallel";
- case TASKGRAPH_REGION_EXCLUSIVE: return "exclusive";
- case TASKGRAPH_REGION_SEQUENTIAL: return "sequential";
- case TASKGRAPH_REGION_IRREDUCIBLE: return "irreducible";
- default: return "<unknown>";
+ case TASKGRAPH_REGION_ENTRY:
+ return "entry";
+ case TASKGRAPH_REGION_EXIT:
+ return "exit";
+ case TASKGRAPH_REGION_NODE:
+ return "node";
+ case TASKGRAPH_REGION_WAIT:
+ return "wait";
+ case TASKGRAPH_REGION_PARALLEL:
+ return "parallel";
+ case TASKGRAPH_REGION_EXCLUSIVE:
+ return "exclusive";
+ case TASKGRAPH_REGION_SEQUENTIAL:
+ return "sequential";
+ case TASKGRAPH_REGION_IRREDUCIBLE:
+ return "irreducible";
+ default:
+ return "<unknown>";
}
}
#if defined(KMP_DEBUG) || defined(DEBUG_TASKGRAPH)
-static void
-__kmp_dump_taskgraph_regions(FILE *f, kmp_taskgraph_region_t *region,
- int indent = 0) {
+static void __kmp_dump_taskgraph_regions(FILE *f,
+ kmp_taskgraph_region_t *region,
+ int indent = 0) {
switch (region->type) {
- case TASKGRAPH_REGION_ENTRY:
- case TASKGRAPH_REGION_EXIT:
- fprintf(f, "%*s%s node\n", indent, "",
- __kmp_taskgraph_region_type_name(region->type));
- break;
- case TASKGRAPH_REGION_NODE:
- case TASKGRAPH_REGION_WAIT: {
- char set_membership[40];
- if (region->mutexset)
- sprintf(set_membership, " [sets: 0x%llx]",
- (unsigned long long) region->mutexset->bits[0]);
- else
- strcpy(set_membership, "");
- if (region->task.node->u.resolved.count > 1)
- fprintf(f, "%*s%s: %p (* %d)%s\n", indent, "",
- __kmp_taskgraph_region_type_name(region->type),
- region->task.node, region->task.node->u.resolved.count,
- set_membership);
- else
- fprintf(f, "%*s%s: %p%s\n", indent, "",
- __kmp_taskgraph_region_type_name(region->type),
- region->task.node, set_membership);
- break;
- }
- default: {
- char set_membership[40];
- if (region->mutexset)
- sprintf(set_membership, " [sets: 0x%llx]",
- (unsigned long long) region->mutexset->bits[0]);
- else
- strcpy(set_membership, "");
- fprintf(f, "%*s%s%s {\n", indent, "",
- __kmp_taskgraph_region_type_name (region->type), set_membership);
- for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
- __kmp_dump_taskgraph_regions(f, region->inner.children[c], indent + 2);
- }
- fprintf(f, "%*s}\n", indent, "");
+ case TASKGRAPH_REGION_ENTRY:
+ case TASKGRAPH_REGION_EXIT:
+ fprintf(f, "%*s%s node\n", indent, "",
+ __kmp_taskgraph_region_type_name(region->type));
+ break;
+ case TASKGRAPH_REGION_NODE:
+ case TASKGRAPH_REGION_WAIT: {
+ char set_membership[40];
+ if (region->mutexset)
+ sprintf(set_membership, " [sets: 0x%llx]",
+ (unsigned long long)region->mutexset->bits[0]);
+ else
+ strcpy(set_membership, "");
+ if (region->task.node->u.resolved.count > 1)
+ fprintf(f, "%*s%s: %p (* %d)%s\n", indent, "",
+ __kmp_taskgraph_region_type_name(region->type), region->task.node,
+ region->task.node->u.resolved.count, set_membership);
+ else
+ fprintf(f, "%*s%s: %p%s\n", indent, "",
+ __kmp_taskgraph_region_type_name(region->type), region->task.node,
+ set_membership);
+ break;
+ }
+ default: {
+ char set_membership[40];
+ if (region->mutexset)
+ sprintf(set_membership, " [sets: 0x%llx]",
+ (unsigned long long)region->mutexset->bits[0]);
+ else
+ strcpy(set_membership, "");
+ fprintf(f, "%*s%s%s {\n", indent, "",
+ __kmp_taskgraph_region_type_name(region->type), set_membership);
+ for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
+ __kmp_dump_taskgraph_regions(f, region->inner.children[c], indent + 2);
}
+ fprintf(f, "%*s}\n", indent, "");
+ }
}
}
#endif
@@ -2903,18 +2845,17 @@ __kmp_dump_find_parent_regions(kmp_info *thd, kmp_taskgraph_record_t *taskgraph,
if (!in_list) {
list = __kmp_region_deplist_add(thd, &taskgraph->recycled_deps,
region[r].parent, list);
- list = __kmp_dump_find_parent_regions(thd, taskgraph, region[r].parent,
- 1, list);
+ list = __kmp_dump_find_parent_regions(thd, taskgraph, region[r].parent, 1,
+ list);
}
}
return list;
}
-static void
-__kmp_dump_raw_taskgraph_regions(FILE *f, kmp_info *thd,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t *region,
- int numregions, int indent = 0) {
+static void __kmp_dump_raw_taskgraph_regions(FILE *f, kmp_info *thd,
+ kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t *region,
+ int numregions, int indent = 0) {
kmp_taskgraph_region_dep_t *parentlist = nullptr;
kmp_taskgraph_region_dep_t *printedlist = nullptr;
for (int r = 0; r < numregions; r++) {
@@ -2924,24 +2865,23 @@ __kmp_dump_raw_taskgraph_regions(FILE *f, kmp_info *thd,
region[r].type == TASKGRAPH_REGION_EXCLUSIVE ||
region[r].type == TASKGRAPH_REGION_IRREDUCIBLE)
children = region[r].inner.num_children;
- fprintf(f,
- "%*sregion %d (%p): %s%s (%d children) parent %p succs %d preds %d\n",
- indent, "", r, ®ion[r],
- __kmp_taskgraph_region_type_name(region[r].type),
- region[r].mark == TASKGRAPH_COMBINED ? " (combined)" : "",
- children, region[r].parent,
- __kmp_region_deplist_len(region[r].successors),
- __kmp_region_deplist_len(region[r].predecessors));
+ fprintf(
+ f,
+ "%*sregion %d (%p): %s%s (%d children) parent %p succs %d preds %d\n",
+ indent, "", r, ®ion[r],
+ __kmp_taskgraph_region_type_name(region[r].type),
+ region[r].mark == TASKGRAPH_COMBINED ? " (combined)" : "", children,
+ region[r].parent, __kmp_region_deplist_len(region[r].successors),
+ __kmp_region_deplist_len(region[r].predecessors));
if (children > 0) {
for (int c = 0; c < children; c++)
- __kmp_dump_raw_taskgraph_regions(f, thd, taskgraph,
- region->inner.children[c], 1,
- indent + 2);
+ __kmp_dump_raw_taskgraph_regions(
+ f, thd, taskgraph, region->inner.children[c], 1, indent + 2);
}
}
if (indent == 0) {
- parentlist = __kmp_dump_find_parent_regions(thd, taskgraph, region,
- numregions);
+ parentlist =
+ __kmp_dump_find_parent_regions(thd, taskgraph, region, numregions);
fprintf(stderr, "%*sfound %d parent region(s):\n", indent, "",
__kmp_region_deplist_len(parentlist));
for (kmp_taskgraph_region_dep_t *p = parentlist; p; p = p->next) {
@@ -3079,7 +3019,7 @@ __kmp_dump_raw_taskgraph_regions(FILE *f, kmp_info *thd,
// number of places in the graph. Care must be taken at replay time that all
// nodes preceding a multiply-instantiated node execute before the node, and
// that all nodes succeeding each "instantiation point" are executed once the
-// task has executed.
+// task has executed.
//
// The final region type is "exclusive", which arises for "mutexinoutset"
// dependencies that are able to be abstracted away (we can't do this in all
@@ -3134,9 +3074,9 @@ __kmp_dump_raw_taskgraph_regions(FILE *f, kmp_info *thd,
// annotated with a set of mutexes that must be held while executing the task.
// (Shown with [sets: 0xN] in dump output).
-kmp_int32
-__kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
- kmp_taskgraph_record_t *taskgraph) {
+kmp_int32 __kmp_build_taskgraph(kmp_int32 gtid,
+ kmp_taskdata_t *current_taskdata,
+ kmp_taskgraph_record_t *taskgraph) {
kmp_int32 numnodes = taskgraph->num_tasks;
kmp_int32 numregions = numnodes + 2;
kmp_taskgraph_node_t *nodes = taskgraph->record_map;
@@ -3144,9 +3084,8 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
kmp_dephash_t *hash = __kmp_dephash_create(thread, current_taskdata);
bool dep_barrier = false;
- kmp_depnode_t *all_depnodes =
- (kmp_depnode_t *)__kmp_thread_malloc(thread,
- numregions * sizeof(kmp_depnode_t));
+ kmp_depnode_t *all_depnodes = (kmp_depnode_t *)__kmp_thread_malloc(
+ thread, numregions * sizeof(kmp_depnode_t));
kmp_int32 next_mutex_set = 0;
@@ -3162,10 +3101,9 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
node->dn.task = nodes[i].task;
dep_barrier = !nodes[i].task && nodes[i].taskloop_task;
if (!dep_all) {
- __kmp_process_deps<taskgraph_deps>(gtid, node, &hash, dep_barrier,
- nodes[i].u.unresolved.ndeps,
- nodes[i].u.unresolved.dep_list,
- nodes[i].task, next_mutex_set);
+ __kmp_process_deps<taskgraph_deps>(
+ gtid, node, &hash, dep_barrier, nodes[i].u.unresolved.ndeps,
+ nodes[i].u.unresolved.dep_list, nodes[i].task, next_mutex_set);
} else {
__kmp_process_dep_all<taskgraph_deps>(gtid, node, hash, dep_barrier,
nodes[i].task);
@@ -3176,16 +3114,16 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
kmp_int32 outidx = 0;
kmp_taskgraph_region_t *initial_regions =
- (kmp_taskgraph_region_t *)__kmp_fast_allocate(thread,
- sizeof(kmp_taskgraph_region_t) * numregions);
+ (kmp_taskgraph_region_t *)__kmp_fast_allocate(
+ thread, sizeof(kmp_taskgraph_region_t) * numregions);
// FIXME: Something like 'placement new' here?
memset(initial_regions, 0, sizeof(kmp_taskgraph_region_t) * numregions);
kmp_taskgraph_region_t *cfg_barrier = nullptr;
for (kmp_int32 i = 0; i < numnodes; i++) {
- initial_regions[i].type = nodes[i].task ? TASKGRAPH_REGION_NODE
- : TASKGRAPH_REGION_WAIT;
+ initial_regions[i].type =
+ nodes[i].task ? TASKGRAPH_REGION_NODE : TASKGRAPH_REGION_WAIT;
initial_regions[i].task.node = &nodes[i];
initial_regions[i].task.next_instance = &initial_regions[i];
initial_regions[i].parent = nullptr;
@@ -3196,17 +3134,16 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
}
kmp_depnode_t *depnode = &all_depnodes[i];
initial_regions[i].mutexset = depnode->dn.set_membership;
- for (kmp_depnode_list_t *succ = depnode->dn.successors;
- succ;
+ for (kmp_depnode_list_t *succ = depnode->dn.successors; succ;
succ = succ->next) {
kmp_int32 succ_idx = succ->node - all_depnodes;
kmp_taskgraph_region_t *tg_succ = &initial_regions[succ_idx];
tg_succ->predecessors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- &initial_regions[i], tg_succ->predecessors);
+ __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
+ &initial_regions[i], tg_succ->predecessors);
initial_regions[i].successors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps, tg_succ,
- initial_regions[i].successors);
+ __kmp_region_deplist_add(thread, &taskgraph->recycled_deps, tg_succ,
+ initial_regions[i].successors);
}
// Handle control flow dependencies. If a node (e.g. a taskloop task) has
// a wait after it corresponding to the end of an implicit taskgroup, join
@@ -3214,24 +3151,22 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
// it will depend on the barrier.
if (nodes[i].u.unresolved.cfg_successor != -1) {
kmp_int32 cfg_succ = nodes[i].u.unresolved.cfg_successor;
- initial_regions[i].successors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- &initial_regions[cfg_succ],
- initial_regions[i].successors);
- initial_regions[cfg_succ].predecessors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- &initial_regions[i],
- initial_regions[cfg_succ].predecessors);
+ initial_regions[i].successors = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, &initial_regions[cfg_succ],
+ initial_regions[i].successors);
+ initial_regions[cfg_succ].predecessors = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, &initial_regions[i],
+ initial_regions[cfg_succ].predecessors);
}
if (nodes[i].taskloop_task && !nodes[i].task) {
cfg_barrier = &initial_regions[i];
} else if (cfg_barrier) {
- cfg_barrier->successors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- &initial_regions[i], cfg_barrier->successors);
- initial_regions[i].predecessors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- cfg_barrier, initial_regions[i].predecessors);
+ cfg_barrier->successors = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, &initial_regions[i],
+ cfg_barrier->successors);
+ initial_regions[i].predecessors = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, cfg_barrier,
+ initial_regions[i].predecessors);
}
}
@@ -3261,21 +3196,19 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
kmp_int32 nsuccs = __kmp_region_deplist_len(region->successors);
if (npreds == 0) {
initial_regions[entryregion].successors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps, region,
- initial_regions[entryregion].successors);
- region->predecessors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- &initial_regions[entryregion],
- region->predecessors);
+ __kmp_region_deplist_add(thread, &taskgraph->recycled_deps, region,
+ initial_regions[entryregion].successors);
+ region->predecessors = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, &initial_regions[entryregion],
+ region->predecessors);
}
if (nsuccs == 0) {
initial_regions[exitregion].predecessors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps, region,
- initial_regions[exitregion].predecessors);
- region->successors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- &initial_regions[exitregion],
- region->successors);
+ __kmp_region_deplist_add(thread, &taskgraph->recycled_deps, region,
+ initial_regions[exitregion].predecessors);
+ region->successors = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, &initial_regions[exitregion],
+ region->successors);
}
region->owner = taskgraph;
}
@@ -3287,9 +3220,8 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
for (kmp_int32 i = 0; i < numregions; i++) {
if (initial_regions[i].mark == TASKGRAPH_UNMARKED) {
- kmp_int32 level =
- __kmp_taskgraph_topological_order(&initial_regions[i], order_out,
- &outidx);
+ kmp_int32 level = __kmp_taskgraph_topological_order(&initial_regions[i],
+ order_out, &outidx);
max_level = level > max_level ? level : max_level;
}
}
@@ -3307,10 +3239,9 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
kmp_taskgraph_region_t **alloc_chain = &initial_regions[0].alloc_chain;
- kmp_taskgraph_region_t *root_region =
- __kmp_taskgraph_build_regions(thread, taskgraph, alloc_chain,
- &initial_regions[entryregion],
- &initial_regions[exitregion]);
+ kmp_taskgraph_region_t *root_region = __kmp_taskgraph_build_regions(
+ thread, taskgraph, alloc_chain, &initial_regions[entryregion],
+ &initial_regions[exitregion]);
__kmp_taskgraph_count_nodes(root_region);
@@ -3351,14 +3282,14 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
taskgraph->recycled_deps = nullptr;
KG_TRACE(10, ("Processed taskgraph %p (graph_id %" PRIx64 "):\n", taskgraph,
- taskgraph->graph_id));
+ taskgraph->graph_id));
KG_DUMP(10, __kmp_dump_taskgraph_regions(stderr, root_region));
- #ifdef DEBUG_TASKGRAPH
- //__kmp_dump_taskgraph_regions(stderr, root_region);
- //__kmp_dump_raw_taskgraph_regions(stderr, thread, taskgraph,
- // &initial_regions[0], numregions);
- #endif
+#ifdef DEBUG_TASKGRAPH
+//__kmp_dump_taskgraph_regions(stderr, root_region);
+//__kmp_dump_raw_taskgraph_regions(stderr, thread, taskgraph,
+// &initial_regions[0], numregions);
+#endif
KMP_ATOMIC_ST_REL(&taskgraph->status, KMP_TDG_READY);
@@ -3395,16 +3326,14 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
kmp_int32 next_mutex = 0;
if (!dep_all) { // regular dependences
- npredecessors =
- __kmp_process_deps<normal_deps>(gtid, node, hash, dep_barrier,
- ndeps, dep_list, task, next_mutex);
- npredecessors +=
- __kmp_process_deps<normal_deps>(gtid, node, hash, dep_barrier,
- ndeps_noalias, noalias_dep_list, task,
- next_mutex, false);
+ npredecessors = __kmp_process_deps<normal_deps>(
+ gtid, node, hash, dep_barrier, ndeps, dep_list, task, next_mutex);
+ npredecessors += __kmp_process_deps<normal_deps>(
+ gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task,
+ next_mutex, false);
} else { // omp_all_memory dependence
- npredecessors =
- __kmp_process_dep_all<normal_deps>(gtid, node, *hash, dep_barrier, task);
+ npredecessors = __kmp_process_dep_all<normal_deps>(gtid, node, *hash,
+ dep_barrier, task);
}
node->dn.task = task;
diff --git a/openmp/runtime/src/kmp_taskdeps.h b/openmp/runtime/src/kmp_taskdeps.h
index e7df68c3f..e7f237459 100644
--- a/openmp/runtime/src/kmp_taskdeps.h
+++ b/openmp/runtime/src/kmp_taskdeps.h
@@ -130,8 +130,8 @@ static inline void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
gtid, task));
KMP_ACQUIRE_DEPNODE(gtid, node);
- node->dn.task =
- NULL; // mark this task as finished, so no new dependencies are generated
+ node->dn.task =
+ NULL; // mark this task as finished, so no new dependencies are generated
KMP_RELEASE_DEPNODE(gtid, node);
kmp_depnode_list_t *next;
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 962609e53..8d625cd93 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -755,9 +755,9 @@ static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
return ret;
}
-static bool __kmp_taskgraph_exec_descr_finish(kmp_int32 gtid,
- kmp_info_t *thread,
- kmp_taskgraph_exec_descr_t *descr);
+static bool
+__kmp_taskgraph_exec_descr_finish(kmp_int32 gtid, kmp_info_t *thread,
+ kmp_taskgraph_exec_descr_t *descr);
// __kmp_task_finish: bookkeeping to do when a task finishes execution
//
@@ -2109,43 +2109,35 @@ __kmp_fill_exec_descr(kmp_int32, kmp_info_t *, kmp_taskgraph_record_t *,
kmp_taskgraph_exec_descr_t *, kmp_size_t &,
kmp_taskgraph_exec_descr_t **);
-static kmp_int32
-__kmp_pred_list_length(kmp_taskgraph_exec_descr_t *desc) {
+static kmp_int32 __kmp_pred_list_length(kmp_taskgraph_exec_descr_t *desc) {
kmp_int32 res = 0;
for (; desc; desc = desc->predecessor_chain)
++res;
return res;
}
-static kmp_taskgraph_exec_descr_t *
-__kmp_fill_sequential_descr(kmp_int32 gtid, kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t *region,
- kmp_taskdata_t *parent_taskdata,
- kmp_taskgraph_exec_descr_t *exec_descrs,
- kmp_size_t &next_idx,
- kmp_taskgraph_exec_descr_t **succs_to_fill_p) {
+static kmp_taskgraph_exec_descr_t *__kmp_fill_sequential_descr(
+ kmp_int32 gtid, kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t *region, kmp_taskdata_t *parent_taskdata,
+ kmp_taskgraph_exec_descr_t *exec_descrs, kmp_size_t &next_idx,
+ kmp_taskgraph_exec_descr_t **succs_to_fill_p) {
assert(region->type == TASKGRAPH_REGION_SEQUENTIAL);
kmp_taskgraph_exec_descr_t *first_node = nullptr;
for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
- kmp_taskgraph_exec_descr *descr =
- __kmp_fill_exec_descr(gtid, thread, taskgraph, region->inner.children[c],
- parent_taskdata, exec_descrs, next_idx,
- succs_to_fill_p);
+ kmp_taskgraph_exec_descr *descr = __kmp_fill_exec_descr(
+ gtid, thread, taskgraph, region->inner.children[c], parent_taskdata,
+ exec_descrs, next_idx, succs_to_fill_p);
if (!first_node)
first_node = descr;
}
return first_node;
}
-static kmp_taskgraph_exec_descr_t *
-__kmp_fill_par_or_excl_descr(kmp_int32 gtid, kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t *region,
- kmp_taskdata_t *parent_taskdata,
- kmp_taskgraph_exec_descr_t *exec_descrs,
- kmp_size_t &next_idx,
- kmp_taskgraph_exec_descr_t **succs_to_fill_p) {
+static kmp_taskgraph_exec_descr_t *__kmp_fill_par_or_excl_descr(
+ kmp_int32 gtid, kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t *region, kmp_taskdata_t *parent_taskdata,
+ kmp_taskgraph_exec_descr_t *exec_descrs, kmp_size_t &next_idx,
+ kmp_taskgraph_exec_descr_t **succs_to_fill_p) {
assert(region->type == TASKGRAPH_REGION_PARALLEL ||
region->type == TASKGRAPH_REGION_EXCLUSIVE);
@@ -2168,10 +2160,9 @@ __kmp_fill_par_or_excl_descr(kmp_int32 gtid, kmp_info_t *thread,
for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
kmp_taskgraph_exec_descr_t *succs_to_fill = nullptr;
- kmp_taskgraph_exec_descr_t *head =
- __kmp_fill_exec_descr(gtid, thread, taskgraph, region->inner.children[c],
- parent_taskdata, exec_descrs, next_idx,
- &succs_to_fill);
+ kmp_taskgraph_exec_descr_t *head = __kmp_fill_exec_descr(
+ gtid, thread, taskgraph, region->inner.children[c], parent_taskdata,
+ exec_descrs, next_idx, &succs_to_fill);
if (!sibling_list) {
sibling_list = head;
sibling_list->sibling = head;
@@ -2205,106 +2196,101 @@ __kmp_fill_par_or_excl_descr(kmp_int32 gtid, kmp_info_t *thread,
return exec_descr;
}
-static kmp_taskgraph_exec_descr_t *
-__kmp_fill_exec_descr(kmp_int32 gtid, kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t *region,
- kmp_taskdata_t *parent_taskdata,
- kmp_taskgraph_exec_descr_t *exec_descrs,
- kmp_size_t &next_idx,
- kmp_taskgraph_exec_descr_t **succs_to_fill_p) {
+static kmp_taskgraph_exec_descr_t *__kmp_fill_exec_descr(
+ kmp_int32 gtid, kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t *region, kmp_taskdata_t *parent_taskdata,
+ kmp_taskgraph_exec_descr_t *exec_descrs, kmp_size_t &next_idx,
+ kmp_taskgraph_exec_descr_t **succs_to_fill_p) {
switch (region->type) {
- case TASKGRAPH_REGION_ENTRY:
- case TASKGRAPH_REGION_EXIT:
- break;
- case TASKGRAPH_REGION_NODE:
- case TASKGRAPH_REGION_WAIT: {
- kmp_taskgraph_exec_descr_t *incoming_succs_to_fill = *succs_to_fill_p;
- kmp_taskgraph_exec_descr_t *exec_descr = &exec_descrs[next_idx++];
- exec_descr->region = region;
- exec_descr->region->exec_descr = exec_descr;
- exec_descr->nblocks = region->task.node->u.resolved.count - 1;
- exec_descr->npredecessors = __kmp_pred_list_length(incoming_succs_to_fill);
- exec_descr->sibling = exec_descr;
- exec_descr->predecessor_chain = nullptr;
- exec_descr->successor = nullptr;
- exec_descr->next_instance = nullptr;
-
- // Edit the taskdata for this specific instantiation. At present the
- // task/taskdata structures cannot be used simultaneously by different
- // threads. We could duplicate the structures to allow simultaneous issue,
- // but that's not done yet. The exec_descr can already by thread-local,
- // in principle, but for now it points to the taskgraph's single copy
- // of each task/taskdata structure.
- if (region->type == TASKGRAPH_REGION_NODE) {
- kmp_task_t *task = exec_descr->region->task.node->task;
- kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
- taskdata->exec_descr = exec_descr;
- }
+ case TASKGRAPH_REGION_ENTRY:
+ case TASKGRAPH_REGION_EXIT:
+ break;
+ case TASKGRAPH_REGION_NODE:
+ case TASKGRAPH_REGION_WAIT: {
+ kmp_taskgraph_exec_descr_t *incoming_succs_to_fill = *succs_to_fill_p;
+ kmp_taskgraph_exec_descr_t *exec_descr = &exec_descrs[next_idx++];
+ exec_descr->region = region;
+ exec_descr->region->exec_descr = exec_descr;
+ exec_descr->nblocks = region->task.node->u.resolved.count - 1;
+ exec_descr->npredecessors = __kmp_pred_list_length(incoming_succs_to_fill);
+ exec_descr->sibling = exec_descr;
+ exec_descr->predecessor_chain = nullptr;
+ exec_descr->successor = nullptr;
+ exec_descr->next_instance = nullptr;
+
+ // Edit the taskdata for this specific instantiation. At present the
+ // task/taskdata structures cannot be used simultaneously by different
+ // threads. We could duplicate the structures to allow simultaneous issue,
+ // but that's not done yet. The exec_descr can already by thread-local,
+ // in principle, but for now it points to the taskgraph's single copy
+ // of each task/taskdata structure.
+ if (region->type == TASKGRAPH_REGION_NODE) {
+ kmp_task_t *task = exec_descr->region->task.node->task;
+ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+ taskdata->exec_descr = exec_descr;
+ }
- for (kmp_taskgraph_exec_descr_t *pred = incoming_succs_to_fill; pred;
- pred = pred->predecessor_chain) {
- pred->successor = exec_descr;
- }
+ for (kmp_taskgraph_exec_descr_t *pred = incoming_succs_to_fill; pred;
+ pred = pred->predecessor_chain) {
+ pred->successor = exec_descr;
+ }
- *succs_to_fill_p = exec_descr;
+ *succs_to_fill_p = exec_descr;
- return exec_descr;
- }
- case TASKGRAPH_REGION_SEQUENTIAL:
- return __kmp_fill_sequential_descr(gtid, thread, taskgraph, region,
- parent_taskdata, exec_descrs,
- next_idx, succs_to_fill_p);
- case TASKGRAPH_REGION_PARALLEL:
- case TASKGRAPH_REGION_EXCLUSIVE:
- return __kmp_fill_par_or_excl_descr(gtid, thread, taskgraph, region,
- parent_taskdata, exec_descrs,
- next_idx, succs_to_fill_p);
+ return exec_descr;
+ }
+ case TASKGRAPH_REGION_SEQUENTIAL:
+ return __kmp_fill_sequential_descr(gtid, thread, taskgraph, region,
+ parent_taskdata, exec_descrs, next_idx,
+ succs_to_fill_p);
+ case TASKGRAPH_REGION_PARALLEL:
+ case TASKGRAPH_REGION_EXCLUSIVE:
+ return __kmp_fill_par_or_excl_descr(gtid, thread, taskgraph, region,
+ parent_taskdata, exec_descrs, next_idx,
+ succs_to_fill_p);
}
return nullptr;
}
#ifdef DEBUG_TASKGRAPH
-static void
-__kmp_debug_taskgraph_exec_descr(kmp_taskgraph_exec_descr_t *descrs,
- kmp_size_t count) {
+static void __kmp_debug_taskgraph_exec_descr(kmp_taskgraph_exec_descr_t *descrs,
+ kmp_size_t count) {
fprintf(stderr, "digraph ExecDescr {\n");
fprintf(stderr, " end [shape=diamond]\n");
for (kmp_size_t i = 0; i < count; i++) {
kmp_taskgraph_exec_descr_t *descr = &descrs[i];
fprintf(stderr, " \"%p\" [label=< <B>", descr->region);
switch (descr->region->type) {
- case TASKGRAPH_REGION_PARALLEL:
- fprintf(stderr, "par</B> %p<BR/>preds=%d", descr->region,
+ case TASKGRAPH_REGION_PARALLEL:
+ fprintf(stderr, "par</B> %p<BR/>preds=%d", descr->region,
+ descr->npredecessors.load());
+ break;
+ case TASKGRAPH_REGION_EXCLUSIVE:
+ fprintf(stderr, "excl</B> %p<BR/>preds=%d", descr->region,
+ descr->npredecessors.load());
+ break;
+ case TASKGRAPH_REGION_NODE:
+ if (descr->region->task.node->u.resolved.count > 1) {
+ fprintf(stderr, "task</B> %p<BR/>preds=%d instances=%d",
+ descr->region->task.node, descr->npredecessors.load(),
+ descr->region->task.node->u.resolved.count);
+ } else {
+ fprintf(stderr, "task</B> %p<BR/>preds=%d", descr->region->task.node,
descr->npredecessors.load());
- break;
- case TASKGRAPH_REGION_EXCLUSIVE:
- fprintf(stderr, "excl</B> %p<BR/>preds=%d", descr->region,
+ }
+ break;
+ case TASKGRAPH_REGION_WAIT:
+ if (descr->region->task.node->u.resolved.count > 1) {
+ fprintf(stderr, "wait</B> %p<BR/>preds=%d instances=%d", descr->region,
+ descr->npredecessors.load(),
+ descr->region->task.node->u.resolved.count);
+ } else {
+ fprintf(stderr, "wait</B> %p<BR/>preds=%d", descr->region,
descr->npredecessors.load());
- break;
- case TASKGRAPH_REGION_NODE:
- if (descr->region->task.node->u.resolved.count > 1) {
- fprintf(stderr, "task</B> %p<BR/>preds=%d instances=%d",
- descr->region->task.node,
- descr->npredecessors.load(),
- descr->region->task.node->u.resolved.count);
- } else {
- fprintf(stderr, "task</B> %p<BR/>preds=%d", descr->region->task.node,
- descr->npredecessors.load());
- }
- break;
- case TASKGRAPH_REGION_WAIT:
- if (descr->region->task.node->u.resolved.count > 1) {
- fprintf(stderr, "wait</B> %p<BR/>preds=%d instances=%d",
- descr->region, descr->npredecessors.load(),
- descr->region->task.node->u.resolved.count);
- } else {
- fprintf(stderr, "wait</B> %p<BR/>preds=%d", descr->region,
- descr->npredecessors.load());
- }
- break;
- default:
- fprintf(stderr, "???</B>");
+ }
+ break;
+ default:
+ fprintf(stderr, "???</B>");
}
fprintf(stderr, " >, shape=box]\n");
@@ -2312,9 +2298,10 @@ __kmp_debug_taskgraph_exec_descr(kmp_taskgraph_exec_descr_t *descrs,
descr->region->type == TASKGRAPH_REGION_WAIT) &&
descr->region->task.node->u.resolved.count > 1) {
kmp_taskgraph_region_t *region = descr->region;
- fprintf(stderr,
- " \"%p\" -> \"%p\" [style=dotted, color=blue, constraint=false]\n",
- region, region->task.next_instance);
+ fprintf(
+ stderr,
+ " \"%p\" -> \"%p\" [style=dotted, color=blue, constraint=false]\n",
+ region, region->task.next_instance);
}
if (descr->successor) {
@@ -2344,9 +2331,8 @@ __kmp_debug_taskgraph_exec_descr(kmp_taskgraph_exec_descr_t *descrs,
}
#endif
-static void
-__kmp_exec_descr_link_instances(kmp_taskgraph_exec_descr_t *descrs,
- kmp_size_t count) {
+static void __kmp_exec_descr_link_instances(kmp_taskgraph_exec_descr_t *descrs,
+ kmp_size_t count) {
for (kmp_size_t i = 0; i < count; i++) {
kmp_taskgraph_exec_descr_t *descr = &descrs[i];
if (descr->region->type == TASKGRAPH_REGION_NODE ||
@@ -2357,9 +2343,10 @@ __kmp_exec_descr_link_instances(kmp_taskgraph_exec_descr_t *descrs,
/// Reset, reparent and regroup the recorded task TASK and re-invoke it.
-static void
-__kmp_omp_tg_task(kmp_int32 gtid, kmp_task_t *task, kmp_taskgroup_t *taskgroup,
- kmp_taskdata_t *parent_taskdata, bool serialize_immediate) {
+static void __kmp_omp_tg_task(kmp_int32 gtid, kmp_task_t *task,
+ kmp_taskgroup_t *taskgroup,
+ kmp_taskdata_t *parent_taskdata,
+ bool serialize_immediate) {
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
taskdata->td_parent = parent_taskdata;
@@ -2368,8 +2355,8 @@ __kmp_omp_tg_task(kmp_int32 gtid, kmp_task_t *task, kmp_taskgroup_t *taskgroup,
taskdata->td_flags.freed = 0;
taskdata->td_flags.executing = 0;
taskdata->td_flags.task_serial =
- (parent_taskdata->td_flags.final ||
- taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser);
+ (parent_taskdata->td_flags.final || taskdata->td_flags.team_serial ||
+ taskdata->td_flags.tasking_ser);
KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
@@ -2389,79 +2376,76 @@ struct kmp_taskred_input;
template <typename T>
void *__kmp_task_reduction_init(int gtid, int num, T *data);
-static void
-__kmp_taskgraph_exec_descr_start(kmp_int32 gtid, kmp_info_t *thread,
- kmp_taskgraph_exec_descr_t *descr,
- kmp_taskgroup_t *taskgroup) {
+static void __kmp_taskgraph_exec_descr_start(kmp_int32 gtid, kmp_info_t *thread,
+ kmp_taskgraph_exec_descr_t *descr,
+ kmp_taskgroup_t *taskgroup) {
kmp_int32 npredecessors = KMP_ATOMIC_DEC(&descr->npredecessors) - 1;
if (npredecessors > 0)
return;
switch (descr->region->type) {
- case TASKGRAPH_REGION_NODE:
- case TASKGRAPH_REGION_WAIT: {
- kmp_taskgraph_exec_descr_t *lowest_descr = nullptr, *iter = descr;
- do {
- if (!lowest_descr || lowest_descr > iter)
- lowest_descr = iter;
- iter = iter->next_instance;
- } while (iter != descr);
- kmp_int32 nblocks = KMP_ATOMIC_DEC(&lowest_descr->nblocks);
- if (nblocks <= 0) {
- if (descr->region->type == TASKGRAPH_REGION_NODE) {
- kmp_task_t *task = descr->region->task.node->task;
- kmp_taskdata_t *current_taskdata = thread->th.th_current_task;
- __kmp_omp_tg_task(gtid, task, taskgroup, current_taskdata, false);
- } else {
- // There's no task for a 'taskwait', so start successors immediately.
- kmp_taskgraph_exec_descr_t *walk = descr;
- do {
- if (walk->successor) {
- __kmp_taskgraph_exec_descr_start(gtid, thread, walk->successor,
- taskgroup);
- }
- walk = walk->next_instance;
- } while (walk != descr);
-
- }
- }
- break;
- }
- case TASKGRAPH_REGION_PARALLEL: {
- if (descr->region->reduce_input) {
- // If there are reductions associated with this parallel region, we
- // start a new taskgroup here.
- __kmpc_taskgroup(/*loc=*/nullptr, gtid);
- // Update variable to the newly-created taskgroup.
- taskgroup = thread->th.th_current_task->td_taskgroup;
- __kmp_task_reduction_init(gtid,
- descr->region->reduce_input->reduce_num_data,
- (struct kmp_taskred_input *)
- descr->region->reduce_input->reduce_data);
+ case TASKGRAPH_REGION_NODE:
+ case TASKGRAPH_REGION_WAIT: {
+ kmp_taskgraph_exec_descr_t *lowest_descr = nullptr, *iter = descr;
+ do {
+ if (!lowest_descr || lowest_descr > iter)
+ lowest_descr = iter;
+ iter = iter->next_instance;
+ } while (iter != descr);
+ kmp_int32 nblocks = KMP_ATOMIC_DEC(&lowest_descr->nblocks);
+ if (nblocks <= 0) {
+ if (descr->region->type == TASKGRAPH_REGION_NODE) {
+ kmp_task_t *task = descr->region->task.node->task;
+ kmp_taskdata_t *current_taskdata = thread->th.th_current_task;
+ __kmp_omp_tg_task(gtid, task, taskgroup, current_taskdata, false);
+ } else {
+ // There's no task for a 'taskwait', so start successors immediately.
+ kmp_taskgraph_exec_descr_t *walk = descr;
+ do {
+ if (walk->successor) {
+ __kmp_taskgraph_exec_descr_start(gtid, thread, walk->successor,
+ taskgroup);
+ }
+ walk = walk->next_instance;
+ } while (walk != descr);
}
- kmp_taskgraph_exec_descr_t *head = descr->successor;
- kmp_taskgraph_exec_descr_t *item = head;
- do {
- __kmp_taskgraph_exec_descr_start(gtid, thread, item, taskgroup);
- item = item->sibling;
- } while (item != head);
- if (descr->region->reduce_input)
- __kmpc_end_taskgroup(/*loc=*/nullptr, gtid);
- break;
}
- case TASKGRAPH_REGION_EXCLUSIVE: {
- kmp_taskgraph_exec_descr_t *head = descr->successor;
- kmp_taskgraph_exec_descr_t *item = head;
- do {
- assert(item->region->type == TASKGRAPH_REGION_NODE);
- kmp_task_t *task = item->region->task.node->task;
- kmp_taskdata_t *current_taskdata = thread->th.th_current_task;
- __kmp_omp_tg_task(gtid, task, taskgroup, current_taskdata, true);
- item = item->sibling;
- } while (item != head);
- break;
+ break;
+ }
+ case TASKGRAPH_REGION_PARALLEL: {
+ if (descr->region->reduce_input) {
+ // If there are reductions associated with this parallel region, we
+ // start a new taskgroup here.
+ __kmpc_taskgroup(/*loc=*/nullptr, gtid);
+ // Update variable to the newly-created taskgroup.
+ taskgroup = thread->th.th_current_task->td_taskgroup;
+ __kmp_task_reduction_init(
+ gtid, descr->region->reduce_input->reduce_num_data,
+ (struct kmp_taskred_input *)descr->region->reduce_input->reduce_data);
}
- default: ;
+ kmp_taskgraph_exec_descr_t *head = descr->successor;
+ kmp_taskgraph_exec_descr_t *item = head;
+ do {
+ __kmp_taskgraph_exec_descr_start(gtid, thread, item, taskgroup);
+ item = item->sibling;
+ } while (item != head);
+ if (descr->region->reduce_input)
+ __kmpc_end_taskgroup(/*loc=*/nullptr, gtid);
+ break;
+ }
+ case TASKGRAPH_REGION_EXCLUSIVE: {
+ kmp_taskgraph_exec_descr_t *head = descr->successor;
+ kmp_taskgraph_exec_descr_t *item = head;
+ do {
+ assert(item->region->type == TASKGRAPH_REGION_NODE);
+ kmp_task_t *task = item->region->task.node->task;
+ kmp_taskdata_t *current_taskdata = thread->th.th_current_task;
+ __kmp_omp_tg_task(gtid, task, taskgroup, current_taskdata, true);
+ item = item->sibling;
+ } while (item != head);
+ break;
+ }
+ default:;
}
}
@@ -2469,60 +2453,58 @@ static bool
__kmp_taskgraph_exec_descr_finish(kmp_int32 gtid, kmp_info_t *thread,
kmp_taskgraph_exec_descr_t *descr) {
switch (descr->region->type) {
- case TASKGRAPH_REGION_NODE: {
- kmp_task_t *task = descr->region->task.node->task;
- kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
- taskdata->td_flags.started = 0;
- taskdata->td_flags.executing = 0;
- taskdata->td_flags.complete = 0;
- taskdata->td_flags.freed = 0;
- bool any_successors = false;
- kmp_taskgraph_exec_descr_t *walk = descr;
- do {
- if (walk->successor) {
- any_successors = true;
- __kmp_taskgraph_exec_descr_start(gtid, thread, walk->successor,
- taskdata->td_taskgroup);
- }
- walk = walk->next_instance;
- } while (walk != descr);
- return any_successors;
- }
- default:
- fprintf(stderr, "unexpected exec descr type for finish? (%p)\n", descr);
- exit(1);
+ case TASKGRAPH_REGION_NODE: {
+ kmp_task_t *task = descr->region->task.node->task;
+ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+ taskdata->td_flags.started = 0;
+ taskdata->td_flags.executing = 0;
+ taskdata->td_flags.complete = 0;
+ taskdata->td_flags.freed = 0;
+ bool any_successors = false;
+ kmp_taskgraph_exec_descr_t *walk = descr;
+ do {
+ if (walk->successor) {
+ any_successors = true;
+ __kmp_taskgraph_exec_descr_start(gtid, thread, walk->successor,
+ taskdata->td_taskgroup);
+ }
+ walk = walk->next_instance;
+ } while (walk != descr);
+ return any_successors;
+ }
+ default:
+ fprintf(stderr, "unexpected exec descr type for finish? (%p)\n", descr);
+ exit(1);
}
return false;
}
-static kmp_size_t
-__kmp_exec_descr_count(kmp_taskgraph_region_t *region) {
+static kmp_size_t __kmp_exec_descr_count(kmp_taskgraph_region_t *region) {
kmp_size_t sum = 0;
switch (region->type) {
- case TASKGRAPH_REGION_ENTRY:
- case TASKGRAPH_REGION_EXIT:
- return 0;
- case TASKGRAPH_REGION_NODE:
- case TASKGRAPH_REGION_WAIT:
- return 1;
- case TASKGRAPH_REGION_PARALLEL:
- case TASKGRAPH_REGION_EXCLUSIVE:
- sum++;
- KMP_FALLTHROUGH();
- case TASKGRAPH_REGION_SEQUENTIAL:
- for (kmp_int32 i = 0; i < region->inner.num_children; i++)
- sum += __kmp_exec_descr_count(region->inner.children[i]);
- break;
- default:
- fprintf(stderr, "unexpected region type\n");
- exit(1);
+ case TASKGRAPH_REGION_ENTRY:
+ case TASKGRAPH_REGION_EXIT:
+ return 0;
+ case TASKGRAPH_REGION_NODE:
+ case TASKGRAPH_REGION_WAIT:
+ return 1;
+ case TASKGRAPH_REGION_PARALLEL:
+ case TASKGRAPH_REGION_EXCLUSIVE:
+ sum++;
+ KMP_FALLTHROUGH();
+ case TASKGRAPH_REGION_SEQUENTIAL:
+ for (kmp_int32 i = 0; i < region->inner.num_children; i++)
+ sum += __kmp_exec_descr_count(region->inner.children[i]);
+ break;
+ default:
+ fprintf(stderr, "unexpected region type\n");
+ exit(1);
}
return sum;
}
-
// Task Reduction implementation
//
// Note: initial implementation didn't take into account the possibility
@@ -2710,8 +2692,8 @@ void *__kmpc_taskred_init(int gtid, int num, void *data) {
return __kmp_task_reduction_init(gtid, num, (kmp_taskred_input_t *)data);
}
-static kmp_taskgraph_record_t *__kmp_taskgraph_or_parent_recording(
- kmp_taskgroup_t *taskgroup) {
+static kmp_taskgraph_record_t *
+__kmp_taskgraph_or_parent_recording(kmp_taskgroup_t *taskgroup) {
kmp_taskgraph_record_t *rec = nullptr;
for (; taskgroup; taskgroup = taskgroup->parent) {
@@ -2732,13 +2714,15 @@ void *__kmpc_taskgraph_taskred_init(kmp_int32 gtid, kmp_int32 num, void *data) {
kmp_taskgraph_status_t status = KMP_ATOMIC_LD_ACQ(&rec->status);
if (status == KMP_TDG_RECORDING) {
kmp_taskgraph_reduce_input_data_t *input_data =
- (kmp_taskgraph_reduce_input_data_t *)
- __kmp_fast_allocate(thread,
- sizeof(kmp_taskgraph_reduce_input_data_t));
+ (kmp_taskgraph_reduce_input_data_t *)__kmp_fast_allocate(
+ thread, sizeof(kmp_taskgraph_reduce_input_data_t));
// The compiler might build the reduction input data on the stack, so
// we must make a copy.
- input_data->reduce_data = __kmp_fast_allocate(thread, sizeof(kmp_taskred_input_t) * num);
- KMP_MEMCPY(input_data->reduce_data, data, sizeof(kmp_taskred_input_t) * num);;
+ input_data->reduce_data =
+ __kmp_fast_allocate(thread, sizeof(kmp_taskred_input_t) * num);
+ KMP_MEMCPY(input_data->reduce_data, data,
+ sizeof(kmp_taskred_input_t) * num);
+ ;
input_data->reduce_num_data = num;
taskgroup->taskgraph.reduce_input = input_data;
} else if (status == KMP_TDG_READY)
@@ -3175,29 +3159,26 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
#endif
}
-void
-__kmp_replay_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
- kmp_taskgraph_record_t *taskgraph, kmp_uint32 graph_id,
- kmp_taskgroup_t *taskgroup) {
+void __kmp_replay_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
+ kmp_taskgraph_record_t *taskgraph,
+ kmp_uint32 graph_id, kmp_taskgroup_t *taskgroup) {
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskgraph_exec_descr_t *exec_descrs = taskgraph->exec_descrs;
if (!exec_descrs) {
kmp_int32 exec_descr_count = __kmp_exec_descr_count(taskgraph->root);
- exec_descrs =
- (kmp_taskgraph_exec_descr_t*)__kmp_thread_malloc(thread,
- exec_descr_count * sizeof(kmp_taskgraph_exec_descr_t));
+ exec_descrs = (kmp_taskgraph_exec_descr_t *)__kmp_thread_malloc(
+ thread, exec_descr_count * sizeof(kmp_taskgraph_exec_descr_t));
taskgraph->exec_descrs = exec_descrs;
taskgraph->exec_descr_size = exec_descr_count;
}
kmp_taskgraph_exec_descr_t *succs_to_fill = nullptr;
kmp_size_t next_idx = 0;
- kmp_taskgraph_exec_descr_t *head =
- __kmp_fill_exec_descr(gtid, thread, taskgraph, taskgraph->root,
- current_taskdata, exec_descrs, next_idx,
- &succs_to_fill);
+ kmp_taskgraph_exec_descr_t *head = __kmp_fill_exec_descr(
+ gtid, thread, taskgraph, taskgraph->root, current_taskdata, exec_descrs,
+ next_idx, &succs_to_fill);
assert(next_idx == taskgraph->exec_descr_size);
__kmp_exec_descr_link_instances(exec_descrs, taskgraph->exec_descr_size);
@@ -4984,9 +4965,9 @@ public:
}
};
-kmp_taskgraph_node_t* __kmp_taskgraph_node_alloc(kmp_taskgraph_record_t *rec,
- kmp_task_t *task,
- kmp_size_t *index_p = nullptr) {
+kmp_taskgraph_node_t *
+__kmp_taskgraph_node_alloc(kmp_taskgraph_record_t *rec, kmp_task_t *task,
+ kmp_size_t *index_p = nullptr) {
kmp_int32 gtid = rec->gtid;
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskgraph_node_t *new_task = nullptr;
@@ -4995,14 +4976,14 @@ kmp_taskgraph_node_t* __kmp_taskgraph_node_alloc(kmp_taskgraph_record_t *rec,
if (!rec->record_map) {
rec->nodes_allocated = 4;
- rec->record_map = (kmp_taskgraph_node_t *)__kmp_thread_malloc(thread,
- rec->nodes_allocated * sizeof(kmp_taskgraph_node_t));
+ rec->record_map = (kmp_taskgraph_node_t *)__kmp_thread_malloc(
+ thread, rec->nodes_allocated * sizeof(kmp_taskgraph_node_t));
}
if (rec->num_tasks >= rec->nodes_allocated) {
- rec->record_map =
- (kmp_taskgraph_node_t *)__kmp_thread_realloc(thread, rec->record_map,
- 2 * rec->nodes_allocated * sizeof(kmp_taskgraph_node_t));
+ rec->record_map = (kmp_taskgraph_node_t *)__kmp_thread_realloc(
+ thread, rec->record_map,
+ 2 * rec->nodes_allocated * sizeof(kmp_taskgraph_node_t));
rec->nodes_allocated *= 2;
}
@@ -5039,18 +5020,15 @@ kmp_taskgraph_node_t* __kmp_taskgraph_node_alloc(kmp_taskgraph_record_t *rec,
// tc Iterations count
// task_dup Tasks duplication routine
// codeptr_ra Return address for OMPT events
-static void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
- kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
- kmp_int32 nogroup, kmp_uint64 ub_glob,
- kmp_uint64 num_tasks, kmp_uint64 grainsize,
- kmp_uint64 extras, kmp_int64 last_chunk,
- kmp_uint64 tc,
+static void __kmp_taskloop_linear(
+ ident_t *loc, int gtid, kmp_task_t *task, kmp_uint64 *lb, kmp_uint64 *ub,
+ kmp_int64 st, kmp_int32 nogroup, kmp_uint64 ub_glob, kmp_uint64 num_tasks,
+ kmp_uint64 grainsize, kmp_uint64 extras, kmp_int64 last_chunk,
+ kmp_uint64 tc,
#if OMPT_SUPPORT
- void *codeptr_ra,
+ void *codeptr_ra,
#endif
- void *task_dup,
- kmp_taskgraph_record_t *taskgraph_rec =
- nullptr) {
+ void *task_dup, kmp_taskgraph_record_t *taskgraph_rec = nullptr) {
KMP_COUNT_BLOCK(OMP_TASKLOOP);
KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
@@ -5134,7 +5112,7 @@ static void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
kmp_size_t rec_index = -1;
// Record the task in the taskgraph.
kmp_taskgraph_node_t *node =
- __kmp_taskgraph_node_alloc(taskgraph_rec, next_task, &rec_index);
+ __kmp_taskgraph_node_alloc(taskgraph_rec, next_task, &rec_index);
kmp_taskgroup_t *taskgroup = current_task->td_taskgroup;
if (taskgroup->taskgraph.reduce_input) {
node->reduce_input = taskgroup->taskgraph.reduce_input;
@@ -5151,8 +5129,8 @@ static void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
if (nogroup)
taskgraph_rec->record_map[rec_index].u.unresolved.cfg_successor = -1;
else if (taskloop_prev_idx != -1)
- taskgraph_rec->record_map[taskloop_prev_idx].u.unresolved.cfg_successor =
- rec_index;
+ taskgraph_rec->record_map[taskloop_prev_idx]
+ .u.unresolved.cfg_successor = rec_index;
if (taskloop_first_idx == -1)
taskloop_first_idx = rec_index;
taskloop_prev_idx = rec_index;
@@ -5177,14 +5155,14 @@ static void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
// Create a node to act as an "end group" marker.
kmp_size_t endgroup_idx = -1;
kmp_taskgraph_node_t *endgrpnode =
- __kmp_taskgraph_node_alloc(taskgraph_rec, nullptr, &endgroup_idx);
+ __kmp_taskgraph_node_alloc(taskgraph_rec, nullptr, &endgroup_idx);
endgrpnode->taskloop_task = true;
// Point all the cfg_successor indices to this node now.
for (kmp_int32 looptask = taskloop_first_idx; looptask != -1;) {
kmp_int32 next_task =
- taskgraph_rec->record_map[looptask].u.unresolved.cfg_successor;
+ taskgraph_rec->record_map[looptask].u.unresolved.cfg_successor;
taskgraph_rec->record_map[looptask].u.unresolved.cfg_successor =
- endgroup_idx;
+ endgroup_idx;
looptask = next_task;
}
}
@@ -5391,7 +5369,7 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
#if OMP_TASKGRAPH_EXPERIMENTAL
kmp_taskdata_t *new_task_data = KMP_TASK_TO_TASKDATA(new_task);
- //new_task_data->tdg = taskdata->tdg;
+ // new_task_data->tdg = taskdata->tdg;
new_task_data->owning_taskgraph = nullptr;
#endif
@@ -5432,7 +5410,7 @@ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
- // This is unreachable, I think.
+ // This is unreachable, I think.
if (!taskgraph_rec)
__kmpc_taskgroup(loc, gtid);
}
@@ -5548,8 +5526,9 @@ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
OMPT_GET_RETURN_ADDRESS(0),
#endif
task_dup, taskgraph_rec);
- // check if clause value next
- // Also require GOMP_taskloop to reduce to linear (taskdata->td_flags.native)
+ // check if clause value next
+ // Also require GOMP_taskloop to reduce to linear
+ // (taskdata->td_flags.native)
} else if (if_val == 0) { // if(0) specified, mark task as serial
taskdata->td_flags.task_serial = 1;
taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
@@ -5702,11 +5681,12 @@ bool __kmpc_omp_has_task_team(kmp_int32 gtid) {
#if OMP_TASKGRAPH_EXPERIMENTAL
-static kmp_taskgraph_record_t*
-__kmp_taskgraph_alloc(kmp_int32 gtid, kmp_int32 graph_id) {
+static kmp_taskgraph_record_t *__kmp_taskgraph_alloc(kmp_int32 gtid,
+ kmp_int32 graph_id) {
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskgraph_record_t *new_rec =
- (kmp_taskgraph_record_t *)__kmp_fast_allocate(thread, sizeof(kmp_taskgraph_record_t));
+ (kmp_taskgraph_record_t *)__kmp_fast_allocate(
+ thread, sizeof(kmp_taskgraph_record_t));
new_rec->status = KMP_TDG_RECORDING;
new_rec->gtid = gtid;
new_rec->graph_id = graph_id;
@@ -5726,8 +5706,10 @@ __kmp_taskgraph_alloc(kmp_int32 gtid, kmp_int32 graph_id) {
// Clone a (new) task that has had its private variables and shared variables
// initialised already.
static kmp_task_t *__kmp_taskgraph_clone_task(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph, kmp_task_t *orig,
- size_t sizeof_kmp_task_t, size_t sizeof_shareds) {
+ kmp_taskgraph_record_t *taskgraph,
+ kmp_task_t *orig,
+ size_t sizeof_kmp_task_t,
+ size_t sizeof_shareds) {
// FIXME: This should use a "taskdup" function like taskloops in cases where
// private variables are not trivially copyable. For now, do it by plain
// bitwise copy.
@@ -5738,7 +5720,8 @@ static kmp_task_t *__kmp_taskgraph_clone_task(kmp_info_t *thread,
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(orig);
size_t shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(kmp_uint64));
- kmp_taskdata_t *copy_td = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset + sizeof_shareds);
+ kmp_taskdata_t *copy_td = (kmp_taskdata_t *)__kmp_fast_allocate(
+ thread, shareds_offset + sizeof_shareds);
KMP_MEMCPY(copy_td, taskdata, shareds_offset + sizeof_shareds);
// Tasks cloned for a taskgraph always have this field set.
copy_td->owning_taskgraph = taskgraph;
@@ -5757,10 +5740,11 @@ static kmp_task_t *__kmp_taskgraph_clone_task(kmp_info_t *thread,
// entry: Pointer to the entry function
// args: Pointer to the function arguments
void __kmpc_taskgraph(ident_t *loc_ref, kmp_int32 gtid,
- std::atomic<void*> *tdg_handle, kmp_uint32 graph_id,
+ std::atomic<void *> *tdg_handle, kmp_uint32 graph_id,
kmp_int32 graph_reset, kmp_int32 nogroup,
void (*entry)(void *), void *args) {
- kmp_taskgraph_record_t *record = (kmp_taskgraph_record_t*)KMP_ATOMIC_LD_ACQ(tdg_handle);
+ kmp_taskgraph_record_t *record =
+ (kmp_taskgraph_record_t *)KMP_ATOMIC_LD_ACQ(tdg_handle);
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskgroup_t *taskgroup;
@@ -5778,11 +5762,11 @@ void __kmpc_taskgraph(ident_t *loc_ref, kmp_int32 gtid,
if (!record) {
record = __kmp_taskgraph_alloc(gtid, graph_id);
- // Another thread may have allocated the taskgraph already. Check that here.
+ // Another thread may have allocated the taskgraph already. Check that
+ // here.
kmp_taskgraph_record_t *other =
- (kmp_taskgraph_record_t *)KMP_COMPARE_AND_STORE_RET64(tdg_handle,
- nullptr,
- record);
+ (kmp_taskgraph_record_t *)KMP_COMPARE_AND_STORE_RET64(tdg_handle,
+ nullptr, record);
if (other != nullptr) {
__kmp_fast_free(thread, record);
record = other;
@@ -5807,7 +5791,7 @@ void __kmpc_taskgraph(ident_t *loc_ref, kmp_int32 gtid,
else if (status == KMP_TDG_READY) {
kmp_taskdata *current_taskdata = thread->th.th_current_task;
KG_TRACE(10, ("Replay taskgraph %p from task %p\n", record,
- KMP_TASKDATA_TO_TASK(current_taskdata)));
+ KMP_TASKDATA_TO_TASK(current_taskdata)));
__kmp_acquire_lock(&record->map_lock, gtid);
__kmp_replay_taskgraph(gtid, current_taskdata, record, graph_id, taskgroup);
__kmpc_end_taskgroup(loc_ref, gtid);
@@ -5837,9 +5821,8 @@ kmp_uint32 __kmpc_taskgraph_task(ident_t *loc_ref, kmp_int32 gtid,
if (rec) {
kmp_taskgraph_status_t status = KMP_ATOMIC_LD_ACQ(&rec->status);
if (status == KMP_TDG_RECORDING) {
- kmp_task_t *cloned_task =
- __kmp_taskgraph_clone_task(thread, rec, new_task, sizeof_kmp_task_t,
- sizeof_shareds);
+ kmp_task_t *cloned_task = __kmp_taskgraph_clone_task(
+ thread, rec, new_task, sizeof_kmp_task_t, sizeof_shareds);
kmp_taskgraph_node_t *node = __kmp_taskgraph_node_alloc(rec, cloned_task);
if (taskgroup->taskgraph.reduce_input) {
node->reduce_input = taskgroup->taskgraph.reduce_input;
@@ -5848,25 +5831,28 @@ kmp_uint32 __kmpc_taskgraph_task(ident_t *loc_ref, kmp_int32 gtid,
#if defined(DEBUG_TASKGRAPH)
fprintf(stderr, "__kmpc_taskgraph_task: record task here!\n");
fprintf(stderr, "private size: %d, shared size: %d\n",
- (int)(sizeof_kmp_task_t - sizeof(kmp_task_t)), (int)sizeof_shareds);
- fprintf(stderr, "ndeps: %d\n", (int) ndeps);
+ (int)(sizeof_kmp_task_t - sizeof(kmp_task_t)),
+ (int)sizeof_shareds);
+ fprintf(stderr, "ndeps: %d\n", (int)ndeps);
fprintf(stderr, "gtid: %d rec->gtid: %d\n", gtid, rec->gtid);
- fprintf(stderr, "taskgroup: %p\n", thread->th.th_current_task->td_taskgroup);
+ fprintf(stderr, "taskgroup: %p\n",
+ thread->th.th_current_task->td_taskgroup);
kmp_taskdata_t *parent = thread->th.th_current_task->td_parent;
while (parent) {
- fprintf(stderr, " parent: %p (taskgroup %p)\n", parent, parent->td_taskgroup);
+ fprintf(stderr, " parent: %p (taskgroup %p)\n", parent,
+ parent->td_taskgroup);
parent = parent->td_parent;
}
#endif
node->u.unresolved.ndeps = ndeps;
- node->u.unresolved.dep_list =
- (kmp_depend_info_t *)__kmp_thread_malloc(thread,
- ndeps * sizeof(kmp_depend_info_t));
+ node->u.unresolved.dep_list = (kmp_depend_info_t *)__kmp_thread_malloc(
+ thread, ndeps * sizeof(kmp_depend_info_t));
KMP_MEMCPY(node->u.unresolved.dep_list, dep_list,
ndeps * sizeof(kmp_depend_info_t));
} else if (status == KMP_TDG_READY) {
#ifdef DEBUG_TASKGRAPH
- fprintf(stderr, "non-taskgraph task entry point for task in finalized taskgraph");
+ fprintf(stderr,
+ "non-taskgraph task entry point for task in finalized taskgraph");
#endif
return 0;
}
@@ -5881,15 +5867,15 @@ kmp_uint32 __kmpc_taskgraph_task(ident_t *loc_ref, kmp_int32 gtid,
if (ndeps == 0)
res = __kmpc_omp_task(loc_ref, gtid, new_task);
else
- res = __kmpc_omp_task_with_deps(loc_ref, gtid, new_task, ndeps, dep_list,
- 0, nullptr);
+ res = __kmpc_omp_task_with_deps(loc_ref, gtid, new_task, ndeps, dep_list, 0,
+ nullptr);
return res;
}
-void
-__kmpc_taskgraph_taskwait(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
- kmp_depend_info_t *dep_list, kmp_int32 has_no_wait) {
+void __kmpc_taskgraph_taskwait(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_int32 ndeps, kmp_depend_info_t *dep_list,
+ kmp_int32 has_no_wait) {
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskgroup_t *taskgroup = thread->th.th_current_task->td_taskgroup;
kmp_taskgraph_record_t *rec = __kmp_taskgraph_or_parent_recording(taskgroup);
@@ -5900,17 +5886,20 @@ __kmpc_taskgraph_taskwait(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
kmp_taskgraph_node_t *node = __kmp_taskgraph_node_alloc(rec, nullptr);
#ifdef DEBUG_TASKGRAPH
fprintf(stderr, "__kmpc_taskgraph_taskwait: record taskwait here!\n");
- fprintf(stderr, "ndeps: %d\n", (int) ndeps);
+ fprintf(stderr, "ndeps: %d\n", (int)ndeps);
#endif
node->u.unresolved.ndeps = ndeps;
- node->u.unresolved.dep_list = (kmp_depend_info_t *)__kmp_thread_malloc(thread, ndeps * sizeof(kmp_depend_info_t));
- KMP_MEMCPY(node->u.unresolved.dep_list, dep_list, ndeps * sizeof(kmp_depend_info_t));
+ node->u.unresolved.dep_list = (kmp_depend_info_t *)__kmp_thread_malloc(
+ thread, ndeps * sizeof(kmp_depend_info_t));
+ KMP_MEMCPY(node->u.unresolved.dep_list, dep_list,
+ ndeps * sizeof(kmp_depend_info_t));
// TODO: Record has_no_wait somewhere?
- //if (has_no_wait)
+ // if (has_no_wait)
// return;
} else if (status == KMP_TDG_READY) {
#ifdef DEBUG_TASKGRAPH
- fprintf(stderr, "non-taskgraph taskwait entry point for taskwait in finalized taskgraph\n");
+ fprintf(stderr, "non-taskgraph taskwait entry point for taskwait in "
+ "finalized taskgraph\n");
#endif
return;
}
@@ -5920,14 +5909,14 @@ __kmpc_taskgraph_taskwait(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
has_no_wait);
}
-kmp_uint32
-__kmpc_taskgraph_taskloop(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task,
- kmp_int32 flags, size_t sizeof_kmp_task_t,
- void *shareds, size_t sizeof_shareds,
- kmp_int32 if_val, kmp_uint64 *lb, kmp_uint64 *ub,
- kmp_int64 st, kmp_int32 nogroup, kmp_int32 sched,
- kmp_uint64 grainsize, kmp_int32 modifier,
- void *task_dup) {
+kmp_uint32 __kmpc_taskgraph_taskloop(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_task_t *new_task, kmp_int32 flags,
+ size_t sizeof_kmp_task_t, void *shareds,
+ size_t sizeof_shareds, kmp_int32 if_val,
+ kmp_uint64 *lb, kmp_uint64 *ub,
+ kmp_int64 st, kmp_int32 nogroup,
+ kmp_int32 sched, kmp_uint64 grainsize,
+ kmp_int32 modifier, void *task_dup) {
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskgroup_t *taskgroup = thread->th.th_current_task->td_taskgroup;
kmp_taskgraph_record_t *rec = __kmp_taskgraph_or_parent_recording(taskgroup);
@@ -5939,7 +5928,8 @@ __kmpc_taskgraph_taskloop(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task
sched, grainsize, modifier, task_dup, rec);
else if (status == KMP_TDG_READY) {
#ifdef DEBUG_TASKGRAPH
- fprintf(stderr, "non-taskgraph taskloop entry point for taskloop in finalized taskgraph\n");
+ fprintf(stderr, "non-taskgraph taskloop entry point for taskloop in "
+ "finalized taskgraph\n");
#endif
return 0;
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_1.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_1.cpp
index d6abdb1e1..cfcb84e88 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_1.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_1.cpp
@@ -1,28 +1,33 @@
-// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck
+// %s
-int main()
-{
+int main() {
int deps[3];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(out: deps[2])
- { }
- #pragma omp task depend(out: deps[0], deps[1])
- { }
- #pragma omp task depend(inout: deps[0])
- { }
- #pragma omp task depend(inout: deps[1])
- { }
- #pragma omp task depend(inout: deps[2])
- { }
- #pragma omp task depend(in: deps[0], deps[1], deps[2])
- { }
+#pragma omp task depend(out : deps[2])
+ {
+ }
+#pragma omp task depend(out : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(inout : deps[0])
+ {
+ }
+#pragma omp task depend(inout : deps[1])
+ {
+ }
+#pragma omp task depend(inout : deps[2])
+ {
+ }
+#pragma omp task depend(in : deps[0], deps[1], deps[2])
+ {
+ }
}
}
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_10.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_10.cpp
index f3dd856f8..b37f739fa 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_10.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_10.cpp
@@ -1,29 +1,34 @@
-// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck
+// %s
-int main()
-{
+int main() {
int deps[5];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(out: deps[0], deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[0], deps[1], deps[4])
- { }
- #pragma omp task depend(in: deps[0], deps[1])
- { }
+#pragma omp task depend(out : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[0], deps[1], deps[4])
+ {
+ }
+#pragma omp task depend(in : deps[0], deps[1])
+ {
+ }
- #pragma omp task depend(out: deps[2], deps[3])
- { }
- #pragma omp task depend(mutexinoutset: deps[2], deps[3], deps[4])
- { }
- #pragma omp task depend(in: deps[2], deps[3])
- { }
+#pragma omp task depend(out : deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[2], deps[3], deps[4])
+ {
+ }
+#pragma omp task depend(in : deps[2], deps[3])
+ {
+ }
}
}
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_11.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_11.cpp
index 4f2babaef..634b4896c 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_11.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_11.cpp
@@ -1,33 +1,40 @@
-// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck
+// %s
-int main()
-{
+int main() {
int deps[4];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(out: deps[0], deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[0], deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[0], deps[1])
- { }
- #pragma omp task depend(in: deps[0], deps[1])
- { }
+#pragma omp task depend(out : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(in : deps[0], deps[1])
+ {
+ }
- #pragma omp task depend(out: deps[2], deps[3])
- { }
- #pragma omp task depend(mutexinoutset: deps[2], deps[3])
- { }
- #pragma omp task depend(mutexinoutset: deps[2], deps[3])
- { }
- #pragma omp task depend(in: deps[2], deps[3])
- { }
+#pragma omp task depend(out : deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(in : deps[2], deps[3])
+ {
+ }
}
}
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_12.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_12.cpp
index d3615187b..14973399e 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_12.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_12.cpp
@@ -1,32 +1,39 @@
-// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck
+// %s
-int main()
-{
+int main() {
int deps[2];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(mutexinoutset: deps[0])
- { }
- #pragma omp task depend(mutexinoutset: deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[0])
- { }
- #pragma omp task depend(mutexinoutset: deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[0])
- { }
- #pragma omp task depend(mutexinoutset: deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[0])
- { }
- #pragma omp task depend(mutexinoutset: deps[1])
- { }
+#pragma omp task depend(mutexinoutset : deps[0])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[0])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[0])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[0])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[1])
+ {
+ }
}
}
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_13.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_13.cpp
index de2b5e138..b04b97a81 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_13.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_13.cpp
@@ -1,28 +1,33 @@
-// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck
+// %s
-int main()
-{
+int main() {
int deps[4];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(mutexinoutset: deps[0])
- { }
- #pragma omp task depend(mutexinoutset: deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[0], deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[2])
- { }
- #pragma omp task depend(mutexinoutset: deps[3])
- { }
- #pragma omp task depend(mutexinoutset: deps[2], deps[3])
- { }
+#pragma omp task depend(mutexinoutset : deps[0])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[2])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[3])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[2], deps[3])
+ {
+ }
}
}
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_14.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_14.cpp
index 684a196a8..d907c2827 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_14.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_14.cpp
@@ -1,30 +1,36 @@
-// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck
+// %s
-int main()
-{
+int main() {
int deps[4];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(mutexinoutset: deps[0])
- { }
- #pragma omp task depend(mutexinoutset: deps[0], deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[0], deps[1], deps[2])
- { }
- #pragma omp task depend(mutexinoutset: deps[0], deps[1], deps[2], deps[3])
- { }
- #pragma omp task depend(mutexinoutset: deps[1], deps[2], deps[3])
- { }
- #pragma omp task depend(mutexinoutset: deps[2], deps[3])
- { }
- #pragma omp task depend(mutexinoutset: deps[3])
- { }
+#pragma omp task depend(mutexinoutset : deps[0])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[0], deps[1], deps[2])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[0], deps[1], deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[1], deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[3])
+ {
+ }
}
}
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_15.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_15.cpp
index d35660ddf..9bfbb15d1 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_15.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_15.cpp
@@ -1,48 +1,63 @@
-// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck
+// %s
-int main()
-{
+int main() {
int deps[4];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task
- { }
- #pragma omp task depend(mutexinoutset: deps[0])
- { }
- #pragma omp task depend(mutexinoutset: deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[1], deps[0])
- { }
- #pragma omp task depend(mutexinoutset: deps[2])
- { }
- #pragma omp task depend(mutexinoutset: deps[2], deps[0])
- { }
- #pragma omp task depend(mutexinoutset: deps[2], deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[2], deps[1], deps[0])
- { }
- #pragma omp task depend(mutexinoutset: deps[3])
- { }
- #pragma omp task depend(mutexinoutset: deps[3], deps[0])
- { }
- #pragma omp task depend(mutexinoutset: deps[3], deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[3], deps[1], deps[0])
- { }
- #pragma omp task depend(mutexinoutset: deps[3], deps[2])
- { }
- #pragma omp task depend(mutexinoutset: deps[3], deps[2], deps[0])
- { }
- #pragma omp task depend(mutexinoutset: deps[3], deps[2], deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[3], deps[2], deps[1], deps[0])
- { }
+#pragma omp task
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[0])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[1], deps[0])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[2])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[2], deps[0])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[2], deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[2], deps[1], deps[0])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[3])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[3], deps[0])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[3], deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[3], deps[1], deps[0])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[3], deps[2])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[3], deps[2], deps[0])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[3], deps[2], deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[3], deps[2], deps[1], deps[0])
+ {
+ }
}
}
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_16.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_16.cpp
index 45aa3c587..675472b73 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_16.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_16.cpp
@@ -1,32 +1,39 @@
-// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck
+// %s
-int main()
-{
+int main() {
int deps[8];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(inout: deps[0])
- { }
- #pragma omp task depend(mutexinoutset: deps[4], deps[7])
- { }
- #pragma omp task depend(inout: deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[4], deps[7])
- { }
- #pragma omp task depend(inout: deps[2])
- { }
- #pragma omp task depend(mutexinoutset: deps[5], deps[6])
- { }
- #pragma omp task depend(inout: deps[3])
- { }
- #pragma omp task depend(mutexinoutset: deps[5], deps[6])
- { }
+#pragma omp task depend(inout : deps[0])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[4], deps[7])
+ {
+ }
+#pragma omp task depend(inout : deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[4], deps[7])
+ {
+ }
+#pragma omp task depend(inout : deps[2])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[5], deps[6])
+ {
+ }
+#pragma omp task depend(inout : deps[3])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[5], deps[6])
+ {
+ }
}
}
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_17.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_17.cpp
index 2c59595f5..3cc229cf0 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_17.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_17.cpp
@@ -2,42 +2,40 @@
#include <cstdio>
-int main()
-{
+int main() {
int deps[4];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(out: deps[0], deps[1])
+#pragma omp task depend(out : deps[0], deps[1])
{
fprintf(stderr, "task 0\n");
}
- #pragma omp task depend(out: deps[2], deps[3])
+#pragma omp task depend(out : deps[2], deps[3])
{
fprintf(stderr, "task 1\n");
}
- #pragma omp task depend(inout: deps[0])
+#pragma omp task depend(inout : deps[0])
{
fprintf(stderr, "task 2\n");
}
- #pragma omp task depend(inout: deps[1])
+#pragma omp task depend(inout : deps[1])
{
fprintf(stderr, "task 3\n");
}
- #pragma omp task depend(inout: deps[2])
+#pragma omp task depend(inout : deps[2])
{
fprintf(stderr, "task 4\n");
}
- #pragma omp task depend(inout: deps[3])
+#pragma omp task depend(inout : deps[3])
{
fprintf(stderr, "task 5\n");
}
- #pragma omp task depend(in: deps[0], deps[1], deps[2], deps[3])
+#pragma omp task depend(in : deps[0], deps[1], deps[2], deps[3])
{
fprintf(stderr, "task 6\n");
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_18.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_18.cpp
index 954cfcbad..8ebd25dd4 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_18.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_18.cpp
@@ -1,24 +1,26 @@
-// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck
+// %s
-int main()
-{
+int main() {
int deps[2];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(out: deps[0], deps[1])
- { }
- #pragma omp taskloop num_tasks(strict: 2)
+#pragma omp task depend(out : deps[0], deps[1])
+ {
+ }
+#pragma omp taskloop num_tasks(strict : 2)
+ {
+ for (int j = 0; j < 20; j++) {
+ }
+ }
+#pragma omp task depend(in : deps[0], deps[1])
{
- for (int j = 0; j < 20; j++) { }
}
- #pragma omp task depend(in: deps[0], deps[1])
- { }
}
}
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_19.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_19.cpp
index 24d5fdfcb..83ed4551e 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_19.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_19.cpp
@@ -2,31 +2,29 @@
#include <cstdio>
-int main()
-{
+int main() {
int deps[3];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 10; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 10; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(out: deps[0])
+#pragma omp task depend(out : deps[0])
{
fprintf(stderr, "task 0\n");
}
- #pragma omp task depend(out: deps[1])
+#pragma omp task depend(out : deps[1])
{
fprintf(stderr, "task 1\n");
}
- #pragma omp task depend(out: deps[2])
+#pragma omp task depend(out : deps[2])
{
fprintf(stderr, "task 2\n");
}
- #pragma omp taskwait depend(in: deps[0], deps[1], deps[2])
- #pragma omp task depend(in: deps[0], deps[1], deps[2])
+#pragma omp taskwait depend(in : deps[0], deps[1], deps[2])
+#pragma omp task depend(in : deps[0], deps[1], deps[2])
{
fprintf(stderr, "task 3\n");
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_2.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_2.cpp
index 89dd9137e..00e63d54e 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_2.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_2.cpp
@@ -1,30 +1,36 @@
-// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck
+// %s
-int main()
-{
+int main() {
int deps[4];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(out: deps[0], deps[1])
- { }
- #pragma omp task depend(out: deps[2], deps[3])
- { }
- #pragma omp task depend(inout: deps[0])
- { }
- #pragma omp task depend(inout: deps[1])
- { }
- #pragma omp task depend(inout: deps[2])
- { }
- #pragma omp task depend(inout: deps[3])
- { }
- #pragma omp task depend(in: deps[0], deps[1], deps[2], deps[3])
- { }
+#pragma omp task depend(out : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(out : deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(inout : deps[0])
+ {
+ }
+#pragma omp task depend(inout : deps[1])
+ {
+ }
+#pragma omp task depend(inout : deps[2])
+ {
+ }
+#pragma omp task depend(inout : deps[3])
+ {
+ }
+#pragma omp task depend(in : deps[0], deps[1], deps[2], deps[3])
+ {
+ }
}
}
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_20.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_20.cpp
index ab3b42995..4d5bc191f 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_20.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_20.cpp
@@ -2,31 +2,29 @@
#include <cstdio>
-int main()
-{
+int main() {
int deps[3];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 10; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 10; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(out: deps[0])
+#pragma omp task depend(out : deps[0])
{
fprintf(stderr, "task 0\n");
}
- #pragma omp task depend(out: deps[1])
+#pragma omp task depend(out : deps[1])
{
fprintf(stderr, "task 1\n");
}
- #pragma omp task depend(out: deps[2])
+#pragma omp task depend(out : deps[2])
{
fprintf(stderr, "task 2\n");
}
- #pragma omp taskwait depend(inoutset: deps[0], deps[1])
- #pragma omp task depend(in: deps[0], deps[1], deps[2])
+#pragma omp taskwait depend(inoutset : deps[0], deps[1])
+#pragma omp task depend(in : deps[0], deps[1], deps[2])
{
fprintf(stderr, "task 3\n");
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_21.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_21.cpp
index ad36c8c5a..c6be2b0c0 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_21.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_21.cpp
@@ -2,8 +2,7 @@
#include <cstdio>
-int main()
-{
+int main() {
int arr[100];
int res = 0;
@@ -13,16 +12,15 @@ int main()
}
printf("base result: %d\n", res);
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 10; i++)
- {
+ for (int i = 0; i < 10; i++) {
int res = 0;
- #pragma omp taskgraph
+#pragma omp taskgraph
{
- #pragma omp taskloop reduction(+: res) num_tasks(10)
+#pragma omp taskloop reduction(+ : res) num_tasks(10)
{
for (int j = 0; j < 100; j++) {
res += arr[j];
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_22.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_22.cpp
index 254de8e35..38f5c6501 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_22.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_22.cpp
@@ -1,4 +1,5 @@
-// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t && %libomp-run 2>&1 | FileCheck %s
+// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t &&
+// %libomp-run 2>&1 | FileCheck %s
#include <cstdio>
@@ -9,8 +10,7 @@ void foo() {
}
}
-int main()
-{
+int main() {
int arr[100];
int res = 0;
@@ -20,16 +20,15 @@ int main()
}
fprintf(stderr, "base result: %d\n", res);
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 10; i++)
- {
+ for (int i = 0; i < 10; i++) {
int res = 0;
- #pragma omp taskgraph
+#pragma omp taskgraph
{
- #pragma omp taskloop reduction(+: res) num_tasks(10)
+#pragma omp taskloop reduction(+ : res) num_tasks(10)
{
for (int j = 0; j < 100; j++) {
res += arr[j];
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_23.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_23.cpp
index eb6930965..4750870c2 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_23.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_23.cpp
@@ -1,4 +1,5 @@
-// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t && %libomp-run 2>&1 | FileCheck %s
+// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t &&
+// %libomp-run 2>&1 | FileCheck %s
#include <cstdio>
@@ -11,8 +12,7 @@ void foo() {
}
}
-int main()
-{
+int main() {
int arr[100];
int res = 0;
@@ -22,16 +22,15 @@ int main()
}
fprintf(stderr, "base result: %d\n", res);
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 10; i++)
- {
+ for (int i = 0; i < 10; i++) {
int res = 0;
- #pragma omp taskgraph
+#pragma omp taskgraph
{
- #pragma omp taskloop reduction(+: res) num_tasks(10)
+#pragma omp taskloop reduction(+ : res) num_tasks(10)
{
for (int j = 0; j < 100; j++) {
res += arr[j];
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_24.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_24.cpp
index c974a0852..e03269876 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_24.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_24.cpp
@@ -1,15 +1,15 @@
-// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t && env
+// KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
#include <cassert>
int global_dep;
void foo() {
-#pragma omp taskwait replayable(1) depend(in: global_dep)
+#pragma omp taskwait replayable(1) depend(in : global_dep)
}
-int main()
-{
+int main() {
int arr[100];
int res = 0;
@@ -20,23 +20,23 @@ int main()
assert(res == 4950);
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 10; i++)
- {
+ for (int i = 0; i < 10; i++) {
int res = 0;
- #pragma omp taskgraph
+#pragma omp taskgraph
{
- #pragma omp taskloop reduction(+: res) num_tasks(10)
+#pragma omp taskloop reduction(+ : res) num_tasks(10)
{
for (int j = 0; j < 100; j++) {
res += arr[j];
}
}
- #pragma omp task depend(out: global_dep)
- { }
+#pragma omp task depend(out : global_dep)
+ {
+ }
foo();
}
assert(res == 4950);
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_25.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_25.cpp
index 0f2c3fbf9..f9d91f4ac 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_25.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_25.cpp
@@ -1,4 +1,5 @@
-// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t && %libomp-run 2>&1 | FileCheck %s
+// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t &&
+// %libomp-run 2>&1 | FileCheck %s
#include <cstdio>
@@ -6,14 +7,13 @@ int global_dep;
void foo() {
fprintf(stderr, "called function foo\n");
-#pragma omp task replayable(1) depend(in: global_dep)
+#pragma omp task replayable(1) depend(in : global_dep)
{
fprintf(stderr, "out-of-line task created from within taskloop\n");
}
}
-int main()
-{
+int main() {
int arr[100];
int res = 0;
@@ -23,16 +23,15 @@ int main()
}
fprintf(stderr, "base result: %d\n", res);
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 4; i++)
- {
+ for (int i = 0; i < 4; i++) {
int res = 0;
- #pragma omp taskgraph
+#pragma omp taskgraph
{
- #pragma omp taskloop reduction(+: res) num_tasks(4)
+#pragma omp taskloop reduction(+ : res) num_tasks(4)
{
for (int j = 0; j < 4; j++) {
res += arr[j];
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_26.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_26.cpp
index 86c69e813..c694545b8 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_26.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_26.cpp
@@ -1,9 +1,9 @@
-// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t && %libomp-run 2>&1 | FileCheck %s
+// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t &&
+// %libomp-run 2>&1 | FileCheck %s
#include <cstdio>
-int main()
-{
+int main() {
int arr[100];
int arr2[100];
@@ -16,22 +16,21 @@ int main()
}
fprintf(stderr, "base results: %d, %d\n", res, res2);
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 10; i++)
- {
+ for (int i = 0; i < 10; i++) {
int res = 0, res2 = 0;
- #pragma omp taskgraph
+#pragma omp taskgraph
{
- #pragma omp taskloop reduction(+: res) num_tasks(10)
+#pragma omp taskloop reduction(+ : res) num_tasks(10)
{
for (int j = 0; j < 10; j++) {
res += arr[j];
}
}
- #pragma omp taskloop reduction(+: res2) num_tasks(10)
+#pragma omp taskloop reduction(+ : res2) num_tasks(10)
{
for (int j = 0; j < 10; j++) {
res2 += arr2[j];
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_27.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_27.cpp
index 20b81b143..da9206383 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_27.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_27.cpp
@@ -1,9 +1,9 @@
-// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t && %libomp-run 2>&1 | FileCheck %s
+// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t &&
+// %libomp-run 2>&1 | FileCheck %s
#include <cstdio>
-int main()
-{
+int main() {
int arr[100];
int arr2[100];
@@ -18,22 +18,21 @@ int main()
}
fprintf(stderr, "base results: %d, %d\n", res, res2);
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 10; i++)
- {
+ for (int i = 0; i < 10; i++) {
int res = 0, res2 = 0;
- #pragma omp taskgraph
+#pragma omp taskgraph
{
- #pragma omp taskloop reduction(+: res) num_tasks(10)
+#pragma omp taskloop reduction(+ : res) num_tasks(10)
{
for (int j = 0; j < 10; j++) {
res += arr[j];
}
}
- #pragma omp taskloop reduction(+: res2) num_tasks(10)
+#pragma omp taskloop reduction(+ : res2) num_tasks(10)
{
for (int j = 0; j < 10; j++) {
res2 += res * arr2[j];
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_3.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_3.cpp
index 368677269..7fadf7426 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_3.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_3.cpp
@@ -1,34 +1,42 @@
-// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck
+// %s
-int main()
-{
+int main() {
int deps[6];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(out: deps[0], deps[1])
- { }
- #pragma omp task depend(out: deps[2], deps[3])
- { }
- #pragma omp task depend(inout: deps[0])
- { }
- #pragma omp task depend(inout: deps[1])
- { }
- #pragma omp task depend(inout: deps[2])
- { }
- #pragma omp task depend(inout: deps[3])
- { }
- #pragma omp task depend(in: deps[0], deps[1], deps[2], deps[3])
- { }
- #pragma omp task depend(in: deps[1], deps[2]) depend(out: deps[5])
- { }
- #pragma omp task depend(in: deps[5])
- { }
+#pragma omp task depend(out : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(out : deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(inout : deps[0])
+ {
+ }
+#pragma omp task depend(inout : deps[1])
+ {
+ }
+#pragma omp task depend(inout : deps[2])
+ {
+ }
+#pragma omp task depend(inout : deps[3])
+ {
+ }
+#pragma omp task depend(in : deps[0], deps[1], deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(in : deps[1], deps[2]) depend(out : deps[5])
+ {
+ }
+#pragma omp task depend(in : deps[5])
+ {
+ }
}
}
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_4.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_4.cpp
index a70fed484..6f12f7625 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_4.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_4.cpp
@@ -1,34 +1,42 @@
-// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck
+// %s
-int main()
-{
+int main() {
int deps[4];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(out: deps[0], deps[1])
- { }
- #pragma omp task depend(out: deps[2], deps[3])
- { }
- #pragma omp task depend(inout: deps[0])
- { }
- #pragma omp task depend(inout: deps[1])
- { }
- #pragma omp task depend(inout: deps[2])
- { }
- #pragma omp task depend(inout: deps[3])
- { }
- #pragma omp task depend(in: deps[0], deps[2], deps[3])
- { }
- #pragma omp task depend(in: deps[0], deps[1])
- { }
- #pragma omp task depend(in: deps[3])
- { }
+#pragma omp task depend(out : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(out : deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(inout : deps[0])
+ {
+ }
+#pragma omp task depend(inout : deps[1])
+ {
+ }
+#pragma omp task depend(inout : deps[2])
+ {
+ }
+#pragma omp task depend(inout : deps[3])
+ {
+ }
+#pragma omp task depend(in : deps[0], deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(in : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(in : deps[3])
+ {
+ }
}
}
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_5.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_5.cpp
index 636208245..ee2bccebe 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_5.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_5.cpp
@@ -1,32 +1,39 @@
-// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck
+// %s
-int main()
-{
+int main() {
int deps[4];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(out: deps[0], deps[2])
- { }
- #pragma omp task depend(out: deps[1], deps[3])
- { }
- #pragma omp task depend(inoutset: deps[0], deps[1])
- { }
- #pragma omp task depend(inoutset: deps[0], deps[1])
- { }
- #pragma omp task depend(inoutset: deps[2], deps[3])
- { }
- #pragma omp task depend(inoutset: deps[2], deps[3])
- { }
- #pragma omp task depend(in: deps[0], deps[1])
- { }
- #pragma omp task depend(in: deps[2], deps[3])
- { }
+#pragma omp task depend(out : deps[0], deps[2])
+ {
+ }
+#pragma omp task depend(out : deps[1], deps[3])
+ {
+ }
+#pragma omp task depend(inoutset : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(inoutset : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(inoutset : deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(inoutset : deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(in : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(in : deps[2], deps[3])
+ {
+ }
}
}
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_6.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_6.cpp
index 66e872f83..4ccea3584 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_6.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_6.cpp
@@ -1,32 +1,39 @@
-// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck
+// %s
-int main()
-{
+int main() {
int deps[4];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(out: deps[1], deps[3])
- { }
- #pragma omp task depend(out: deps[0], deps[2])
- { }
- #pragma omp task depend(inoutset: deps[0], deps[1])
- { }
- #pragma omp task depend(inoutset: deps[0], deps[1])
- { }
- #pragma omp task depend(inoutset: deps[2], deps[3])
- { }
- #pragma omp task depend(inoutset: deps[2], deps[3])
- { }
- #pragma omp task depend(in: deps[0], deps[2])
- { }
- #pragma omp task depend(in: deps[0], deps[2])
- { }
+#pragma omp task depend(out : deps[1], deps[3])
+ {
+ }
+#pragma omp task depend(out : deps[0], deps[2])
+ {
+ }
+#pragma omp task depend(inoutset : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(inoutset : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(inoutset : deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(inoutset : deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(in : deps[0], deps[2])
+ {
+ }
+#pragma omp task depend(in : deps[0], deps[2])
+ {
+ }
}
}
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_7.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_7.cpp
index c01d4a080..384206793 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_7.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_7.cpp
@@ -1,32 +1,39 @@
-// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck
+// %s
-int main()
-{
+int main() {
int deps[4];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(out: deps[0], deps[1])
- { }
- #pragma omp task depend(out: deps[2], deps[3])
- { }
- #pragma omp task depend(mutexinoutset: deps[0], deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[0], deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[2], deps[3])
- { }
- #pragma omp task depend(mutexinoutset: deps[2], deps[3])
- { }
- #pragma omp task depend(in: deps[0], deps[1])
- { }
- #pragma omp task depend(in: deps[2], deps[3])
- { }
+#pragma omp task depend(out : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(out : deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[2], deps[3])
+ {
+ }
+#pragma omp task depend(in : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(in : deps[2], deps[3])
+ {
+ }
}
}
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_8.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_8.cpp
index 179e8bb08..0dc4731cc 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_8.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_8.cpp
@@ -1,22 +1,24 @@
-// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck
+// %s
-int main()
-{
+int main() {
int deps[2];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(out: deps[0], deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[0], deps[1])
- { }
- #pragma omp task depend(in: deps[0], deps[1])
- { }
+#pragma omp task depend(out : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(in : deps[0], deps[1])
+ {
+ }
}
}
}
diff --git a/openmp/runtime/test/taskgraph/taskgraph_deps_9.cpp b/openmp/runtime/test/taskgraph/taskgraph_deps_9.cpp
index 7e9af09db..fcd8019e0 100644
--- a/openmp/runtime/test/taskgraph/taskgraph_deps_9.cpp
+++ b/openmp/runtime/test/taskgraph/taskgraph_deps_9.cpp
@@ -1,26 +1,30 @@
-// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck %s
+// RUN: %libomp-cxx-compile && env KMP_G_DEBUG=10 %libomp-run 2>&1 | FileCheck
+// %s
-int main()
-{
+int main() {
int deps[3];
- #pragma omp parallel
+#pragma omp parallel
{
- #pragma omp single
+#pragma omp single
{
- for (int i = 0; i < 2; i++)
- {
- #pragma omp taskgraph
+ for (int i = 0; i < 2; i++) {
+#pragma omp taskgraph
{
- #pragma omp task depend(out: deps[0], deps[1])
- { }
- #pragma omp task depend(mutexinoutset: deps[0], deps[1])
- { }
- #pragma omp task depend(in: deps[1]) depend(out: deps[2])
- { }
- #pragma omp task depend(mutexinoutset: deps[0], deps[1])
- { }
- #pragma omp task depend(in: deps[0], deps[1], deps[2])
- { }
+#pragma omp task depend(out : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(in : deps[1]) depend(out : deps[2])
+ {
+ }
+#pragma omp task depend(mutexinoutset : deps[0], deps[1])
+ {
+ }
+#pragma omp task depend(in : deps[0], deps[1], deps[2])
+ {
+ }
}
}
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/188765
More information about the cfe-commits
mailing list