[llvm-branch-commits] [OpenMP] New taskgraph runtime implementation (PR #194047)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Apr 24 13:55:24 PDT 2026
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: C/C++ code formatter, clang-format found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff origin/main HEAD --extensions cpp,h -- openmp/runtime/src/kmp.h openmp/runtime/src/kmp_debug.h openmp/runtime/src/kmp_global.cpp openmp/runtime/src/kmp_settings.cpp openmp/runtime/src/kmp_taskdeps.cpp openmp/runtime/src/kmp_taskdeps.h openmp/runtime/src/kmp_tasking.cpp --diff_from_common_commit
``````````
:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
:warning:
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 28bac7b26..8a735b876 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -4500,37 +4500,25 @@ KMP_EXPORT void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
uintptr_t hint);
#if OMP_TASKGRAPH_EXPERIMENTAL
KMP_EXPORT void __kmpc_taskgraph(ident_t *loc_ref, kmp_int32 gtid,
- std::atomic<void*> *tdg_handle,
+ std::atomic<void *> *tdg_handle,
kmp_uint32 graph_id, kmp_int32 graph_reset,
kmp_int32 nogroup, void (*entry)(void *),
void *args);
-KMP_EXPORT kmp_uint32 __kmpc_taskgraph_task(ident_t *loc_ref, kmp_int32 gtid,
- kmp_task_t *new_task,
- kmp_int32 flags,
- size_t sizeof_kmp_task_t,
- void* shareds,
- size_t sizeof_shareds,
- kmp_int32 ndeps,
- kmp_depend_info_t *dep_list);
-KMP_EXPORT kmp_uint32 __kmpc_taskgraph_taskloop(ident_t *loc_ref,
- kmp_int32 gtid,
- kmp_task_t *new_task,
- kmp_int32 flags,
- size_t sizeof_kmp_task_t,
- void *shareds,
- size_t sizeof_shareds,
- kmp_int32 if_val,
- kmp_uint64 *lb, kmp_uint64 *ub,
- kmp_int64 st, kmp_int32 nogroup,
- kmp_int32 sched,
- kmp_uint64 grainsize,
- kmp_int32 modifier,
- void *task_dup);
+KMP_EXPORT kmp_uint32 __kmpc_taskgraph_task(
+ ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 flags,
+ size_t sizeof_kmp_task_t, void *shareds, size_t sizeof_shareds,
+ kmp_int32 ndeps, kmp_depend_info_t *dep_list);
+KMP_EXPORT kmp_uint32 __kmpc_taskgraph_taskloop(
+ ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 flags,
+ size_t sizeof_kmp_task_t, void *shareds, size_t sizeof_shareds,
+ kmp_int32 if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+ kmp_int32 nogroup, kmp_int32 sched, kmp_uint64 grainsize,
+ kmp_int32 modifier, void *task_dup);
KMP_EXPORT void __kmpc_taskgraph_taskwait(ident_t *loc_ref, kmp_int32 gtid,
kmp_int32 ndeps,
kmp_depend_info_t *dep_list,
kmp_int32 has_no_wait);
-KMP_EXPORT void* __kmpc_taskgraph_taskred_init(kmp_int32 gtid, kmp_int32 num,
+KMP_EXPORT void *__kmpc_taskgraph_taskred_init(kmp_int32 gtid, kmp_int32 num,
void *data);
#endif
/* Interface to fast scalable reduce methods routines */
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 90ab78e24..9d42202e1 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -183,7 +183,7 @@ static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
,
bool taskgraph_p
#endif
- ) {
+) {
kmp_dephash_t *h = *hash;
if (h->nelements != 0 && h->nconflicts / h->size >= 1) {
*hash = __kmp_dephash_extend(thread, h);
@@ -309,7 +309,7 @@ __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
if (!dep->dn.successors || dep->dn.successors->node != node) {
__kmp_track_dependence(gtid, dep, node, task);
dep->dn.successors =
- __kmp_add_node<true>(thread, dep->dn.successors, node);
+ __kmp_add_node<true>(thread, dep->dn.successors, node);
KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
"%p\n",
gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
@@ -338,7 +338,8 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
if (sink->dn.task) {
if (!sink->dn.successors || sink->dn.successors->node != source) {
__kmp_track_dependence(gtid, sink, source, task);
- sink->dn.successors = __kmp_add_node<true>(thread, sink->dn.successors, source);
+ sink->dn.successors =
+ __kmp_add_node<true>(thread, sink->dn.successors, source);
KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
"%p\n",
gtid, KMP_TASK_TO_TASKDATA(sink->dn.task),
@@ -352,21 +353,23 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
}
#if OMP_TASKGRAPH_EXPERIMENTAL
-kmp_taskgraph_region_dep_t *__kmp_region_deplist_add(kmp_info_t *thread,
- kmp_taskgraph_region_dep_t **recycled_deps, kmp_taskgraph_region_t *region,
- kmp_taskgraph_region_dep_t *list) {
+kmp_taskgraph_region_dep_t *__kmp_region_deplist_add(
+ kmp_info_t *thread, kmp_taskgraph_region_dep_t **recycled_deps,
+ kmp_taskgraph_region_t *region, kmp_taskgraph_region_dep_t *list) {
kmp_taskgraph_region_dep_t *head;
if (*recycled_deps) {
head = *recycled_deps;
*recycled_deps = (*recycled_deps)->next;
} else
- head = (kmp_taskgraph_region_dep_t *)__kmp_fast_allocate(thread, sizeof(kmp_taskgraph_region_dep_t));
+ head = (kmp_taskgraph_region_dep_t *)__kmp_fast_allocate(
+ thread, sizeof(kmp_taskgraph_region_dep_t));
head->region = region;
head->next = list;
return head;
}
-kmp_taskgraph_region_t *__kmp_region_worklist_reverse(kmp_taskgraph_region_t *list) {
+kmp_taskgraph_region_t *
+__kmp_region_worklist_reverse(kmp_taskgraph_region_t *list) {
kmp_taskgraph_region_t *last = nullptr;
while (list) {
kmp_taskgraph_region_t *next = list->next;
@@ -377,7 +380,8 @@ kmp_taskgraph_region_t *__kmp_region_worklist_reverse(kmp_taskgraph_region_t *li
return last;
}
-static kmp_depnode_t *__kmp_find_in_depnode_list(kmp_depnode_t *node, kmp_depnode_list_t *list) {
+static kmp_depnode_t *__kmp_find_in_depnode_list(kmp_depnode_t *node,
+ kmp_depnode_list_t *list) {
for (; list; list = list->next)
if (list->node == node)
return list->node;
@@ -392,38 +396,36 @@ typedef struct kmp_bitset {
kmp_size_t num_chunks;
} kmp_bitset_t;
-static kmp_bitset_t *
-__kmp_bitset_alloc(kmp_info_t *thread, kmp_size_t bitsize) {
+static kmp_bitset_t *__kmp_bitset_alloc(kmp_info_t *thread,
+ kmp_size_t bitsize) {
kmp_size_t bytesize = (bitsize + 7) / 8;
- kmp_size_t num_chunks = (bytesize + sizeof(kmp_uint64) - 1) / sizeof(kmp_uint64);
- kmp_bitset_t *bitset = (kmp_bitset_t *) __kmp_fast_allocate(thread, sizeof(kmp_bitset_t) + sizeof(kmp_uint64) * num_chunks);
- bitset->bits = (kmp_uint64*) &bitset[1];
+ kmp_size_t num_chunks =
+ (bytesize + sizeof(kmp_uint64) - 1) / sizeof(kmp_uint64);
+ kmp_bitset_t *bitset = (kmp_bitset_t *)__kmp_fast_allocate(
+ thread, sizeof(kmp_bitset_t) + sizeof(kmp_uint64) * num_chunks);
+ bitset->bits = (kmp_uint64 *)&bitset[1];
memset(bitset->bits, 0, sizeof(kmp_uint64) * num_chunks);
bitset->bitsize = bitsize;
bitset->num_chunks = num_chunks;
return bitset;
}
-static void
-__kmp_bitset_free(kmp_info_t *thread, kmp_bitset_t *bitset) {
+static void __kmp_bitset_free(kmp_info_t *thread, kmp_bitset_t *bitset) {
__kmp_fast_free(thread, bitset);
}
-static void
-__kmp_bitset_set(kmp_bitset_t *bitset, kmp_size_t bitnum) {
+static void __kmp_bitset_set(kmp_bitset_t *bitset, kmp_size_t bitnum) {
kmp_size_t chunk = bitnum / (8 * sizeof(kmp_uint64));
if (bitnum < bitset->bitsize)
bitset->bits[chunk] |= (kmp_uint64)1 << (bitnum & 63);
}
-static void
-__kmp_bitset_clearall(kmp_bitset_t *bitset) {
+static void __kmp_bitset_clearall(kmp_bitset_t *bitset) {
if (bitset)
memset(bitset->bits, 0, sizeof(kmp_int64) * bitset->num_chunks);
}
-static void
-__kmp_bitset_setall(kmp_bitset_t *bitset) {
+static void __kmp_bitset_setall(kmp_bitset_t *bitset) {
for (kmp_int32 chunk = 0; chunk < bitset->num_chunks - 1; chunk++)
bitset->bits[chunk] = ~(kmp_uint64)0;
kmp_int32 last_chunk_numbits = bitset->bitsize & 63;
@@ -433,8 +435,7 @@ __kmp_bitset_setall(kmp_bitset_t *bitset) {
}
}
-static void
-__kmp_bitset_copy(kmp_bitset_t *dst, const kmp_bitset_t *src) {
+static void __kmp_bitset_copy(kmp_bitset_t *dst, const kmp_bitset_t *src) {
assert(dst->num_chunks == src->num_chunks);
assert(dst->bitsize == src->bitsize);
memcpy(dst->bits, src->bits, sizeof(kmp_uint64) * dst->num_chunks);
@@ -442,8 +443,8 @@ __kmp_bitset_copy(kmp_bitset_t *dst, const kmp_bitset_t *src) {
/// Return TRUE if \c b is a subset of \c a.
-static bool
-__kmp_bitset_subset_p(const kmp_bitset_t *a, const kmp_bitset_t *b) {
+static bool __kmp_bitset_subset_p(const kmp_bitset_t *a,
+ const kmp_bitset_t *b) {
if (!b)
return true;
kmp_size_t chunk_max = std::max(a->num_chunks, b->num_chunks);
@@ -456,8 +457,8 @@ __kmp_bitset_subset_p(const kmp_bitset_t *a, const kmp_bitset_t *b) {
return true;
}
-static void
-__kmp_bitset_and(kmp_bitset_t *a, kmp_bitset_t *b, kmp_bitset_t *c) {
+static void __kmp_bitset_and(kmp_bitset_t *a, kmp_bitset_t *b,
+ kmp_bitset_t *c) {
kmp_size_t chunk_max = std::max(b->num_chunks, c->num_chunks);
for (kmp_size_t chunk = 0; chunk < chunk_max; chunk++) {
kmp_uint64 b_bits = chunk < b->num_chunks ? b->bits[chunk] : 0;
@@ -466,8 +467,8 @@ __kmp_bitset_and(kmp_bitset_t *a, kmp_bitset_t *b, kmp_bitset_t *c) {
}
}
-static void
-__kmp_bitset_and_not(kmp_bitset_t *a, kmp_bitset_t *b, kmp_bitset_t *c) {
+static void __kmp_bitset_and_not(kmp_bitset_t *a, kmp_bitset_t *b,
+ kmp_bitset_t *c) {
if (!c)
__kmp_bitset_copy(a, b);
else {
@@ -480,8 +481,7 @@ __kmp_bitset_and_not(kmp_bitset_t *a, kmp_bitset_t *b, kmp_bitset_t *c) {
}
}
-static void
-__kmp_bitset_or(kmp_bitset_t *a, kmp_bitset_t *b, kmp_bitset_t *c) {
+static void __kmp_bitset_or(kmp_bitset_t *a, kmp_bitset_t *b, kmp_bitset_t *c) {
if (!b && !c)
__kmp_bitset_clearall(a);
else if (!b)
@@ -498,8 +498,7 @@ __kmp_bitset_or(kmp_bitset_t *a, kmp_bitset_t *b, kmp_bitset_t *c) {
}
}
-static bool
-__kmp_bitset_empty_p(kmp_bitset_t *bitset) {
+static bool __kmp_bitset_empty_p(kmp_bitset_t *bitset) {
if (!bitset)
return true;
for (kmp_size_t chunk = 0; chunk < bitset->num_chunks; chunk++) {
@@ -512,8 +511,7 @@ __kmp_bitset_empty_p(kmp_bitset_t *bitset) {
/// Test two bitsets for equality. Note that any unused bits at the end of the
/// last chunk are kept as zero.
-static bool
-__kmp_bitset_equal(kmp_bitset_t *a, kmp_bitset_t *b) {
+static bool __kmp_bitset_equal(kmp_bitset_t *a, kmp_bitset_t *b) {
if (!b)
return __kmp_bitset_empty_p(a);
kmp_size_t chunk_max = std::max(a->num_chunks, b->num_chunks);
@@ -526,8 +524,7 @@ __kmp_bitset_equal(kmp_bitset_t *a, kmp_bitset_t *b) {
return true;
}
-static bool
-__kmp_bitset_intersect_p(kmp_bitset_t *a, kmp_bitset_t *b) {
+static bool __kmp_bitset_intersect_p(kmp_bitset_t *a, kmp_bitset_t *b) {
if (!a || !b)
return false;
kmp_size_t chunk_max = std::max(a->num_chunks, b->num_chunks);
@@ -540,8 +537,7 @@ __kmp_bitset_intersect_p(kmp_bitset_t *a, kmp_bitset_t *b) {
return false;
}
-static kmp_int32
-__kmp_bitset_popcount(kmp_bitset_t *bitset) {
+static kmp_int32 __kmp_bitset_popcount(kmp_bitset_t *bitset) {
if (!bitset)
return 0;
kmp_int32 accum = 0;
@@ -557,9 +553,10 @@ static kmp_int32 __kmp_taskgraph_add_dep(kmp_info_t *thread,
kmp_int32 npredecessors = 0;
for (; plist; plist = plist->next) {
kmp_depnode_t *dep = plist->node;
- if (!dep->dn.successors || !__kmp_find_in_depnode_list(node, dep->dn.successors)) {
+ if (!dep->dn.successors ||
+ !__kmp_find_in_depnode_list(node, dep->dn.successors)) {
dep->dn.successors =
- __kmp_add_node<false>(thread, dep->dn.successors, node);
+ __kmp_add_node<false>(thread, dep->dn.successors, node);
npredecessors++;
}
}
@@ -574,8 +571,8 @@ static kmp_int32 __kmp_taskgraph_add_dep(kmp_info_t *thread,
kmp_int32 npredecessors = 0;
if (!sink->dn.successors || sink->dn.successors->node != source) {
if (!__kmp_find_in_depnode_list(source, sink->dn.successors)) {
- sink->dn.successors = __kmp_add_node<false>(thread, sink->dn.successors,
- source);
+ sink->dn.successors =
+ __kmp_add_node<false>(thread, sink->dn.successors, source);
npredecessors++;
}
}
@@ -583,13 +580,13 @@ static kmp_int32 __kmp_taskgraph_add_dep(kmp_info_t *thread,
}
#endif
-template<typename T>
+template <typename T>
static inline kmp_int32
__kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *h,
bool dep_barrier, kmp_task_t *task) {
KA_TRACE(30, ("__kmp_process_dep_all<%s>: T#%d processing dep_all, "
- "dep_barrier = %d\n", T::name,
- gtid, dep_barrier));
+ "dep_barrier = %d\n",
+ T::name, gtid, dep_barrier));
kmp_info_t *thread = __kmp_threads[gtid];
kmp_int32 npredecessors = 0;
@@ -634,19 +631,19 @@ __kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *h,
}
}
KA_TRACE(30, ("__kmp_process_dep_all<%s>: T#%d found %d predecessors\n",
- T::name, gtid, npredecessors));
+ T::name, gtid, npredecessors));
return npredecessors;
}
-template<typename T>
+template <typename T>
static inline kmp_int32
__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
bool dep_barrier, kmp_int32 ndeps,
kmp_depend_info_t *dep_list, kmp_task_t *task,
kmp_int32 &next_mutex_set, bool filter = true) {
KA_TRACE(30, ("__kmp_process_deps<%s>: T#%d processing %d dependences : "
- "dep_barrier = %d, filter = %d\n", T::name,
- gtid, ndeps, dep_barrier, filter));
+ "dep_barrier = %d, filter = %d\n",
+ T::name, gtid, ndeps, dep_barrier, filter));
kmp_info_t *thread = __kmp_threads[gtid];
kmp_int32 npredecessors = 0;
@@ -730,8 +727,9 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
}
}
}
- KA_TRACE(30, ("__kmp_process_deps<%s>: T#%d found %d predecessors (filter: %d)\n",
- T::name, gtid, npredecessors, filter));
+ KA_TRACE(30,
+ ("__kmp_process_deps<%s>: T#%d found %d predecessors (filter: %d)\n",
+ T::name, gtid, npredecessors, filter));
return npredecessors;
}
@@ -807,7 +805,7 @@ struct taskgraph_deps {
kmp_task_t *task, kmp_depnode_t *node,
kmp_depnode_list_t *plist);
static kmp_depnode_t *ref(kmp_depnode_t *node) { return node; }
- static void deref(kmp_info_t *thread, kmp_depnode_t *node) { }
+ static void deref(kmp_info_t *thread, kmp_depnode_t *node) {}
static void mutex_dep(kmp_info_t *thread, kmp_dephash_entry_t *info,
kmp_depnode_t *node, kmp_int32 &next_mutex_set);
};
@@ -897,15 +895,13 @@ static size_t __kmp_round_up_to_val(size_t size, size_t val) {
} // __kmp_round_up_to_val
// FIXME: C++-ify this.
-static kmp_taskgraph_region_t *
-__kmp_taskgraph_region_alloc(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- kmp_taskgraph_node_t *node,
- kmp_taskgraph_region_t *parent) {
+static kmp_taskgraph_region_t *__kmp_taskgraph_region_alloc(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain, kmp_taskgraph_node_t *node,
+ kmp_taskgraph_region_t *parent) {
kmp_taskgraph_region_t *region =
- (kmp_taskgraph_region_t *)__kmp_fast_allocate(thread,
- sizeof(kmp_taskgraph_region_t));
+ (kmp_taskgraph_region_t *)__kmp_fast_allocate(
+ thread, sizeof(kmp_taskgraph_region_t));
region->owner = taskgraph;
region->type = node ? TASKGRAPH_REGION_NODE : TASKGRAPH_REGION_WAIT;
region->task.node = node;
@@ -925,22 +921,18 @@ __kmp_taskgraph_region_alloc(kmp_info_t *thread,
}
// FIXME: This too.
-static kmp_taskgraph_region_t *
-__kmp_taskgraph_region_alloc(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- enum kmp_taskgraph_region_type type,
- kmp_int32 num_nodes,
- kmp_taskgraph_region_t *parent) {
- kmp_size_t size =
- sizeof(kmp_taskgraph_region_t) +
- num_nodes * sizeof(kmp_taskgraph_region_t *);
+static kmp_taskgraph_region_t *__kmp_taskgraph_region_alloc(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain, enum kmp_taskgraph_region_type type,
+ kmp_int32 num_nodes, kmp_taskgraph_region_t *parent) {
+ kmp_size_t size = sizeof(kmp_taskgraph_region_t) +
+ num_nodes * sizeof(kmp_taskgraph_region_t *);
size = __kmp_round_up_to_val(size, sizeof(kmp_taskgraph_region_t *));
kmp_taskgraph_region_t *region =
- (kmp_taskgraph_region_t *)__kmp_fast_allocate(thread, size);
+ (kmp_taskgraph_region_t *)__kmp_fast_allocate(thread, size);
region->owner = taskgraph;
region->type = type;
- region->inner.children = (kmp_taskgraph_region**)&region[1];
+ region->inner.children = (kmp_taskgraph_region **)&region[1];
region->inner.num_children = num_nodes;
region->mark = TASKGRAPH_UNMARKED;
region->level = -1;
@@ -957,40 +949,36 @@ __kmp_taskgraph_region_alloc(kmp_info_t *thread,
return region;
}
-// This makes a mostly-deep copy of a region. The region itself and children nodes are
-// created new, but node pointers are shared.
-static kmp_taskgraph_region_t *
-__kmp_taskgraph_region_clone(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- kmp_taskgraph_region_t *from,
- kmp_taskgraph_region_t *parent,
- kmp_int32 indent = 0) {
+// This makes a mostly-deep copy of a region. The region itself and children
+// nodes are created new, but node pointers are shared.
+static kmp_taskgraph_region_t *__kmp_taskgraph_region_clone(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain, kmp_taskgraph_region_t *from,
+ kmp_taskgraph_region_t *parent, kmp_int32 indent = 0) {
kmp_taskgraph_region_t *clone = nullptr;
switch (from->type) {
- case TASKGRAPH_REGION_ENTRY:
- case TASKGRAPH_REGION_EXIT:
- clone = __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
- nullptr, parent);
- clone->type = from->type;
- break;
- case TASKGRAPH_REGION_NODE:
- case TASKGRAPH_REGION_WAIT:
- clone = __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
- from->task.node, parent);
- break;
- default: {
- clone = __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
- from->type, from->inner.num_children,
- parent);
- for (kmp_int32 n = 0; n < from->inner.num_children; n++) {
- clone->inner.children[n] =
- __kmp_taskgraph_region_clone(thread, taskgraph, alloc_chain,
- from->inner.children[n], clone,
- indent + 2);
- }
+ case TASKGRAPH_REGION_ENTRY:
+ case TASKGRAPH_REGION_EXIT:
+ clone = __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
+ nullptr, parent);
+ clone->type = from->type;
+ break;
+ case TASKGRAPH_REGION_NODE:
+ case TASKGRAPH_REGION_WAIT:
+ clone = __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
+ from->task.node, parent);
+ break;
+ default: {
+ clone =
+ __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain, from->type,
+ from->inner.num_children, parent);
+ for (kmp_int32 n = 0; n < from->inner.num_children; n++) {
+ clone->inner.children[n] = __kmp_taskgraph_region_clone(
+ thread, taskgraph, alloc_chain, from->inner.children[n], clone,
+ indent + 2);
}
}
+ }
TGDBG("%*scloned region %p from region %p\n", indent, "", clone, from);
return clone;
}
@@ -1007,11 +995,9 @@ __kmp_taskgraph_topological_order(kmp_taskgraph_region_t *region,
region->mark = TASKGRAPH_TEMP_MARK;
kmp_int32 max_level = -1;
- for (kmp_taskgraph_region_dep_t *s = region->predecessors;
- s;
- s = s->next) {
+ for (kmp_taskgraph_region_dep_t *s = region->predecessors; s; s = s->next) {
kmp_int32 pred_level =
- __kmp_taskgraph_topological_order(s->region, order_out, outidx);
+ __kmp_taskgraph_topological_order(s->region, order_out, outidx);
max_level = pred_level > max_level ? pred_level : max_level;
}
@@ -1058,9 +1044,8 @@ static kmp_int32 __kmp_region_deplist_len(kmp_taskgraph_region_dep_t *list) {
return len;
}
-static void
-__kmp_region_deplist_free(kmp_info_t *thread,
- kmp_taskgraph_region_dep_t *list) {
+static void __kmp_region_deplist_free(kmp_info_t *thread,
+ kmp_taskgraph_region_dep_t *list) {
while (list) {
kmp_taskgraph_region_dep_t *next = list->next;
__kmp_fast_free(thread, list);
@@ -1083,13 +1068,10 @@ static void __kmp_region_deplist_recycle(kmp_taskgraph_region_dep_t **recycled,
}
}
-static bool
-__kmp_taskgraph_collapse_sequence(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- kmp_taskgraph_region_t **region_p,
- kmp_taskgraph_region_t *parent,
- kmp_int32 &stamp) {
+static bool __kmp_taskgraph_collapse_sequence(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain, kmp_taskgraph_region_t **region_p,
+ kmp_taskgraph_region_t *parent, kmp_int32 &stamp) {
kmp_taskgraph_region_t *region = *region_p;
kmp_taskgraph_region_t *chain_start = region;
kmp_taskgraph_region_t *chain_end = region;
@@ -1114,10 +1096,9 @@ __kmp_taskgraph_collapse_sequence(kmp_info_t *thread,
if (chain_len <= 1)
return false;
- kmp_taskgraph_region_t *seq_region =
- __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
- TASKGRAPH_REGION_SEQUENTIAL, chain_len,
- parent);
+ kmp_taskgraph_region_t *seq_region = __kmp_taskgraph_region_alloc(
+ thread, taskgraph, alloc_chain, TASKGRAPH_REGION_SEQUENTIAL, chain_len,
+ parent);
TGDBG("allocated new seq region: %p (length %d)\n", seq_region, chain_len);
kmp_taskgraph_region_t **worklist_p = region_p;
*worklist_p = seq_region;
@@ -1141,7 +1122,7 @@ __kmp_taskgraph_collapse_sequence(kmp_info_t *thread,
seq_region->level = level;
seq_region->predecessors = seq_region->inner.children[0]->predecessors;
seq_region->successors =
- seq_region->inner.children[chain_len - 1]->successors;
+ seq_region->inner.children[chain_len - 1]->successors;
seq_region->inner.children[0]->predecessors = nullptr;
seq_region->inner.children[chain_len - 1]->successors = nullptr;
@@ -1170,21 +1151,20 @@ __kmp_taskgraph_collapse_sequence(kmp_info_t *thread,
return true;
}
-static const char*
+static const char *
__kmp_taskgraph_region_type_name(kmp_taskgraph_region_type type);
-static void
-__kmp_taskgraph_region_dfs(kmp_taskgraph_region_t *region,
- kmp_taskgraph_region_t **order,
- kmp_int32 &idx, bool use_preds) {
+static void __kmp_taskgraph_region_dfs(kmp_taskgraph_region_t *region,
+ kmp_taskgraph_region_t **order,
+ kmp_int32 &idx, bool use_preds) {
if (order) {
region->timestamp = --idx;
order[idx] = region;
}
region->mark = TASKGRAPH_TEMP_MARK;
for (kmp_taskgraph_region_dep_t *reg = use_preds ? region->predecessors
- : region->successors; reg;
- reg = reg->next) {
+ : region->successors;
+ reg; reg = reg->next) {
if (reg->region->mark == TASKGRAPH_UNMARKED)
__kmp_taskgraph_region_dfs(reg->region, order, idx, use_preds);
}
@@ -1192,12 +1172,10 @@ __kmp_taskgraph_region_dfs(kmp_taskgraph_region_t *region,
#if defined(DEBUG_TASKGRAPH) && defined(CHECK_WORKLIST)
-static void
-__kmp_taskgraph_region_gather_deps(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t *region,
- kmp_taskgraph_region_dep_t **deplist,
- bool &ok) {
+static void __kmp_taskgraph_region_gather_deps(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t *region, kmp_taskgraph_region_dep_t **deplist,
+ bool &ok) {
for (kmp_taskgraph_region_dep_t *dep = *deplist; dep; dep = dep->next) {
if (dep->region == region)
return;
@@ -1207,14 +1185,14 @@ __kmp_taskgraph_region_gather_deps(kmp_info_t *thread,
*deplist);
for (kmp_taskgraph_region_dep_t *pred = region->predecessors; pred;
- pred = pred->next) {
+ pred = pred->next) {
if (pred->region->mark == TASKGRAPH_DELETED) {
fprintf(stderr, "*** Region %p's predecessor %p is a deleted node\n",
region, pred->region);
ok = false;
}
- __kmp_taskgraph_region_gather_deps(thread, taskgraph, pred->region,
- deplist, ok);
+ __kmp_taskgraph_region_gather_deps(thread, taskgraph, pred->region, deplist,
+ ok);
}
for (kmp_taskgraph_region_dep_t *succ = region->successors; succ;
@@ -1224,16 +1202,14 @@ __kmp_taskgraph_region_gather_deps(kmp_info_t *thread,
region, succ->region);
ok = false;
}
- __kmp_taskgraph_region_gather_deps(thread, taskgraph, succ->region,
- deplist, ok);
+ __kmp_taskgraph_region_gather_deps(thread, taskgraph, succ->region, deplist,
+ ok);
}
}
-static bool
-__kmp_taskgraph_region_worklist_check(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t *region,
- const char *where) {
+static bool __kmp_taskgraph_region_worklist_check(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t *region, const char *where) {
kmp_taskgraph_region_dep_t *collected_nodes = nullptr;
bool ok = true;
__kmp_taskgraph_region_gather_deps(thread, taskgraph, region,
@@ -1266,8 +1242,8 @@ __kmp_taskgraph_region_worklist_check(kmp_info_t *thread,
}
if (!in_list) {
fprintf(stderr,
- "*** Region %p is in worklist but not dependency graph (%s)\n",
- r, where);
+ "*** Region %p is in worklist but not dependency graph (%s)\n", r,
+ where);
ok = false;
}
}
@@ -1277,20 +1253,16 @@ __kmp_taskgraph_region_worklist_check(kmp_info_t *thread,
return ok;
}
#else
-static bool
-__kmp_taskgraph_region_worklist_check(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t *region,
- const char *where) {
+static bool __kmp_taskgraph_region_worklist_check(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t *region, const char *where) {
return true;
}
#endif
-static kmp_taskgraph_region_t *
-__kmp_taskgraph_region_dom_intersect(kmp_taskgraph_region_t **order,
- kmp_taskgraph_region_t **doms,
- kmp_taskgraph_region_t *b1,
- kmp_taskgraph_region_t *b2) {
+static kmp_taskgraph_region_t *__kmp_taskgraph_region_dom_intersect(
+ kmp_taskgraph_region_t **order, kmp_taskgraph_region_t **doms,
+ kmp_taskgraph_region_t *b1, kmp_taskgraph_region_t *b2) {
kmp_int32 finger1 = b1->timestamp;
kmp_int32 finger2 = b2->timestamp;
while (finger1 != finger2) {
@@ -1302,10 +1274,10 @@ __kmp_taskgraph_region_dom_intersect(kmp_taskgraph_region_t **order,
return order[finger1];
}
-static void
-__kmp_taskgraph_region_doms(kmp_taskgraph_region_t **order,
- kmp_taskgraph_region_t **doms,
- kmp_int32 worklist_length, bool postdom) {
+static void __kmp_taskgraph_region_doms(kmp_taskgraph_region_t **order,
+ kmp_taskgraph_region_t **doms,
+ kmp_int32 worklist_length,
+ bool postdom) {
bool changed = true;
// Set doms[start_node] <- start_node
doms[worklist_length - 1] = order[worklist_length - 1];
@@ -1316,22 +1288,21 @@ __kmp_taskgraph_region_doms(kmp_taskgraph_region_t **order,
kmp_taskgraph_region_t *b = order[n];
kmp_taskgraph_region_t *new_idom = nullptr;
for (kmp_taskgraph_region_dep_t *pred = postdom ? b->successors
- : b->predecessors; pred;
- pred = pred->next) {
+ : b->predecessors;
+ pred; pred = pred->next) {
if (pred->region->mark == TASKGRAPH_PERMANENT_MARK) {
new_idom = pred->region;
break;
}
}
for (kmp_taskgraph_region_dep_t *pred = postdom ? b->successors
- : b->predecessors; pred;
- pred = pred->next) {
+ : b->predecessors;
+ pred; pred = pred->next) {
if (pred->region == new_idom)
continue;
if (doms[pred->region->timestamp]) {
- new_idom =
- __kmp_taskgraph_region_dom_intersect(order, doms, pred->region,
- new_idom);
+ new_idom = __kmp_taskgraph_region_dom_intersect(
+ order, doms, pred->region, new_idom);
}
}
if (doms[b->timestamp] != new_idom) {
@@ -1343,8 +1314,7 @@ __kmp_taskgraph_region_doms(kmp_taskgraph_region_t **order,
}
}
-static bool
-__kmp_taskgraph_region_mutex_p(kmp_taskgraph_region_t *reg) {
+static bool __kmp_taskgraph_region_mutex_p(kmp_taskgraph_region_t *reg) {
if (reg->type == TASKGRAPH_REGION_NODE)
return reg->mutexset != nullptr;
return false;
@@ -1369,19 +1339,16 @@ __kmp_taskgraph_region_mutex_p(kmp_taskgraph_region_t *reg) {
// We choose the pp the the highest level ("furthest down the graph"), and
// collapse the subgraph into a parallel region.
-static bool
-__kmp_taskgraph_collapse_par_exclusive(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- kmp_taskgraph_region_t **region_p,
- kmp_taskgraph_region_t *parent,
- kmp_int32 &stamp) {
+static bool __kmp_taskgraph_collapse_par_exclusive(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain, kmp_taskgraph_region_t **region_p,
+ kmp_taskgraph_region_t *parent, kmp_int32 &stamp) {
kmp_taskgraph_region_t *region = *region_p;
kmp_int32 num_predecessors = __kmp_region_deplist_len(region->predecessors);
TGDBG("predecessors %d, successors %d\n",
- __kmp_region_deplist_len(region->predecessors),
- __kmp_region_deplist_len(region->successors));
+ __kmp_region_deplist_len(region->predecessors),
+ __kmp_region_deplist_len(region->successors));
if (num_predecessors <= 1)
return false;
@@ -1391,7 +1358,7 @@ __kmp_taskgraph_collapse_par_exclusive(kmp_info_t *thread,
kmp_int32 highest_level = -1;
for (kmp_taskgraph_region_dep_t *pred = region->predecessors; pred;
- pred = pred->next) {
+ pred = pred->next) {
TGDBG("consider predecessor: %p\n", pred->region);
TGDBG("-- successors %d, predecessors %d\n",
__kmp_region_deplist_len(pred->region->successors),
@@ -1405,7 +1372,8 @@ __kmp_taskgraph_collapse_par_exclusive(kmp_info_t *thread,
continue;
bool in_list = false;
TGDBG("pp region: %p (%s)\n", pred_region->predecessors->region,
- __kmp_taskgraph_region_type_name(pred_region->predecessors->region->type));
+ __kmp_taskgraph_region_type_name(
+ pred_region->predecessors->region->type));
kmp_taskgraph_region_t *pp_region = pred_region->predecessors->region;
for (kmp_taskgraph_region_dep_t *pp = pred_preds; pp; pp = pp->next) {
if (pp->region == pp_region) {
@@ -1460,10 +1428,9 @@ __kmp_taskgraph_collapse_par_exclusive(kmp_info_t *thread,
if (preds_for_pp < 2)
continue;
kmp_taskgraph_region_type region_type =
- any_mutex_p ? TASKGRAPH_REGION_EXCLUSIVE : TASKGRAPH_REGION_PARALLEL;
- kmp_taskgraph_region_t *par_region =
- __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain, region_type,
- preds_for_pp, parent);
+ any_mutex_p ? TASKGRAPH_REGION_EXCLUSIVE : TASKGRAPH_REGION_PARALLEL;
+ kmp_taskgraph_region_t *par_region = __kmp_taskgraph_region_alloc(
+ thread, taskgraph, alloc_chain, region_type, preds_for_pp, parent);
changed = true;
TGDBG("allocated %s region: %p\n",
region_type == TASKGRAPH_REGION_EXCLUSIVE ? "exclusive" : "parallel",
@@ -1487,7 +1454,7 @@ __kmp_taskgraph_collapse_par_exclusive(kmp_info_t *thread,
TGDBG("bailing (non-unit pred/succ list length)\n");
continue;
}
- TGDBG("process region %p (%d/%d), level %d\n", pred->region, i+1,
+ TGDBG("process region %p (%d/%d), level %d\n", pred->region, i + 1,
preds_for_pp, pred_region->level);
par_region->inner.children[i] = pred_region;
pred_region->mark = TASKGRAPH_COMBINED;
@@ -1526,8 +1493,7 @@ __kmp_taskgraph_collapse_par_exclusive(kmp_info_t *thread,
par_region->predecessors = par_preds;
par_region->successors = par_succs;
- if (region->type == TASKGRAPH_REGION_WAIT &&
- !found_reduction_data) {
+ if (region->type == TASKGRAPH_REGION_WAIT && !found_reduction_data) {
// If we have no reduction data, we will not create a taskgroup for this
// parallel region at replay time, so we don't need to terminate/discard
// that region when we're done. Clear the taskloop_task flag.
@@ -1599,8 +1565,8 @@ __kmp_taskgraph_collapse_par_exclusive(kmp_info_t *thread,
return changed;
}
-static void
-__kmp_taskgraph_region_dot(kmp_taskgraph_region_t *region, const char *name) {
+static void __kmp_taskgraph_region_dot(kmp_taskgraph_region_t *region,
+ const char *name) {
fprintf(stderr, "digraph %s {\n", name);
for (kmp_taskgraph_region_t *r = region; r; r = r->next) {
if (r->mark == TASKGRAPH_DELETED) {
@@ -1697,16 +1663,13 @@ __kmp_taskgraph_count_edges_to_dominator(kmp_taskgraph_region_t *reg,
// critical point is what it means to clone a task node in this way: that is
// discussed in the commentary of __kmp_taskgraph_rewrite_irreducible.
-static void
-__kmp_taskgraph_clone_subgraph(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- kmp_taskgraph_region_t *cloned_nodes[],
- kmp_taskgraph_region_t *orig_region,
- kmp_taskgraph_region_t *doms[],
- kmp_taskgraph_region_dep_t *preds_with_dom,
- kmp_taskgraph_region_t *region_dom,
- kmp_taskgraph_region_t ***added_worklist) {
+static void __kmp_taskgraph_clone_subgraph(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain,
+ kmp_taskgraph_region_t *cloned_nodes[], kmp_taskgraph_region_t *orig_region,
+ kmp_taskgraph_region_t *doms[], kmp_taskgraph_region_dep_t *preds_with_dom,
+ kmp_taskgraph_region_t *region_dom,
+ kmp_taskgraph_region_t ***added_worklist) {
for (kmp_taskgraph_region_dep_t *pred = preds_with_dom; pred;
pred = pred->next) {
kmp_taskgraph_region_t *pred_region = pred->region;
@@ -1720,9 +1683,8 @@ __kmp_taskgraph_clone_subgraph(kmp_info_t *thread,
pred->region = cloned_nodes[pred_region->timestamp];
continue;
}
- kmp_taskgraph_region_t *cloned_region =
- __kmp_taskgraph_region_clone(thread, taskgraph, alloc_chain,
- pred_region, nullptr);
+ kmp_taskgraph_region_t *cloned_region = __kmp_taskgraph_region_clone(
+ thread, taskgraph, alloc_chain, pred_region, nullptr);
cloned_nodes[pred_region->timestamp] = cloned_region;
**added_worklist = cloned_region;
@@ -1733,9 +1695,8 @@ __kmp_taskgraph_clone_subgraph(kmp_info_t *thread,
kmp_taskgraph_region_dep_t *cloned_preds = nullptr;
for (kmp_taskgraph_region_dep_t *p = pred_region->predecessors; p;
p = p->next) {
- cloned_preds =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- p->region, cloned_preds);
+ cloned_preds = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, p->region, cloned_preds);
}
cloned_region->predecessors = cloned_preds;
// Note pred_region is the original predecessor region here, not the
@@ -1805,12 +1766,10 @@ __kmp_taskgraph_clone_subgraph(kmp_info_t *thread,
//
// For host execution, this is handled by __kmp_exec_descr_link_instances, etc.
-static bool
-__kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **alloc_chain,
- kmp_taskgraph_region_t **region_p,
- kmp_taskgraph_region_t *exitregion) {
+static bool __kmp_taskgraph_rewrite_irreducible(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **alloc_chain, kmp_taskgraph_region_t **region_p,
+ kmp_taskgraph_region_t *exitregion) {
kmp_taskgraph_region_t *entryregion = *region_p;
bool changed = false;
@@ -1831,11 +1790,11 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
#endif
kmp_taskgraph_region_t **order =
- (kmp_taskgraph_region_t **)__kmp_fast_allocate(thread,
- worklist_length * sizeof(kmp_taskgraph_region_t *));
+ (kmp_taskgraph_region_t **)__kmp_fast_allocate(
+ thread, worklist_length * sizeof(kmp_taskgraph_region_t *));
kmp_taskgraph_region_t **doms =
- (kmp_taskgraph_region_t **)__kmp_fast_allocate(thread,
- worklist_length * sizeof(kmp_taskgraph_region_t *));
+ (kmp_taskgraph_region_t **)__kmp_fast_allocate(
+ thread, worklist_length * sizeof(kmp_taskgraph_region_t *));
memset(doms, 0, worklist_length * sizeof(kmp_taskgraph_region_t *));
kmp_int32 cursor = worklist_length;
assert(entryregion->type == TASKGRAPH_REGION_ENTRY);
@@ -1899,8 +1858,8 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
if (passes_pred) {
// We can drop this predecessor.
- TGDBG("dropping pred %p from region %p, dom %p\n",
- pred->region, region, doms[pred->region->timestamp]);
+ TGDBG("dropping pred %p from region %p, dom %p\n", pred->region, region,
+ doms[pred->region->timestamp]);
kmp_taskgraph_region_dep_t *next = pred->next;
kmp_taskgraph_region_dep_t **succp = &pred->region->successors;
while (*succp) {
@@ -1945,7 +1904,7 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
kmp_taskgraph_region_t *this_dom = doms[pred_region->timestamp];
#ifdef DEBUG_TASKGRAPH
kmp_int32 edges_to_dom =
- __kmp_taskgraph_count_edges_to_dominator(pred_region, this_dom);
+ __kmp_taskgraph_count_edges_to_dominator(pred_region, this_dom);
TGDBG("this pred: %p, edges_to_dom=%d\n", pred_region, edges_to_dom);
#endif
bool found = false;
@@ -1967,10 +1926,10 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
TGDBG("region %p: all predecessors have a single dominator\n", region);
if (!pred_bitsets) {
- pred_bitsets = (kmp_bitset_t **) __kmp_fast_allocate(thread,
- sizeof(kmp_bitset_t *) * worklist_length);
- succ_bitsets = (kmp_bitset_t **) __kmp_fast_allocate(thread,
- sizeof(kmp_bitset_t *) * worklist_length);
+ pred_bitsets = (kmp_bitset_t **)__kmp_fast_allocate(
+ thread, sizeof(kmp_bitset_t *) * worklist_length);
+ succ_bitsets = (kmp_bitset_t **)__kmp_fast_allocate(
+ thread, sizeof(kmp_bitset_t *) * worklist_length);
for (kmp_int32 i = 0; i < worklist_length; i++) {
pred_bitsets[i] = __kmp_bitset_alloc(thread, worklist_length);
@@ -2005,21 +1964,19 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
TGDBG("regions %p and %p share all predecessors/successors\n",
order[i], order[j]);
same_preds_and_succs++;
- equal_deps_chain =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- order[j], equal_deps_chain);
+ equal_deps_chain = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, order[j], equal_deps_chain);
if (__kmp_taskgraph_region_mutex_p(order[j]))
any_mutex_p = true;
}
}
if (same_preds_and_succs > 1) {
kmp_taskgraph_region_type region_type =
- any_mutex_p ? TASKGRAPH_REGION_EXCLUSIVE
- : TASKGRAPH_REGION_PARALLEL;
- kmp_taskgraph_region_t *par_region =
- __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
- region_type, same_preds_and_succs,
- nullptr);
+ any_mutex_p ? TASKGRAPH_REGION_EXCLUSIVE
+ : TASKGRAPH_REGION_PARALLEL;
+ kmp_taskgraph_region_t *par_region = __kmp_taskgraph_region_alloc(
+ thread, taskgraph, alloc_chain, region_type, same_preds_and_succs,
+ nullptr);
par_region->inner.children[0] = region;
region->mark = TASKGRAPH_COMBINED;
region->parent = par_region;
@@ -2033,7 +1990,7 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
equal_deps_chain = next;
}
par_region->predecessors =
- par_region->inner.children[0]->predecessors;
+ par_region->inner.children[0]->predecessors;
par_region->inner.children[0]->predecessors = nullptr;
par_region->successors = par_region->inner.children[0]->successors;
par_region->inner.children[0]->successors = nullptr;
@@ -2104,11 +2061,10 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
if (regions_combined_p)
continue;
- assert (num_groups >= 1);
+ assert(num_groups >= 1);
TGDBG("should split region %p (%d)\n", region, region->timestamp);
- TGDBG("clone graph to dominator: %p (%d, %s)\n",
- doms[region->timestamp],
+ TGDBG("clone graph to dominator: %p (%d, %s)\n", doms[region->timestamp],
doms[region->timestamp]->timestamp,
__kmp_taskgraph_region_type_name(doms[region->timestamp]->type));
kmp_taskgraph_region_t *region_dom = doms[region->timestamp];
@@ -2154,9 +2110,8 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
region->predecessors = preds_with_dom;
for (kmp_taskgraph_region_dep_t **rp = &region->predecessors; *rp;
rp = &(*rp)->next) {
- kmp_int32 count =
- __kmp_taskgraph_count_edges_to_dominator((*rp)->region,
- dom_groups[grp].dom);
+ kmp_int32 count = __kmp_taskgraph_count_edges_to_dominator(
+ (*rp)->region, dom_groups[grp].dom);
TGDBG("for pred %p, outgoing edges to dom = %d\n", (*rp)->region,
count);
if (count > highest) {
@@ -2186,8 +2141,8 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
__kmp_region_dep_recycle(&taskgraph->recycled_deps, succ);
TGDBG("unlinking successor %p -> %p\n", pred->region, region);
unlinked_successors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- pred->region, unlinked_successors);
+ __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
+ pred->region, unlinked_successors);
*succp = next;
} else {
succp = &succ->next;
@@ -2214,8 +2169,8 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
pred; pred = pred->next) {
kmp_taskgraph_region_t *pred_region = pred->region;
pred_region->successors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- cloned_region, pred_region->successors);
+ __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
+ cloned_region, pred_region->successors);
}
}
@@ -2248,13 +2203,12 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
// the region.
for (kmp_taskgraph_region_dep_t *succ = unlinked_successors; succ;) {
kmp_taskgraph_region_t *cloned_reg =
- cloned_nodes[succ->region->timestamp];
+ cloned_nodes[succ->region->timestamp];
kmp_taskgraph_region_dep_t *next = succ->next;
__kmp_region_dep_recycle(&taskgraph->recycled_deps, succ);
TGDBG("add successor to cloned region: %p -> %p\n", cloned_reg, region);
- cloned_reg->successors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps, region,
- cloned_reg->successors);
+ cloned_reg->successors = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, region, cloned_reg->successors);
succ = next;
}
@@ -2350,7 +2304,7 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
for (kmp_int32 i = 0; i < worklist_length; i++, r = r->next) {
if (r->mark == TASKGRAPH_UNMARKED) {
kmp_int32 level =
- __kmp_taskgraph_topological_order(r, order_out, &outidx);
+ __kmp_taskgraph_topological_order(r, order_out, &outidx);
max_level = level > max_level ? level : max_level;
}
}
@@ -2387,12 +2341,10 @@ __kmp_taskgraph_rewrite_irreducible(kmp_info_t *thread,
// much of the heavier processing involved in step (2), so the common case
// should be relatively fast.
-static kmp_taskgraph_region_t *
-__kmp_taskgraph_build_regions(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- kmp_taskgraph_region_t *entryregion,
- kmp_taskgraph_region_t *exitregion) {
+static kmp_taskgraph_region_t *__kmp_taskgraph_build_regions(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain, kmp_taskgraph_region_t *entryregion,
+ kmp_taskgraph_region_t *exitregion) {
bool changed;
kmp_int32 phase = 0;
@@ -2407,17 +2359,17 @@ __kmp_taskgraph_build_regions(kmp_info_t *thread,
changed = false;
TGDBG("starting seq pass\n");
for (kmp_taskgraph_region_t **seq_head = &entryregion; *seq_head;
- seq_head = &(*seq_head)->next) {
+ seq_head = &(*seq_head)->next) {
TGDBG("consider %s region: %p\n",
__kmp_taskgraph_region_type_name((*seq_head)->type), *seq_head);
if ((*seq_head)->mark == TASKGRAPH_COMBINED) {
TGDBG("already combined\n");
continue;
}
- changed |=
- __kmp_taskgraph_collapse_sequence(thread, taskgraph, alloc_chain, seq_head,
- /*parent=*/nullptr, phase);
- TGDBG("changed: %s\n", changed ? "true" : "false");
+ changed |= __kmp_taskgraph_collapse_sequence(thread, taskgraph,
+ alloc_chain, seq_head,
+ /*parent=*/nullptr, phase);
+ TGDBG("changed: %s\n", changed ? "true" : "false");
}
++phase;
__kmp_taskgraph_region_chain_prune(&entryregion);
@@ -2425,17 +2377,16 @@ __kmp_taskgraph_build_regions(kmp_info_t *thread,
"after seq collapse");
TGDBG("starting par/unordered pass\n");
for (kmp_taskgraph_region_t **par_head = &entryregion; *par_head;
- par_head = &(*par_head)->next) {
+ par_head = &(*par_head)->next) {
TGDBG("consider %s region: %p\n",
__kmp_taskgraph_region_type_name((*par_head)->type), *par_head);
if ((*par_head)->mark == TASKGRAPH_COMBINED) {
TGDBG("already combined\n");
continue;
}
- changed |=
- __kmp_taskgraph_collapse_par_exclusive(thread, taskgraph, alloc_chain,
- par_head, /*parent=*/nullptr,
- phase);
+ changed |= __kmp_taskgraph_collapse_par_exclusive(
+ thread, taskgraph, alloc_chain, par_head, /*parent=*/nullptr,
+ phase);
TGDBG("changed: %s\n", changed ? "true" : "false");
}
++phase;
@@ -2460,9 +2411,8 @@ __kmp_taskgraph_build_regions(kmp_info_t *thread,
TGDBG("attempting to collapse irreducible regions\n");
- changed |=
- __kmp_taskgraph_rewrite_irreducible(thread, taskgraph, alloc_chain,
- &entryregion, exitregion);
+ changed |= __kmp_taskgraph_rewrite_irreducible(
+ thread, taskgraph, alloc_chain, &entryregion, exitregion);
if (!changed) {
fprintf(stderr, "FIXME: Failed to transform irreducible graph\n");
@@ -2473,114 +2423,109 @@ __kmp_taskgraph_build_regions(kmp_info_t *thread,
return entryregion;
}
-static void
-__kmp_taskgraph_count_nodes(kmp_taskgraph_region_t *region) {
+static void __kmp_taskgraph_count_nodes(kmp_taskgraph_region_t *region) {
switch (region->type) {
- case TASKGRAPH_REGION_ENTRY:
- case TASKGRAPH_REGION_EXIT:
- return;
- case TASKGRAPH_REGION_NODE:
- case TASKGRAPH_REGION_WAIT: {
- TGDBG("process region %p\n", region);
- region->task.node->u.resolved.count++;
- kmp_taskgraph_region_t *last_region =
+ case TASKGRAPH_REGION_ENTRY:
+ case TASKGRAPH_REGION_EXIT:
+ return;
+ case TASKGRAPH_REGION_NODE:
+ case TASKGRAPH_REGION_WAIT: {
+ TGDBG("process region %p\n", region);
+ region->task.node->u.resolved.count++;
+ kmp_taskgraph_region_t *last_region =
region->task.node->u.resolved.last_region;
- TGDBG("last region: %p\n", last_region);
- if (last_region) {
- kmp_taskgraph_region_t *next = last_region->task.next_instance;
- TGDBG("next: %p\n", next);
- last_region->task.next_instance = region;
- region->task.next_instance = next;
- }
- region->task.node->u.resolved.last_region = region;
- return;
+ TGDBG("last region: %p\n", last_region);
+ if (last_region) {
+ kmp_taskgraph_region_t *next = last_region->task.next_instance;
+ TGDBG("next: %p\n", next);
+ last_region->task.next_instance = region;
+ region->task.next_instance = next;
+ }
+ region->task.node->u.resolved.last_region = region;
+ return;
+ }
+ default:
+ for (kmp_int32 n = 0; n < region->inner.num_children; n++) {
+ __kmp_taskgraph_count_nodes(region->inner.children[n]);
}
- default:
- for (kmp_int32 n = 0; n < region->inner.num_children; n++) {
- __kmp_taskgraph_count_nodes(region->inner.children[n]);
- }
}
}
-static void
-__kmp_taskgraph_gather_mutex_sets(kmp_info_t *thread,
- kmp_taskgraph_region_t *region,
- const kmp_bitset_t *held) {
+static void __kmp_taskgraph_gather_mutex_sets(kmp_info_t *thread,
+ kmp_taskgraph_region_t *region,
+ const kmp_bitset_t *held) {
switch (region->type) {
- case TASKGRAPH_REGION_ENTRY:
- case TASKGRAPH_REGION_EXIT:
- case TASKGRAPH_REGION_WAIT:
- return;
- case TASKGRAPH_REGION_NODE: {
+ case TASKGRAPH_REGION_ENTRY:
+ case TASKGRAPH_REGION_EXIT:
+ case TASKGRAPH_REGION_WAIT:
+ return;
+ case TASKGRAPH_REGION_NODE: {
#ifdef DEBUG_TASKGRAPH
- if (region->mutexset && __kmp_bitset_subset_p(held, region->mutexset)) {
- TGDBG("node is mutually exclusive with held: 0x%llx <: 0x%llx\n",
- (unsigned long long)region->mutexset->bits[0],
- (unsigned long long)held->bits[0]);
- }
-#endif
- return;
+ if (region->mutexset && __kmp_bitset_subset_p(held, region->mutexset)) {
+ TGDBG("node is mutually exclusive with held: 0x%llx <: 0x%llx\n",
+ (unsigned long long)region->mutexset->bits[0],
+ (unsigned long long)held->bits[0]);
}
- case TASKGRAPH_REGION_SEQUENTIAL: {
- kmp_bitset_t *seq_held = __kmp_bitset_alloc(thread, held->bitsize);
- __kmp_bitset_clearall(seq_held);
+#endif
+ return;
+ }
+ case TASKGRAPH_REGION_SEQUENTIAL: {
+ kmp_bitset_t *seq_held = __kmp_bitset_alloc(thread, held->bitsize);
+ __kmp_bitset_clearall(seq_held);
+ for (kmp_int32 child = 0; child < region->inner.num_children; child++) {
+ __kmp_taskgraph_gather_mutex_sets(thread, region->inner.children[child],
+ held);
+ if (region->inner.children[child]->mutexset)
+ __kmp_bitset_or(seq_held, seq_held,
+ region->inner.children[child]->mutexset);
+ }
+ region->mutexset = seq_held;
+ return;
+ }
+ case TASKGRAPH_REGION_PARALLEL:
+ case TASKGRAPH_REGION_EXCLUSIVE: {
+ kmp_bitset_t *par_held = __kmp_bitset_alloc(thread, held->bitsize);
+ kmp_bitset_t *conflicts = __kmp_bitset_alloc(thread, held->bitsize);
+ while (true) {
+ __kmp_bitset_clearall(par_held);
for (kmp_int32 child = 0; child < region->inner.num_children; child++) {
+ __kmp_bitset_clearall(conflicts);
+ for (kmp_int32 other = 0; other < region->inner.num_children; other++) {
+ if (other != child) {
+ if (!region->inner.children[other]->mutexset)
+ __kmp_taskgraph_gather_mutex_sets(
+ thread, region->inner.children[other], held);
+ if (region->inner.children[other]->mutexset)
+ __kmp_bitset_or(conflicts, conflicts,
+ region->inner.children[other]->mutexset);
+ }
+ }
__kmp_taskgraph_gather_mutex_sets(thread, region->inner.children[child],
- held);
+ conflicts);
if (region->inner.children[child]->mutexset)
- __kmp_bitset_or(seq_held, seq_held,
+ __kmp_bitset_or(par_held, par_held,
region->inner.children[child]->mutexset);
}
- region->mutexset = seq_held;
- return;
- }
- case TASKGRAPH_REGION_PARALLEL:
- case TASKGRAPH_REGION_EXCLUSIVE: {
- kmp_bitset_t *par_held = __kmp_bitset_alloc(thread, held->bitsize);
- kmp_bitset_t *conflicts = __kmp_bitset_alloc(thread, held->bitsize);
- while (true) {
- __kmp_bitset_clearall(par_held);
- for (kmp_int32 child = 0; child < region->inner.num_children; child++) {
- __kmp_bitset_clearall(conflicts);
- for (kmp_int32 other = 0; other < region->inner.num_children; other++) {
- if (other != child) {
- if (!region->inner.children[other]->mutexset)
- __kmp_taskgraph_gather_mutex_sets(thread,
- region->inner.children[other],
- held);
- if (region->inner.children[other]->mutexset)
- __kmp_bitset_or(conflicts, conflicts,
- region->inner.children[other]->mutexset);
- }
- }
- __kmp_taskgraph_gather_mutex_sets(thread,
- region->inner.children[child],
- conflicts);
- if (region->inner.children[child]->mutexset)
- __kmp_bitset_or(par_held, par_held,
- region->inner.children[child]->mutexset);
- }
- if (!region->mutexset) {
- region->mutexset = par_held;
- } else if (__kmp_bitset_equal(region->mutexset, par_held)) {
- TGDBG("par mutexes stabilized, exiting loop\n");
- break;
- } else {
- TGDBG("par mutexes not stable, iterating\n");
- __kmp_bitset_copy(region->mutexset, par_held);
- __kmp_bitset_free(thread, par_held);
- }
+ if (!region->mutexset) {
+ region->mutexset = par_held;
+ } else if (__kmp_bitset_equal(region->mutexset, par_held)) {
+ TGDBG("par mutexes stabilized, exiting loop\n");
+ break;
+ } else {
+ TGDBG("par mutexes not stable, iterating\n");
+ __kmp_bitset_copy(region->mutexset, par_held);
+ __kmp_bitset_free(thread, par_held);
}
- __kmp_bitset_free(thread, conflicts);
- return;
}
+ __kmp_bitset_free(thread, conflicts);
+ return;
+ }
}
}
-static int
-__kmp_popcount_cmp(const void *a, const void *b) {
- const kmp_taskgraph_region_t *reg_a = *(kmp_taskgraph_region_t **) a;
- const kmp_taskgraph_region_t *reg_b = *(kmp_taskgraph_region_t **) b;
+static int __kmp_popcount_cmp(const void *a, const void *b) {
+ const kmp_taskgraph_region_t *reg_a = *(kmp_taskgraph_region_t **)a;
+ const kmp_taskgraph_region_t *reg_b = *(kmp_taskgraph_region_t **)b;
kmp_int32 popc_a = 0, popc_b = 0;
if (reg_a->mutexset)
popc_a = __kmp_bitset_popcount(reg_a->mutexset);
@@ -2596,179 +2541,172 @@ __kmp_popcount_cmp(const void *a, const void *b) {
/// Find "mutexinoutset" regions that can be represented without explicit
// mutexes, i.e. using "TASKGRAPH_REGION_EXCLUSIVE".
-static void
-__kmp_taskgraph_find_exclusive_regions(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- kmp_taskgraph_region_t **region_p) {
+static void __kmp_taskgraph_find_exclusive_regions(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain, kmp_taskgraph_region_t **region_p) {
kmp_taskgraph_region_t *region = *region_p;
switch (region->type) {
- case TASKGRAPH_REGION_ENTRY:
- case TASKGRAPH_REGION_EXIT:
- case TASKGRAPH_REGION_NODE:
- case TASKGRAPH_REGION_WAIT:
- break;
- case TASKGRAPH_REGION_SEQUENTIAL:
- case TASKGRAPH_REGION_PARALLEL: {
- for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
- __kmp_taskgraph_find_exclusive_regions(thread, taskgraph, alloc_chain,
- &region->inner.children[c]);
- }
- break;
- }
- case TASKGRAPH_REGION_EXCLUSIVE: {
- qsort(region->inner.children, region->inner.num_children,
- sizeof(kmp_taskgraph_region_t *), __kmp_popcount_cmp);
- for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
- TGDBG("building tree: region mutexset = 0x%llx\n",
- (unsigned long long) region->inner.children[c]->mutexset
- ? region->inner.children[c]->mutexset->bits[0] : 0);
- region->inner.children[c]->mark = TASKGRAPH_UNMARKED;
- }
- kmp_bitset_t *conflicts =
+ case TASKGRAPH_REGION_ENTRY:
+ case TASKGRAPH_REGION_EXIT:
+ case TASKGRAPH_REGION_NODE:
+ case TASKGRAPH_REGION_WAIT:
+ break;
+ case TASKGRAPH_REGION_SEQUENTIAL:
+ case TASKGRAPH_REGION_PARALLEL: {
+ for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
+ __kmp_taskgraph_find_exclusive_regions(thread, taskgraph, alloc_chain,
+ &region->inner.children[c]);
+ }
+ break;
+ }
+ case TASKGRAPH_REGION_EXCLUSIVE: {
+ qsort(region->inner.children, region->inner.num_children,
+ sizeof(kmp_taskgraph_region_t *), __kmp_popcount_cmp);
+ for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
+ TGDBG("building tree: region mutexset = 0x%llx\n",
+ (unsigned long long)region->inner.children[c]->mutexset
+ ? region->inner.children[c]->mutexset->bits[0]
+ : 0);
+ region->inner.children[c]->mark = TASKGRAPH_UNMARKED;
+ }
+ kmp_bitset_t *conflicts =
__kmp_bitset_alloc(thread, region->mutexset->bitsize);
- kmp_bitset_t *subsets_cover =
+ kmp_bitset_t *subsets_cover =
__kmp_bitset_alloc(thread, region->mutexset->bitsize);
- __kmp_bitset_copy(conflicts, region->mutexset);
- bool irregular = false;
- kmp_int32 combined_children = 0;
- for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
- kmp_bitset_t *candidate = region->inner.children[c]->mutexset;
- if (__kmp_bitset_empty_p(candidate))
- continue;
- __kmp_bitset_clearall(subsets_cover);
- bool found_subset = false;
- bool other_overlaps = false;
- for (kmp_int32 d = c + 1; d < region->inner.num_children; d++) {
- // This could test for a subset in some cases, but that adds
- // complication for later processing. Maybe revisit later if it
- // seems worthwhile.
- // E.g. if we have deps like this:
- //
- // #pragma omp task depend(mutexinoutset: deps[0], deps[1]) { /*a*/ }
- // #pragma omp task depend(mutexinoutset: deps[0]) { /*b*/ }
- // #pragma omp task depend(mutexinoutset: deps[1]) { /*c*/ }
- //
- // This could be represented as:
- //
- // exclusive {
- // node: a
- // parallel {
- // node: b
- // node: c
- // }
- // }
- //
- // We're not doing that yet though.
- if (__kmp_bitset_equal(candidate,
- region->inner.children[d]->mutexset)) {
- found_subset = true;
- __kmp_bitset_or(subsets_cover, subsets_cover,
- region->inner.children[d]->mutexset);
- } else if (__kmp_bitset_intersect_p(
+ __kmp_bitset_copy(conflicts, region->mutexset);
+ bool irregular = false;
+ kmp_int32 combined_children = 0;
+ for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
+ kmp_bitset_t *candidate = region->inner.children[c]->mutexset;
+ if (__kmp_bitset_empty_p(candidate))
+ continue;
+ __kmp_bitset_clearall(subsets_cover);
+ bool found_subset = false;
+ bool other_overlaps = false;
+ for (kmp_int32 d = c + 1; d < region->inner.num_children; d++) {
+ // This could test for a subset in some cases, but that adds
+ // complication for later processing. Maybe revisit later if it
+ // seems worthwhile.
+ // E.g. if we have deps like this:
+ //
+ // #pragma omp task depend(mutexinoutset: deps[0], deps[1]) { /*a*/ }
+ // #pragma omp task depend(mutexinoutset: deps[0]) { /*b*/ }
+ // #pragma omp task depend(mutexinoutset: deps[1]) { /*c*/ }
+ //
+ // This could be represented as:
+ //
+ // exclusive {
+ // node: a
+ // parallel {
+ // node: b
+ // node: c
+ // }
+ // }
+ //
+ // We're not doing that yet though.
+ if (__kmp_bitset_equal(candidate,
+ region->inner.children[d]->mutexset)) {
+ found_subset = true;
+ __kmp_bitset_or(subsets_cover, subsets_cover,
+ region->inner.children[d]->mutexset);
+ } else if (__kmp_bitset_intersect_p(
candidate, region->inner.children[d]->mutexset)) {
- other_overlaps = true;
- break;
- }
- }
- if (!found_subset || other_overlaps)
- continue;
- if (!__kmp_bitset_equal(subsets_cover, candidate)) {
- TGDBG("subsets cover: 0x%llx, candidate: 0x%llx\n",
- (unsigned long long)subsets_cover->bits[0],
- (unsigned long long)candidate->bits[0]);
- irregular = true;
+ other_overlaps = true;
break;
}
- for (kmp_int32 d = c + 1; d < region->inner.num_children; d++) {
- if (region->inner.children[d]->mutexset_parent)
- continue;
- // As above wrt. subsets.
- if (__kmp_bitset_equal(candidate,
- region->inner.children[d]->mutexset)) {
- TGDBG("set index %d's parent to index %d\n", d, c);
- region->inner.children[d]->mutexset_parent =
+ }
+ if (!found_subset || other_overlaps)
+ continue;
+ if (!__kmp_bitset_equal(subsets_cover, candidate)) {
+ TGDBG("subsets cover: 0x%llx, candidate: 0x%llx\n",
+ (unsigned long long)subsets_cover->bits[0],
+ (unsigned long long)candidate->bits[0]);
+ irregular = true;
+ break;
+ }
+ for (kmp_int32 d = c + 1; d < region->inner.num_children; d++) {
+ if (region->inner.children[d]->mutexset_parent)
+ continue;
+ // As above wrt. subsets.
+ if (__kmp_bitset_equal(candidate,
+ region->inner.children[d]->mutexset)) {
+ TGDBG("set index %d's parent to index %d\n", d, c);
+ region->inner.children[d]->mutexset_parent =
region->inner.children[c];
- combined_children++;
- __kmp_bitset_and_not(conflicts, conflicts, candidate);
- }
+ combined_children++;
+ __kmp_bitset_and_not(conflicts, conflicts, candidate);
}
}
- TGDBG("irregular: %s\n", irregular ? "true" : "false");
- TGDBG("final conflicts: 0x%llx\n",
- (unsigned long long)conflicts->bits[0]);
- __kmp_bitset_free(thread, subsets_cover);
- region->type = TASKGRAPH_REGION_PARALLEL;
- if (!irregular && __kmp_bitset_empty_p(conflicts)) {
- TGDBG("transforming exclusive region %p\n", region);
- TGDBG("orig region children: %d\n", region->inner.num_children);
- TGDBG("combined children: %d\n", combined_children);
- if (region->inner.num_children == combined_children + 1) {
- region->type = TASKGRAPH_REGION_EXCLUSIVE;
- } else {
- kmp_taskgraph_region_t *new_par =
- __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
- TASKGRAPH_REGION_PARALLEL,
- region->inner.num_children -
- combined_children,
- nullptr);
- for (kmp_int32 c = region->inner.num_children - 1; c >= 0; c--) {
- kmp_taskgraph_region_t *child = region->inner.children[c];
- // Make mutex set into a circular list.
- if (child->mutexset_parent && child->mark != TASKGRAPH_TEMP_MARK) {
- if (!child->mutexset_parent->mutexset_parent) {
- // child <-> parent
- child->mutexset_parent->mutexset_parent = child;
- child->mutexset_parent->mark = TASKGRAPH_TEMP_MARK;
- } else {
- kmp_taskgraph_region_t *parent = child->mutexset_parent;
- child->mutexset_parent = parent->mutexset_parent;
- parent->mutexset_parent = child;
- parent->mark = TASKGRAPH_TEMP_MARK;
- }
+ }
+ TGDBG("irregular: %s\n", irregular ? "true" : "false");
+ TGDBG("final conflicts: 0x%llx\n", (unsigned long long)conflicts->bits[0]);
+ __kmp_bitset_free(thread, subsets_cover);
+ region->type = TASKGRAPH_REGION_PARALLEL;
+ if (!irregular && __kmp_bitset_empty_p(conflicts)) {
+ TGDBG("transforming exclusive region %p\n", region);
+ TGDBG("orig region children: %d\n", region->inner.num_children);
+ TGDBG("combined children: %d\n", combined_children);
+ if (region->inner.num_children == combined_children + 1) {
+ region->type = TASKGRAPH_REGION_EXCLUSIVE;
+ } else {
+ kmp_taskgraph_region_t *new_par = __kmp_taskgraph_region_alloc(
+ thread, taskgraph, alloc_chain, TASKGRAPH_REGION_PARALLEL,
+ region->inner.num_children - combined_children, nullptr);
+ for (kmp_int32 c = region->inner.num_children - 1; c >= 0; c--) {
+ kmp_taskgraph_region_t *child = region->inner.children[c];
+ // Make mutex set into a circular list.
+ if (child->mutexset_parent && child->mark != TASKGRAPH_TEMP_MARK) {
+ if (!child->mutexset_parent->mutexset_parent) {
+ // child <-> parent
+ child->mutexset_parent->mutexset_parent = child;
+ child->mutexset_parent->mark = TASKGRAPH_TEMP_MARK;
+ } else {
+ kmp_taskgraph_region_t *parent = child->mutexset_parent;
+ child->mutexset_parent = parent->mutexset_parent;
+ parent->mutexset_parent = child;
+ parent->mark = TASKGRAPH_TEMP_MARK;
}
}
- kmp_int32 idx = 0;
- for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
- kmp_taskgraph_region_t *child = region->inner.children[c];
- TGDBG("process child: %p\n", child);
- if (child->mutexset_parent && child->mark != TASKGRAPH_COMBINED) {
- kmp_int32 elems = 0;
- kmp_taskgraph_region_t *next = child;
- do {
- elems++;
- next = next->mutexset_parent;
- } while (next != child);
- TGDBG("make exclusive region with %d children\n", elems);
- kmp_taskgraph_region_t *excl_region =
- __kmp_taskgraph_region_alloc(thread, taskgraph, alloc_chain,
- TASKGRAPH_REGION_EXCLUSIVE, elems,
- nullptr);
- kmp_int32 excl_child = 0;
- next = child;
- do {
- excl_region->inner.children[excl_child++] = next;
- next->mark = TASKGRAPH_COMBINED;
- next = next->mutexset_parent;
- } while (next != child);
- assert(excl_child == excl_region->inner.num_children);
- new_par->inner.children[idx++] = excl_region;
- } else if (!child->mutexset_parent) {
- new_par->inner.children[idx++] = child;
- }
+ }
+ kmp_int32 idx = 0;
+ for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
+ kmp_taskgraph_region_t *child = region->inner.children[c];
+ TGDBG("process child: %p\n", child);
+ if (child->mutexset_parent && child->mark != TASKGRAPH_COMBINED) {
+ kmp_int32 elems = 0;
+ kmp_taskgraph_region_t *next = child;
+ do {
+ elems++;
+ next = next->mutexset_parent;
+ } while (next != child);
+ TGDBG("make exclusive region with %d children\n", elems);
+ kmp_taskgraph_region_t *excl_region = __kmp_taskgraph_region_alloc(
+ thread, taskgraph, alloc_chain, TASKGRAPH_REGION_EXCLUSIVE,
+ elems, nullptr);
+ kmp_int32 excl_child = 0;
+ next = child;
+ do {
+ excl_region->inner.children[excl_child++] = next;
+ next->mark = TASKGRAPH_COMBINED;
+ next = next->mutexset_parent;
+ } while (next != child);
+ assert(excl_child == excl_region->inner.num_children);
+ new_par->inner.children[idx++] = excl_region;
+ } else if (!child->mutexset_parent) {
+ new_par->inner.children[idx++] = child;
}
- TGDBG("idx=%d, supposed to be %d\n", idx,
- new_par->inner.num_children);
- assert(idx == new_par->inner.num_children);
- *region_p = new_par;
- region->mark = TASKGRAPH_DELETED;
}
+ TGDBG("idx=%d, supposed to be %d\n", idx, new_par->inner.num_children);
+ assert(idx == new_par->inner.num_children);
+ *region_p = new_par;
+ region->mark = TASKGRAPH_DELETED;
}
- __kmp_bitset_free(thread, conflicts);
- break;
}
- default:
- assert(false && "unreachable");
+ __kmp_bitset_free(thread, conflicts);
+ break;
+ }
+ default:
+ assert(false && "unreachable");
}
}
@@ -2780,59 +2718,55 @@ __kmp_taskgraph_strip_mutex_sets(kmp_info_t *thread,
bool in_exclusive = false) {
kmp_int32 mutexes_needed = 0;
switch (region->type) {
- case TASKGRAPH_REGION_ENTRY:
- case TASKGRAPH_REGION_EXIT:
- case TASKGRAPH_REGION_WAIT:
- assert(!region->mutexset);
- break;
- case TASKGRAPH_REGION_NODE:
- if (region->mutexset) {
- if (in_exclusive) {
- __kmp_bitset_free(thread, region->mutexset);
- region->mutexset = nullptr;
- } else {
- // FIXME: This might be pessimistic -- the remaining mutex sets might
- // have holes or duplicates. We could compact them.
- kmp_int32 m = region->mutexset->bitsize;
- mutexes_needed = std::max(mutexes_needed, m);
- }
- }
- break;
- case TASKGRAPH_REGION_EXCLUSIVE: {
- if (region->mutexset) {
+ case TASKGRAPH_REGION_ENTRY:
+ case TASKGRAPH_REGION_EXIT:
+ case TASKGRAPH_REGION_WAIT:
+ assert(!region->mutexset);
+ break;
+ case TASKGRAPH_REGION_NODE:
+ if (region->mutexset) {
+ if (in_exclusive) {
__kmp_bitset_free(thread, region->mutexset);
region->mutexset = nullptr;
- }
- for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
- kmp_int32 m =
- __kmp_taskgraph_strip_mutex_sets(thread, region->inner.children[c],
- true);
+ } else {
+ // FIXME: This might be pessimistic -- the remaining mutex sets might
+ // have holes or duplicates. We could compact them.
+ kmp_int32 m = region->mutexset->bitsize;
mutexes_needed = std::max(mutexes_needed, m);
}
- break;
}
- default: {
- if (region->mutexset) {
- __kmp_bitset_free(thread, region->mutexset);
- region->mutexset = nullptr;
- }
- for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
- kmp_int32 m =
- __kmp_taskgraph_strip_mutex_sets(thread, region->inner.children[c],
- in_exclusive);
- mutexes_needed = std::max(mutexes_needed, m);
- }
+ break;
+ case TASKGRAPH_REGION_EXCLUSIVE: {
+ if (region->mutexset) {
+ __kmp_bitset_free(thread, region->mutexset);
+ region->mutexset = nullptr;
+ }
+ for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
+ kmp_int32 m = __kmp_taskgraph_strip_mutex_sets(
+ thread, region->inner.children[c], true);
+ mutexes_needed = std::max(mutexes_needed, m);
+ }
+ break;
+ }
+ default: {
+ if (region->mutexset) {
+ __kmp_bitset_free(thread, region->mutexset);
+ region->mutexset = nullptr;
}
+ for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
+ kmp_int32 m = __kmp_taskgraph_strip_mutex_sets(
+ thread, region->inner.children[c], in_exclusive);
+ mutexes_needed = std::max(mutexes_needed, m);
+ }
+ }
}
return mutexes_needed;
}
-static void
-__kmp_taskgraph_exclusive_regions(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t **&alloc_chain,
- kmp_taskgraph_region_t **region_p,
- kmp_int32 max_mutex) {
+static void __kmp_taskgraph_exclusive_regions(
+ kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t **&alloc_chain, kmp_taskgraph_region_t **region_p,
+ kmp_int32 max_mutex) {
kmp_bitset_t *top = __kmp_bitset_alloc(thread, max_mutex);
__kmp_bitset_clearall(top);
__kmp_taskgraph_gather_mutex_sets(thread, *region_p, top);
@@ -2842,64 +2776,72 @@ __kmp_taskgraph_exclusive_regions(kmp_info_t *thread,
taskgraph->num_mutexes = num_mutexes;
}
-static const char*
+static const char *
__kmp_taskgraph_region_type_name(kmp_taskgraph_region_type type) {
switch (type) {
- case TASKGRAPH_REGION_ENTRY: return "entry";
- case TASKGRAPH_REGION_EXIT: return "exit";
- case TASKGRAPH_REGION_NODE: return "node";
- case TASKGRAPH_REGION_WAIT: return "wait";
- case TASKGRAPH_REGION_PARALLEL: return "parallel";
- case TASKGRAPH_REGION_EXCLUSIVE: return "exclusive";
- case TASKGRAPH_REGION_SEQUENTIAL: return "sequential";
- case TASKGRAPH_REGION_IRREDUCIBLE: return "irreducible";
- default: return "<unknown>";
+ case TASKGRAPH_REGION_ENTRY:
+ return "entry";
+ case TASKGRAPH_REGION_EXIT:
+ return "exit";
+ case TASKGRAPH_REGION_NODE:
+ return "node";
+ case TASKGRAPH_REGION_WAIT:
+ return "wait";
+ case TASKGRAPH_REGION_PARALLEL:
+ return "parallel";
+ case TASKGRAPH_REGION_EXCLUSIVE:
+ return "exclusive";
+ case TASKGRAPH_REGION_SEQUENTIAL:
+ return "sequential";
+ case TASKGRAPH_REGION_IRREDUCIBLE:
+ return "irreducible";
+ default:
+ return "<unknown>";
}
}
#if defined(KMP_DEBUG) || defined(DEBUG_TASKGRAPH)
-static void
-__kmp_dump_taskgraph_regions(FILE *f, kmp_taskgraph_region_t *region,
- int indent = 0) {
+static void __kmp_dump_taskgraph_regions(FILE *f,
+ kmp_taskgraph_region_t *region,
+ int indent = 0) {
switch (region->type) {
- case TASKGRAPH_REGION_ENTRY:
- case TASKGRAPH_REGION_EXIT:
- fprintf(f, "%*s%s node\n", indent, "",
- __kmp_taskgraph_region_type_name(region->type));
- break;
- case TASKGRAPH_REGION_NODE:
- case TASKGRAPH_REGION_WAIT: {
- char set_membership[40];
- if (region->mutexset)
- sprintf(set_membership, " [sets: 0x%llx]",
- (unsigned long long) region->mutexset->bits[0]);
- else
- strcpy(set_membership, "");
- if (region->task.node->u.resolved.count > 1)
- fprintf(f, "%*s%s: %p (* %d)%s\n", indent, "",
- __kmp_taskgraph_region_type_name(region->type),
- region->task.node, region->task.node->u.resolved.count,
- set_membership);
- else
- fprintf(f, "%*s%s: %p%s\n", indent, "",
- __kmp_taskgraph_region_type_name(region->type),
- region->task.node, set_membership);
- break;
- }
- default: {
- char set_membership[40];
- if (region->mutexset)
- sprintf(set_membership, " [sets: 0x%llx]",
- (unsigned long long) region->mutexset->bits[0]);
- else
- strcpy(set_membership, "");
- fprintf(f, "%*s%s%s {\n", indent, "",
- __kmp_taskgraph_region_type_name (region->type), set_membership);
- for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
- __kmp_dump_taskgraph_regions(f, region->inner.children[c], indent + 2);
- }
- fprintf(f, "%*s}\n", indent, "");
+ case TASKGRAPH_REGION_ENTRY:
+ case TASKGRAPH_REGION_EXIT:
+ fprintf(f, "%*s%s node\n", indent, "",
+ __kmp_taskgraph_region_type_name(region->type));
+ break;
+ case TASKGRAPH_REGION_NODE:
+ case TASKGRAPH_REGION_WAIT: {
+ char set_membership[40];
+ if (region->mutexset)
+ sprintf(set_membership, " [sets: 0x%llx]",
+ (unsigned long long)region->mutexset->bits[0]);
+ else
+ strcpy(set_membership, "");
+ if (region->task.node->u.resolved.count > 1)
+ fprintf(f, "%*s%s: %p (* %d)%s\n", indent, "",
+ __kmp_taskgraph_region_type_name(region->type), region->task.node,
+ region->task.node->u.resolved.count, set_membership);
+ else
+ fprintf(f, "%*s%s: %p%s\n", indent, "",
+ __kmp_taskgraph_region_type_name(region->type), region->task.node,
+ set_membership);
+ break;
+ }
+ default: {
+ char set_membership[40];
+ if (region->mutexset)
+ sprintf(set_membership, " [sets: 0x%llx]",
+ (unsigned long long)region->mutexset->bits[0]);
+ else
+ strcpy(set_membership, "");
+ fprintf(f, "%*s%s%s {\n", indent, "",
+ __kmp_taskgraph_region_type_name(region->type), set_membership);
+ for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
+ __kmp_dump_taskgraph_regions(f, region->inner.children[c], indent + 2);
}
+ fprintf(f, "%*s}\n", indent, "");
+ }
}
}
#endif
@@ -2923,18 +2865,17 @@ __kmp_dump_find_parent_regions(kmp_info *thd, kmp_taskgraph_record_t *taskgraph,
if (!in_list) {
list = __kmp_region_deplist_add(thd, &taskgraph->recycled_deps,
region[r].parent, list);
- list = __kmp_dump_find_parent_regions(thd, taskgraph, region[r].parent,
- 1, list);
+ list = __kmp_dump_find_parent_regions(thd, taskgraph, region[r].parent, 1,
+ list);
}
}
return list;
}
-static void
-__kmp_dump_raw_taskgraph_regions(FILE *f, kmp_info *thd,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t *region,
- int numregions, int indent = 0) {
+static void __kmp_dump_raw_taskgraph_regions(FILE *f, kmp_info *thd,
+ kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t *region,
+ int numregions, int indent = 0) {
kmp_taskgraph_region_dep_t *parentlist = nullptr;
kmp_taskgraph_region_dep_t *printedlist = nullptr;
for (int r = 0; r < numregions; r++) {
@@ -2944,24 +2885,23 @@ __kmp_dump_raw_taskgraph_regions(FILE *f, kmp_info *thd,
region[r].type == TASKGRAPH_REGION_EXCLUSIVE ||
region[r].type == TASKGRAPH_REGION_IRREDUCIBLE)
children = region[r].inner.num_children;
- fprintf(f,
- "%*sregion %d (%p): %s%s (%d children) parent %p succs %d preds %d\n",
- indent, "", r, &region[r],
- __kmp_taskgraph_region_type_name(region[r].type),
- region[r].mark == TASKGRAPH_COMBINED ? " (combined)" : "",
- children, region[r].parent,
- __kmp_region_deplist_len(region[r].successors),
- __kmp_region_deplist_len(region[r].predecessors));
+ fprintf(
+ f,
+ "%*sregion %d (%p): %s%s (%d children) parent %p succs %d preds %d\n",
+ indent, "", r, &region[r],
+ __kmp_taskgraph_region_type_name(region[r].type),
+ region[r].mark == TASKGRAPH_COMBINED ? " (combined)" : "", children,
+ region[r].parent, __kmp_region_deplist_len(region[r].successors),
+ __kmp_region_deplist_len(region[r].predecessors));
if (children > 0) {
for (int c = 0; c < children; c++)
- __kmp_dump_raw_taskgraph_regions(f, thd, taskgraph,
- region->inner.children[c], 1,
- indent + 2);
+ __kmp_dump_raw_taskgraph_regions(
+ f, thd, taskgraph, region->inner.children[c], 1, indent + 2);
}
}
if (indent == 0) {
- parentlist = __kmp_dump_find_parent_regions(thd, taskgraph, region,
- numregions);
+ parentlist =
+ __kmp_dump_find_parent_regions(thd, taskgraph, region, numregions);
fprintf(stderr, "%*sfound %d parent region(s):\n", indent, "",
__kmp_region_deplist_len(parentlist));
for (kmp_taskgraph_region_dep_t *p = parentlist; p; p = p->next) {
@@ -3151,9 +3091,9 @@ __kmp_dump_raw_taskgraph_regions(FILE *f, kmp_info *thd,
// annotated with a set of mutexes that must be held while executing the task.
// (Shown with [sets: 0xN] in dump output).
-kmp_int32
-__kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
- kmp_taskgraph_record_t *taskgraph) {
+kmp_int32 __kmp_build_taskgraph(kmp_int32 gtid,
+ kmp_taskdata_t *current_taskdata,
+ kmp_taskgraph_record_t *taskgraph) {
kmp_int32 numnodes = taskgraph->num_tasks;
kmp_int32 numregions = numnodes + 2;
kmp_taskgraph_node_t *nodes = taskgraph->record_map;
@@ -3169,10 +3109,10 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
// The maximum amount of padding we need is CACHE_LINE - 1 bytes.
all_depnodes_size = all_depnodes_size + CACHE_LINE - 1;
char *all_depnodes_misaligned =
- (char *)__kmp_thread_malloc(thread, all_depnodes_size);
+ (char *)__kmp_thread_malloc(thread, all_depnodes_size);
kmp_depnode_t *all_depnodes =
- (kmp_depnode_t *) ((((intptr_t) all_depnodes_misaligned) + CACHE_LINE - 1)
- & ~(CACHE_LINE - 1));
+ (kmp_depnode_t *)((((intptr_t)all_depnodes_misaligned) + CACHE_LINE - 1) &
+ ~(CACHE_LINE - 1));
kmp_int32 next_mutex_set = 0;
for (kmp_int32 i = 0; i < numnodes; i++) {
@@ -3187,10 +3127,9 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
node->dn.task = nodes[i].task;
dep_barrier = !nodes[i].task && nodes[i].taskloop_task;
if (!dep_all) {
- __kmp_process_deps<taskgraph_deps>(gtid, node, &hash, dep_barrier,
- nodes[i].u.unresolved.ndeps,
- nodes[i].u.unresolved.dep_list,
- nodes[i].task, next_mutex_set);
+ __kmp_process_deps<taskgraph_deps>(
+ gtid, node, &hash, dep_barrier, nodes[i].u.unresolved.ndeps,
+ nodes[i].u.unresolved.dep_list, nodes[i].task, next_mutex_set);
} else {
__kmp_process_dep_all<taskgraph_deps>(gtid, node, hash, dep_barrier,
nodes[i].task);
@@ -3201,16 +3140,16 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
kmp_int32 outidx = 0;
kmp_taskgraph_region_t *initial_regions =
- (kmp_taskgraph_region_t *)__kmp_fast_allocate(thread,
- sizeof(kmp_taskgraph_region_t) * numregions);
+ (kmp_taskgraph_region_t *)__kmp_fast_allocate(
+ thread, sizeof(kmp_taskgraph_region_t) * numregions);
// FIXME: Something like 'placement new' here?
memset(initial_regions, 0, sizeof(kmp_taskgraph_region_t) * numregions);
kmp_taskgraph_region_t *cfg_barrier = nullptr;
for (kmp_int32 i = 0; i < numnodes; i++) {
- initial_regions[i].type = nodes[i].task ? TASKGRAPH_REGION_NODE
- : TASKGRAPH_REGION_WAIT;
+ initial_regions[i].type =
+ nodes[i].task ? TASKGRAPH_REGION_NODE : TASKGRAPH_REGION_WAIT;
initial_regions[i].task.node = &nodes[i];
initial_regions[i].task.next_instance = &initial_regions[i];
initial_regions[i].parent = nullptr;
@@ -3221,17 +3160,16 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
}
kmp_depnode_t *depnode = &all_depnodes[i];
initial_regions[i].mutexset = depnode->dn.set_membership;
- for (kmp_depnode_list_t *succ = depnode->dn.successors;
- succ;
+ for (kmp_depnode_list_t *succ = depnode->dn.successors; succ;
succ = succ->next) {
kmp_int32 succ_idx = succ->node - all_depnodes;
kmp_taskgraph_region_t *tg_succ = &initial_regions[succ_idx];
tg_succ->predecessors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- &initial_regions[i], tg_succ->predecessors);
+ __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
+ &initial_regions[i], tg_succ->predecessors);
initial_regions[i].successors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps, tg_succ,
- initial_regions[i].successors);
+ __kmp_region_deplist_add(thread, &taskgraph->recycled_deps, tg_succ,
+ initial_regions[i].successors);
}
// Handle control flow dependencies. If a node (e.g. a taskloop task) has
// a wait after it corresponding to the end of an implicit taskgroup, join
@@ -3239,24 +3177,22 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
// it will depend on the barrier.
if (nodes[i].u.unresolved.cfg_successor != -1) {
kmp_int32 cfg_succ = nodes[i].u.unresolved.cfg_successor;
- initial_regions[i].successors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- &initial_regions[cfg_succ],
- initial_regions[i].successors);
- initial_regions[cfg_succ].predecessors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- &initial_regions[i],
- initial_regions[cfg_succ].predecessors);
+ initial_regions[i].successors = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, &initial_regions[cfg_succ],
+ initial_regions[i].successors);
+ initial_regions[cfg_succ].predecessors = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, &initial_regions[i],
+ initial_regions[cfg_succ].predecessors);
}
if (nodes[i].taskloop_task && !nodes[i].task) {
cfg_barrier = &initial_regions[i];
} else if (cfg_barrier) {
- cfg_barrier->successors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- &initial_regions[i], cfg_barrier->successors);
- initial_regions[i].predecessors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- cfg_barrier, initial_regions[i].predecessors);
+ cfg_barrier->successors = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, &initial_regions[i],
+ cfg_barrier->successors);
+ initial_regions[i].predecessors = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, cfg_barrier,
+ initial_regions[i].predecessors);
}
}
@@ -3286,21 +3222,19 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
kmp_int32 nsuccs = __kmp_region_deplist_len(region->successors);
if (npreds == 0) {
initial_regions[entryregion].successors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps, region,
- initial_regions[entryregion].successors);
- region->predecessors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- &initial_regions[entryregion],
- region->predecessors);
+ __kmp_region_deplist_add(thread, &taskgraph->recycled_deps, region,
+ initial_regions[entryregion].successors);
+ region->predecessors = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, &initial_regions[entryregion],
+ region->predecessors);
}
if (nsuccs == 0) {
initial_regions[exitregion].predecessors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps, region,
- initial_regions[exitregion].predecessors);
- region->successors =
- __kmp_region_deplist_add(thread, &taskgraph->recycled_deps,
- &initial_regions[exitregion],
- region->successors);
+ __kmp_region_deplist_add(thread, &taskgraph->recycled_deps, region,
+ initial_regions[exitregion].predecessors);
+ region->successors = __kmp_region_deplist_add(
+ thread, &taskgraph->recycled_deps, &initial_regions[exitregion],
+ region->successors);
}
region->owner = taskgraph;
}
@@ -3312,9 +3246,8 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
for (kmp_int32 i = 0; i < numregions; i++) {
if (initial_regions[i].mark == TASKGRAPH_UNMARKED) {
- kmp_int32 level =
- __kmp_taskgraph_topological_order(&initial_regions[i], order_out,
- &outidx);
+ kmp_int32 level = __kmp_taskgraph_topological_order(&initial_regions[i],
+ order_out, &outidx);
max_level = level > max_level ? level : max_level;
}
}
@@ -3332,10 +3265,9 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
kmp_taskgraph_region_t **alloc_chain = &initial_regions[0].alloc_chain;
- kmp_taskgraph_region_t *root_region =
- __kmp_taskgraph_build_regions(thread, taskgraph, alloc_chain,
- &initial_regions[entryregion],
- &initial_regions[exitregion]);
+ kmp_taskgraph_region_t *root_region = __kmp_taskgraph_build_regions(
+ thread, taskgraph, alloc_chain, &initial_regions[entryregion],
+ &initial_regions[exitregion]);
__kmp_taskgraph_count_nodes(root_region);
@@ -3376,14 +3308,14 @@ __kmp_build_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
taskgraph->recycled_deps = nullptr;
KG_TRACE(10, ("Processed taskgraph %p (graph_id %" PRIx64 "):\n", taskgraph,
- taskgraph->graph_id));
+ taskgraph->graph_id));
KG_DUMP(10, __kmp_dump_taskgraph_regions(stderr, root_region));
- #ifdef DEBUG_TASKGRAPH
- //__kmp_dump_taskgraph_regions(stderr, root_region);
- //__kmp_dump_raw_taskgraph_regions(stderr, thread, taskgraph,
- // &initial_regions[0], numregions);
- #endif
+#ifdef DEBUG_TASKGRAPH
+//__kmp_dump_taskgraph_regions(stderr, root_region);
+//__kmp_dump_raw_taskgraph_regions(stderr, thread, taskgraph,
+// &initial_regions[0], numregions);
+#endif
KMP_ATOMIC_ST_REL(&taskgraph->status, KMP_TDG_READY);
@@ -3424,16 +3356,14 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
kmp_int32 next_mutex = 0;
if (!dep_all) { // regular dependences
- npredecessors =
- __kmp_process_deps<normal_deps>(gtid, node, hash, dep_barrier,
- ndeps, dep_list, task, next_mutex);
- npredecessors +=
- __kmp_process_deps<normal_deps>(gtid, node, hash, dep_barrier,
- ndeps_noalias, noalias_dep_list, task,
- next_mutex, false);
+ npredecessors = __kmp_process_deps<normal_deps>(
+ gtid, node, hash, dep_barrier, ndeps, dep_list, task, next_mutex);
+ npredecessors += __kmp_process_deps<normal_deps>(
+ gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task,
+ next_mutex, false);
} else { // omp_all_memory dependence
- npredecessors =
- __kmp_process_dep_all<normal_deps>(gtid, node, *hash, dep_barrier, task);
+ npredecessors = __kmp_process_dep_all<normal_deps>(gtid, node, *hash,
+ dep_barrier, task);
}
node->dn.task = task;
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 1bbb10bfa..a37d42a74 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -756,9 +756,9 @@ static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
}
#if OMP_TASKGRAPH_EXPERIMENTAL
-static bool __kmp_taskgraph_exec_descr_finish(kmp_int32 gtid,
- kmp_info_t *thread,
- kmp_taskgraph_exec_descr_t *descr);
+static bool
+__kmp_taskgraph_exec_descr_finish(kmp_int32 gtid, kmp_info_t *thread,
+ kmp_taskgraph_exec_descr_t *descr);
#endif
// __kmp_task_finish: bookkeeping to do when a task finishes execution
@@ -2118,43 +2118,35 @@ __kmp_fill_exec_descr(kmp_int32, kmp_info_t *, kmp_taskgraph_record_t *,
kmp_taskgraph_exec_descr_t *, kmp_size_t &,
kmp_taskgraph_exec_descr_t **);
-static kmp_int32
-__kmp_pred_list_length(kmp_taskgraph_exec_descr_t *desc) {
+static kmp_int32 __kmp_pred_list_length(kmp_taskgraph_exec_descr_t *desc) {
kmp_int32 res = 0;
for (; desc; desc = desc->predecessor_chain)
++res;
return res;
}
-static kmp_taskgraph_exec_descr_t *
-__kmp_fill_sequential_descr(kmp_int32 gtid, kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t *region,
- kmp_taskdata_t *parent_taskdata,
- kmp_taskgraph_exec_descr_t *exec_descrs,
- kmp_size_t &next_idx,
- kmp_taskgraph_exec_descr_t **succs_to_fill_p) {
+static kmp_taskgraph_exec_descr_t *__kmp_fill_sequential_descr(
+ kmp_int32 gtid, kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t *region, kmp_taskdata_t *parent_taskdata,
+ kmp_taskgraph_exec_descr_t *exec_descrs, kmp_size_t &next_idx,
+ kmp_taskgraph_exec_descr_t **succs_to_fill_p) {
assert(region->type == TASKGRAPH_REGION_SEQUENTIAL);
kmp_taskgraph_exec_descr_t *first_node = nullptr;
for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
- kmp_taskgraph_exec_descr *descr =
- __kmp_fill_exec_descr(gtid, thread, taskgraph, region->inner.children[c],
- parent_taskdata, exec_descrs, next_idx,
- succs_to_fill_p);
+ kmp_taskgraph_exec_descr *descr = __kmp_fill_exec_descr(
+ gtid, thread, taskgraph, region->inner.children[c], parent_taskdata,
+ exec_descrs, next_idx, succs_to_fill_p);
if (!first_node)
first_node = descr;
}
return first_node;
}
-static kmp_taskgraph_exec_descr_t *
-__kmp_fill_par_or_excl_descr(kmp_int32 gtid, kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t *region,
- kmp_taskdata_t *parent_taskdata,
- kmp_taskgraph_exec_descr_t *exec_descrs,
- kmp_size_t &next_idx,
- kmp_taskgraph_exec_descr_t **succs_to_fill_p) {
+static kmp_taskgraph_exec_descr_t *__kmp_fill_par_or_excl_descr(
+ kmp_int32 gtid, kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t *region, kmp_taskdata_t *parent_taskdata,
+ kmp_taskgraph_exec_descr_t *exec_descrs, kmp_size_t &next_idx,
+ kmp_taskgraph_exec_descr_t **succs_to_fill_p) {
assert(region->type == TASKGRAPH_REGION_PARALLEL ||
region->type == TASKGRAPH_REGION_EXCLUSIVE);
@@ -2177,10 +2169,9 @@ __kmp_fill_par_or_excl_descr(kmp_int32 gtid, kmp_info_t *thread,
for (kmp_int32 c = 0; c < region->inner.num_children; c++) {
kmp_taskgraph_exec_descr_t *succs_to_fill = nullptr;
- kmp_taskgraph_exec_descr_t *head =
- __kmp_fill_exec_descr(gtid, thread, taskgraph, region->inner.children[c],
- parent_taskdata, exec_descrs, next_idx,
- &succs_to_fill);
+ kmp_taskgraph_exec_descr_t *head = __kmp_fill_exec_descr(
+ gtid, thread, taskgraph, region->inner.children[c], parent_taskdata,
+ exec_descrs, next_idx, &succs_to_fill);
if (!sibling_list) {
sibling_list = head;
sibling_list->sibling = head;
@@ -2214,106 +2205,101 @@ __kmp_fill_par_or_excl_descr(kmp_int32 gtid, kmp_info_t *thread,
return exec_descr;
}
-static kmp_taskgraph_exec_descr_t *
-__kmp_fill_exec_descr(kmp_int32 gtid, kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph,
- kmp_taskgraph_region_t *region,
- kmp_taskdata_t *parent_taskdata,
- kmp_taskgraph_exec_descr_t *exec_descrs,
- kmp_size_t &next_idx,
- kmp_taskgraph_exec_descr_t **succs_to_fill_p) {
+static kmp_taskgraph_exec_descr_t *__kmp_fill_exec_descr(
+ kmp_int32 gtid, kmp_info_t *thread, kmp_taskgraph_record_t *taskgraph,
+ kmp_taskgraph_region_t *region, kmp_taskdata_t *parent_taskdata,
+ kmp_taskgraph_exec_descr_t *exec_descrs, kmp_size_t &next_idx,
+ kmp_taskgraph_exec_descr_t **succs_to_fill_p) {
switch (region->type) {
- case TASKGRAPH_REGION_ENTRY:
- case TASKGRAPH_REGION_EXIT:
- break;
- case TASKGRAPH_REGION_NODE:
- case TASKGRAPH_REGION_WAIT: {
- kmp_taskgraph_exec_descr_t *incoming_succs_to_fill = *succs_to_fill_p;
- kmp_taskgraph_exec_descr_t *exec_descr = &exec_descrs[next_idx++];
- exec_descr->region = region;
- exec_descr->region->exec_descr = exec_descr;
- exec_descr->nblocks = region->task.node->u.resolved.count - 1;
- exec_descr->npredecessors = __kmp_pred_list_length(incoming_succs_to_fill);
- exec_descr->sibling = exec_descr;
- exec_descr->predecessor_chain = nullptr;
- exec_descr->successor = nullptr;
- exec_descr->next_instance = nullptr;
-
- // Edit the taskdata for this specific instantiation. At present the
- // task/taskdata structures cannot be used simultaneously by different
- // threads. We could duplicate the structures to allow simultaneous issue,
- // but that's not done yet. The exec_descr can already by thread-local,
- // in principle, but for now it points to the taskgraph's single copy
- // of each task/taskdata structure.
- if (region->type == TASKGRAPH_REGION_NODE) {
- kmp_task_t *task = exec_descr->region->task.node->task;
- kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
- taskdata->exec_descr = exec_descr;
- }
+ case TASKGRAPH_REGION_ENTRY:
+ case TASKGRAPH_REGION_EXIT:
+ break;
+ case TASKGRAPH_REGION_NODE:
+ case TASKGRAPH_REGION_WAIT: {
+ kmp_taskgraph_exec_descr_t *incoming_succs_to_fill = *succs_to_fill_p;
+ kmp_taskgraph_exec_descr_t *exec_descr = &exec_descrs[next_idx++];
+ exec_descr->region = region;
+ exec_descr->region->exec_descr = exec_descr;
+ exec_descr->nblocks = region->task.node->u.resolved.count - 1;
+ exec_descr->npredecessors = __kmp_pred_list_length(incoming_succs_to_fill);
+ exec_descr->sibling = exec_descr;
+ exec_descr->predecessor_chain = nullptr;
+ exec_descr->successor = nullptr;
+ exec_descr->next_instance = nullptr;
+
+ // Edit the taskdata for this specific instantiation. At present the
+ // task/taskdata structures cannot be used simultaneously by different
+ // threads. We could duplicate the structures to allow simultaneous issue,
+ // but that's not done yet. The exec_descr can already by thread-local,
+ // in principle, but for now it points to the taskgraph's single copy
+ // of each task/taskdata structure.
+ if (region->type == TASKGRAPH_REGION_NODE) {
+ kmp_task_t *task = exec_descr->region->task.node->task;
+ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+ taskdata->exec_descr = exec_descr;
+ }
- for (kmp_taskgraph_exec_descr_t *pred = incoming_succs_to_fill; pred;
- pred = pred->predecessor_chain) {
- pred->successor = exec_descr;
- }
+ for (kmp_taskgraph_exec_descr_t *pred = incoming_succs_to_fill; pred;
+ pred = pred->predecessor_chain) {
+ pred->successor = exec_descr;
+ }
- *succs_to_fill_p = exec_descr;
+ *succs_to_fill_p = exec_descr;
- return exec_descr;
- }
- case TASKGRAPH_REGION_SEQUENTIAL:
- return __kmp_fill_sequential_descr(gtid, thread, taskgraph, region,
- parent_taskdata, exec_descrs,
- next_idx, succs_to_fill_p);
- case TASKGRAPH_REGION_PARALLEL:
- case TASKGRAPH_REGION_EXCLUSIVE:
- return __kmp_fill_par_or_excl_descr(gtid, thread, taskgraph, region,
- parent_taskdata, exec_descrs,
- next_idx, succs_to_fill_p);
+ return exec_descr;
+ }
+ case TASKGRAPH_REGION_SEQUENTIAL:
+ return __kmp_fill_sequential_descr(gtid, thread, taskgraph, region,
+ parent_taskdata, exec_descrs, next_idx,
+ succs_to_fill_p);
+ case TASKGRAPH_REGION_PARALLEL:
+ case TASKGRAPH_REGION_EXCLUSIVE:
+ return __kmp_fill_par_or_excl_descr(gtid, thread, taskgraph, region,
+ parent_taskdata, exec_descrs, next_idx,
+ succs_to_fill_p);
}
return nullptr;
}
#ifdef DEBUG_TASKGRAPH
-static void
-__kmp_debug_taskgraph_exec_descr(kmp_taskgraph_exec_descr_t *descrs,
- kmp_size_t count) {
+static void __kmp_debug_taskgraph_exec_descr(kmp_taskgraph_exec_descr_t *descrs,
+ kmp_size_t count) {
fprintf(stderr, "digraph ExecDescr {\n");
fprintf(stderr, " end [shape=diamond]\n");
for (kmp_size_t i = 0; i < count; i++) {
kmp_taskgraph_exec_descr_t *descr = &descrs[i];
fprintf(stderr, " \"%p\" [label=< <B>", descr->region);
switch (descr->region->type) {
- case TASKGRAPH_REGION_PARALLEL:
- fprintf(stderr, "par</B> %p<BR/>preds=%d", descr->region,
+ case TASKGRAPH_REGION_PARALLEL:
+ fprintf(stderr, "par</B> %p<BR/>preds=%d", descr->region,
+ descr->npredecessors.load());
+ break;
+ case TASKGRAPH_REGION_EXCLUSIVE:
+ fprintf(stderr, "excl</B> %p<BR/>preds=%d", descr->region,
+ descr->npredecessors.load());
+ break;
+ case TASKGRAPH_REGION_NODE:
+ if (descr->region->task.node->u.resolved.count > 1) {
+ fprintf(stderr, "task</B> %p<BR/>preds=%d instances=%d",
+ descr->region->task.node, descr->npredecessors.load(),
+ descr->region->task.node->u.resolved.count);
+ } else {
+ fprintf(stderr, "task</B> %p<BR/>preds=%d", descr->region->task.node,
descr->npredecessors.load());
- break;
- case TASKGRAPH_REGION_EXCLUSIVE:
- fprintf(stderr, "excl</B> %p<BR/>preds=%d", descr->region,
+ }
+ break;
+ case TASKGRAPH_REGION_WAIT:
+ if (descr->region->task.node->u.resolved.count > 1) {
+ fprintf(stderr, "wait</B> %p<BR/>preds=%d instances=%d", descr->region,
+ descr->npredecessors.load(),
+ descr->region->task.node->u.resolved.count);
+ } else {
+ fprintf(stderr, "wait</B> %p<BR/>preds=%d", descr->region,
descr->npredecessors.load());
- break;
- case TASKGRAPH_REGION_NODE:
- if (descr->region->task.node->u.resolved.count > 1) {
- fprintf(stderr, "task</B> %p<BR/>preds=%d instances=%d",
- descr->region->task.node,
- descr->npredecessors.load(),
- descr->region->task.node->u.resolved.count);
- } else {
- fprintf(stderr, "task</B> %p<BR/>preds=%d", descr->region->task.node,
- descr->npredecessors.load());
- }
- break;
- case TASKGRAPH_REGION_WAIT:
- if (descr->region->task.node->u.resolved.count > 1) {
- fprintf(stderr, "wait</B> %p<BR/>preds=%d instances=%d",
- descr->region, descr->npredecessors.load(),
- descr->region->task.node->u.resolved.count);
- } else {
- fprintf(stderr, "wait</B> %p<BR/>preds=%d", descr->region,
- descr->npredecessors.load());
- }
- break;
- default:
- fprintf(stderr, "???</B>");
+ }
+ break;
+ default:
+ fprintf(stderr, "???</B>");
}
fprintf(stderr, " >, shape=box]\n");
@@ -2321,9 +2307,10 @@ __kmp_debug_taskgraph_exec_descr(kmp_taskgraph_exec_descr_t *descrs,
descr->region->type == TASKGRAPH_REGION_WAIT) &&
descr->region->task.node->u.resolved.count > 1) {
kmp_taskgraph_region_t *region = descr->region;
- fprintf(stderr,
- " \"%p\" -> \"%p\" [style=dotted, color=blue, constraint=false]\n",
- region, region->task.next_instance);
+ fprintf(
+ stderr,
+ " \"%p\" -> \"%p\" [style=dotted, color=blue, constraint=false]\n",
+ region, region->task.next_instance);
}
if (descr->successor) {
@@ -2353,9 +2340,8 @@ __kmp_debug_taskgraph_exec_descr(kmp_taskgraph_exec_descr_t *descrs,
}
#endif
-static void
-__kmp_exec_descr_link_instances(kmp_taskgraph_exec_descr_t *descrs,
- kmp_size_t count) {
+static void __kmp_exec_descr_link_instances(kmp_taskgraph_exec_descr_t *descrs,
+ kmp_size_t count) {
for (kmp_size_t i = 0; i < count; i++) {
kmp_taskgraph_exec_descr_t *descr = &descrs[i];
if (descr->region->type == TASKGRAPH_REGION_NODE ||
@@ -2366,9 +2352,10 @@ __kmp_exec_descr_link_instances(kmp_taskgraph_exec_descr_t *descrs,
/// Reset, reparent and regroup the recorded task TASK and re-invoke it.
-static void
-__kmp_omp_tg_task(kmp_int32 gtid, kmp_task_t *task, kmp_taskgroup_t *taskgroup,
- kmp_taskdata_t *parent_taskdata, bool serialize_immediate) {
+static void __kmp_omp_tg_task(kmp_int32 gtid, kmp_task_t *task,
+ kmp_taskgroup_t *taskgroup,
+ kmp_taskdata_t *parent_taskdata,
+ bool serialize_immediate) {
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
taskdata->td_parent = parent_taskdata;
@@ -2377,8 +2364,8 @@ __kmp_omp_tg_task(kmp_int32 gtid, kmp_task_t *task, kmp_taskgroup_t *taskgroup,
taskdata->td_flags.freed = 0;
taskdata->td_flags.executing = 0;
taskdata->td_flags.task_serial =
- (parent_taskdata->td_flags.final ||
- taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser);
+ (parent_taskdata->td_flags.final || taskdata->td_flags.team_serial ||
+ taskdata->td_flags.tasking_ser);
KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
@@ -2398,79 +2385,76 @@ struct kmp_taskred_input;
template <typename T>
void *__kmp_task_reduction_init(int gtid, int num, T *data);
-static void
-__kmp_taskgraph_exec_descr_start(kmp_int32 gtid, kmp_info_t *thread,
- kmp_taskgraph_exec_descr_t *descr,
- kmp_taskgroup_t *taskgroup) {
+static void __kmp_taskgraph_exec_descr_start(kmp_int32 gtid, kmp_info_t *thread,
+ kmp_taskgraph_exec_descr_t *descr,
+ kmp_taskgroup_t *taskgroup) {
kmp_int32 npredecessors = KMP_ATOMIC_DEC(&descr->npredecessors) - 1;
if (npredecessors > 0)
return;
switch (descr->region->type) {
- case TASKGRAPH_REGION_NODE:
- case TASKGRAPH_REGION_WAIT: {
- kmp_taskgraph_exec_descr_t *lowest_descr = nullptr, *iter = descr;
- do {
- if (!lowest_descr || lowest_descr > iter)
- lowest_descr = iter;
- iter = iter->next_instance;
- } while (iter != descr);
- kmp_int32 nblocks = KMP_ATOMIC_DEC(&lowest_descr->nblocks);
- if (nblocks <= 0) {
- if (descr->region->type == TASKGRAPH_REGION_NODE) {
- kmp_task_t *task = descr->region->task.node->task;
- kmp_taskdata_t *current_taskdata = thread->th.th_current_task;
- __kmp_omp_tg_task(gtid, task, taskgroup, current_taskdata, false);
- } else {
- // There's no task for a 'taskwait', so start successors immediately.
- kmp_taskgraph_exec_descr_t *walk = descr;
- do {
- if (walk->successor) {
- __kmp_taskgraph_exec_descr_start(gtid, thread, walk->successor,
- taskgroup);
- }
- walk = walk->next_instance;
- } while (walk != descr);
-
- }
+ case TASKGRAPH_REGION_NODE:
+ case TASKGRAPH_REGION_WAIT: {
+ kmp_taskgraph_exec_descr_t *lowest_descr = nullptr, *iter = descr;
+ do {
+ if (!lowest_descr || lowest_descr > iter)
+ lowest_descr = iter;
+ iter = iter->next_instance;
+ } while (iter != descr);
+ kmp_int32 nblocks = KMP_ATOMIC_DEC(&lowest_descr->nblocks);
+ if (nblocks <= 0) {
+ if (descr->region->type == TASKGRAPH_REGION_NODE) {
+ kmp_task_t *task = descr->region->task.node->task;
+ kmp_taskdata_t *current_taskdata = thread->th.th_current_task;
+ __kmp_omp_tg_task(gtid, task, taskgroup, current_taskdata, false);
+ } else {
+ // There's no task for a 'taskwait', so start successors immediately.
+ kmp_taskgraph_exec_descr_t *walk = descr;
+ do {
+ if (walk->successor) {
+ __kmp_taskgraph_exec_descr_start(gtid, thread, walk->successor,
+ taskgroup);
+ }
+ walk = walk->next_instance;
+ } while (walk != descr);
}
- break;
}
- case TASKGRAPH_REGION_PARALLEL: {
- if (descr->region->reduce_input) {
- // If there are reductions associated with this parallel region, we
- // start a new taskgroup here.
- __kmpc_taskgroup(/*loc=*/nullptr, gtid);
- // Update variable to the newly-created taskgroup.
- taskgroup = thread->th.th_current_task->td_taskgroup;
- __kmp_task_reduction_init(gtid,
- descr->region->reduce_input->reduce_num_data,
- (struct kmp_taskred_input *)
- descr->region->reduce_input->reduce_data);
- }
- kmp_taskgraph_exec_descr_t *head = descr->successor;
- kmp_taskgraph_exec_descr_t *item = head;
- do {
- __kmp_taskgraph_exec_descr_start(gtid, thread, item, taskgroup);
- item = item->sibling;
- } while (item != head);
- if (descr->region->reduce_input)
- __kmpc_end_taskgroup(/*loc=*/nullptr, gtid);
- break;
- }
- case TASKGRAPH_REGION_EXCLUSIVE: {
- kmp_taskgraph_exec_descr_t *head = descr->successor;
- kmp_taskgraph_exec_descr_t *item = head;
- do {
- assert(item->region->type == TASKGRAPH_REGION_NODE);
- kmp_task_t *task = item->region->task.node->task;
- kmp_taskdata_t *current_taskdata = thread->th.th_current_task;
- __kmp_omp_tg_task(gtid, task, taskgroup, current_taskdata, true);
- item = item->sibling;
- } while (item != head);
- break;
+ break;
+ }
+ case TASKGRAPH_REGION_PARALLEL: {
+ if (descr->region->reduce_input) {
+ // If there are reductions associated with this parallel region, we
+ // start a new taskgroup here.
+ __kmpc_taskgroup(/*loc=*/nullptr, gtid);
+ // Update variable to the newly-created taskgroup.
+ taskgroup = thread->th.th_current_task->td_taskgroup;
+ __kmp_task_reduction_init(
+ gtid, descr->region->reduce_input->reduce_num_data,
+ (struct kmp_taskred_input *)descr->region->reduce_input->reduce_data);
}
- default: ;
+ kmp_taskgraph_exec_descr_t *head = descr->successor;
+ kmp_taskgraph_exec_descr_t *item = head;
+ do {
+ __kmp_taskgraph_exec_descr_start(gtid, thread, item, taskgroup);
+ item = item->sibling;
+ } while (item != head);
+ if (descr->region->reduce_input)
+ __kmpc_end_taskgroup(/*loc=*/nullptr, gtid);
+ break;
+ }
+ case TASKGRAPH_REGION_EXCLUSIVE: {
+ kmp_taskgraph_exec_descr_t *head = descr->successor;
+ kmp_taskgraph_exec_descr_t *item = head;
+ do {
+ assert(item->region->type == TASKGRAPH_REGION_NODE);
+ kmp_task_t *task = item->region->task.node->task;
+ kmp_taskdata_t *current_taskdata = thread->th.th_current_task;
+ __kmp_omp_tg_task(gtid, task, taskgroup, current_taskdata, true);
+ item = item->sibling;
+ } while (item != head);
+ break;
+ }
+ default:;
}
}
@@ -2478,55 +2462,54 @@ static bool
__kmp_taskgraph_exec_descr_finish(kmp_int32 gtid, kmp_info_t *thread,
kmp_taskgraph_exec_descr_t *descr) {
switch (descr->region->type) {
- case TASKGRAPH_REGION_NODE: {
- kmp_task_t *task = descr->region->task.node->task;
- kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
- taskdata->td_flags.started = 0;
- taskdata->td_flags.executing = 0;
- taskdata->td_flags.complete = 0;
- taskdata->td_flags.freed = 0;
- bool any_successors = false;
- kmp_taskgraph_exec_descr_t *walk = descr;
- do {
- if (walk->successor) {
- any_successors = true;
- __kmp_taskgraph_exec_descr_start(gtid, thread, walk->successor,
- taskdata->td_taskgroup);
- }
- walk = walk->next_instance;
- } while (walk != descr);
- return any_successors;
- }
- default:
- fprintf(stderr, "unexpected exec descr type for finish? (%p)\n", descr);
- exit(1);
+ case TASKGRAPH_REGION_NODE: {
+ kmp_task_t *task = descr->region->task.node->task;
+ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+ taskdata->td_flags.started = 0;
+ taskdata->td_flags.executing = 0;
+ taskdata->td_flags.complete = 0;
+ taskdata->td_flags.freed = 0;
+ bool any_successors = false;
+ kmp_taskgraph_exec_descr_t *walk = descr;
+ do {
+ if (walk->successor) {
+ any_successors = true;
+ __kmp_taskgraph_exec_descr_start(gtid, thread, walk->successor,
+ taskdata->td_taskgroup);
+ }
+ walk = walk->next_instance;
+ } while (walk != descr);
+ return any_successors;
+ }
+ default:
+ fprintf(stderr, "unexpected exec descr type for finish? (%p)\n", descr);
+ exit(1);
}
return false;
}
-static kmp_size_t
-__kmp_exec_descr_count(kmp_taskgraph_region_t *region) {
+static kmp_size_t __kmp_exec_descr_count(kmp_taskgraph_region_t *region) {
kmp_size_t sum = 0;
switch (region->type) {
- case TASKGRAPH_REGION_ENTRY:
- case TASKGRAPH_REGION_EXIT:
- return 0;
- case TASKGRAPH_REGION_NODE:
- case TASKGRAPH_REGION_WAIT:
- return 1;
- case TASKGRAPH_REGION_PARALLEL:
- case TASKGRAPH_REGION_EXCLUSIVE:
- sum++;
- KMP_FALLTHROUGH();
- case TASKGRAPH_REGION_SEQUENTIAL:
- for (kmp_int32 i = 0; i < region->inner.num_children; i++)
- sum += __kmp_exec_descr_count(region->inner.children[i]);
- break;
- default:
- fprintf(stderr, "unexpected region type\n");
- exit(1);
+ case TASKGRAPH_REGION_ENTRY:
+ case TASKGRAPH_REGION_EXIT:
+ return 0;
+ case TASKGRAPH_REGION_NODE:
+ case TASKGRAPH_REGION_WAIT:
+ return 1;
+ case TASKGRAPH_REGION_PARALLEL:
+ case TASKGRAPH_REGION_EXCLUSIVE:
+ sum++;
+ KMP_FALLTHROUGH();
+ case TASKGRAPH_REGION_SEQUENTIAL:
+ for (kmp_int32 i = 0; i < region->inner.num_children; i++)
+ sum += __kmp_exec_descr_count(region->inner.children[i]);
+ break;
+ default:
+ fprintf(stderr, "unexpected region type\n");
+ exit(1);
}
return sum;
}
@@ -2720,8 +2703,8 @@ void *__kmpc_taskred_init(int gtid, int num, void *data) {
}
#if OMP_TASKGRAPH_EXPERIMENTAL
-static kmp_taskgraph_record_t *__kmp_taskgraph_or_parent_recording(
- kmp_taskgroup_t *taskgroup) {
+static kmp_taskgraph_record_t *
+__kmp_taskgraph_or_parent_recording(kmp_taskgroup_t *taskgroup) {
kmp_taskgraph_record_t *rec = nullptr;
for (; taskgroup; taskgroup = taskgroup->parent) {
@@ -2742,13 +2725,15 @@ void *__kmpc_taskgraph_taskred_init(kmp_int32 gtid, kmp_int32 num, void *data) {
kmp_taskgraph_status_t status = KMP_ATOMIC_LD_ACQ(&rec->status);
if (status == KMP_TDG_RECORDING) {
kmp_taskgraph_reduce_input_data_t *input_data =
- (kmp_taskgraph_reduce_input_data_t *)
- __kmp_fast_allocate(thread,
- sizeof(kmp_taskgraph_reduce_input_data_t));
+ (kmp_taskgraph_reduce_input_data_t *)__kmp_fast_allocate(
+ thread, sizeof(kmp_taskgraph_reduce_input_data_t));
// The compiler might build the reduction input data on the stack, so
// we must make a copy.
- input_data->reduce_data = __kmp_fast_allocate(thread, sizeof(kmp_taskred_input_t) * num);
- KMP_MEMCPY(input_data->reduce_data, data, sizeof(kmp_taskred_input_t) * num);;
+ input_data->reduce_data =
+ __kmp_fast_allocate(thread, sizeof(kmp_taskred_input_t) * num);
+ KMP_MEMCPY(input_data->reduce_data, data,
+ sizeof(kmp_taskred_input_t) * num);
+ ;
input_data->reduce_num_data = num;
taskgroup->taskgraph.reduce_input = input_data;
} else if (status == KMP_TDG_READY)
@@ -3193,29 +3178,26 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
}
#if OMP_TASKGRAPH_EXPERIMENTAL
-void
-__kmp_replay_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
- kmp_taskgraph_record_t *taskgraph, kmp_uint32 graph_id,
- kmp_taskgroup_t *taskgroup) {
+void __kmp_replay_taskgraph(kmp_int32 gtid, kmp_taskdata_t *current_taskdata,
+ kmp_taskgraph_record_t *taskgraph,
+ kmp_uint32 graph_id, kmp_taskgroup_t *taskgroup) {
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskgraph_exec_descr_t *exec_descrs = taskgraph->exec_descrs;
if (!exec_descrs) {
kmp_int32 exec_descr_count = __kmp_exec_descr_count(taskgraph->root);
- exec_descrs =
- (kmp_taskgraph_exec_descr_t*)__kmp_thread_malloc(thread,
- exec_descr_count * sizeof(kmp_taskgraph_exec_descr_t));
+ exec_descrs = (kmp_taskgraph_exec_descr_t *)__kmp_thread_malloc(
+ thread, exec_descr_count * sizeof(kmp_taskgraph_exec_descr_t));
taskgraph->exec_descrs = exec_descrs;
taskgraph->exec_descr_size = exec_descr_count;
}
kmp_taskgraph_exec_descr_t *succs_to_fill = nullptr;
kmp_size_t next_idx = 0;
- kmp_taskgraph_exec_descr_t *head =
- __kmp_fill_exec_descr(gtid, thread, taskgraph, taskgraph->root,
- current_taskdata, exec_descrs, next_idx,
- &succs_to_fill);
+ kmp_taskgraph_exec_descr_t *head = __kmp_fill_exec_descr(
+ gtid, thread, taskgraph, taskgraph->root, current_taskdata, exec_descrs,
+ next_idx, &succs_to_fill);
assert(next_idx == taskgraph->exec_descr_size);
__kmp_exec_descr_link_instances(exec_descrs, taskgraph->exec_descr_size);
@@ -5008,9 +4990,9 @@ public:
};
#if OMP_TASKGRAPH_EXPERIMENTAL
-kmp_taskgraph_node_t* __kmp_taskgraph_node_alloc(kmp_taskgraph_record_t *rec,
- kmp_task_t *task,
- kmp_size_t *index_p = nullptr) {
+kmp_taskgraph_node_t *
+__kmp_taskgraph_node_alloc(kmp_taskgraph_record_t *rec, kmp_task_t *task,
+ kmp_size_t *index_p = nullptr) {
kmp_int32 gtid = rec->gtid;
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskgraph_node_t *new_task = nullptr;
@@ -5019,14 +5001,14 @@ kmp_taskgraph_node_t* __kmp_taskgraph_node_alloc(kmp_taskgraph_record_t *rec,
if (!rec->record_map) {
rec->nodes_allocated = 4;
- rec->record_map = (kmp_taskgraph_node_t *)__kmp_thread_malloc(thread,
- rec->nodes_allocated * sizeof(kmp_taskgraph_node_t));
+ rec->record_map = (kmp_taskgraph_node_t *)__kmp_thread_malloc(
+ thread, rec->nodes_allocated * sizeof(kmp_taskgraph_node_t));
}
if (rec->num_tasks >= rec->nodes_allocated) {
- rec->record_map =
- (kmp_taskgraph_node_t *)__kmp_thread_realloc(thread, rec->record_map,
- 2 * rec->nodes_allocated * sizeof(kmp_taskgraph_node_t));
+ rec->record_map = (kmp_taskgraph_node_t *)__kmp_thread_realloc(
+ thread, rec->record_map,
+ 2 * rec->nodes_allocated * sizeof(kmp_taskgraph_node_t));
rec->nodes_allocated *= 2;
}
@@ -5064,22 +5046,21 @@ kmp_taskgraph_node_t* __kmp_taskgraph_node_alloc(kmp_taskgraph_record_t *rec,
// tc Iterations count
// task_dup Tasks duplication routine
// codeptr_ra Return address for OMPT events
-static void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
- kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
- kmp_int32 nogroup, kmp_uint64 ub_glob,
- kmp_uint64 num_tasks, kmp_uint64 grainsize,
- kmp_uint64 extras, kmp_int64 last_chunk,
- kmp_uint64 tc,
+static void
+__kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task, kmp_uint64 *lb,
+ kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup,
+ kmp_uint64 ub_glob, kmp_uint64 num_tasks,
+ kmp_uint64 grainsize, kmp_uint64 extras,
+ kmp_int64 last_chunk, kmp_uint64 tc,
#if OMPT_SUPPORT
- void *codeptr_ra,
+ void *codeptr_ra,
#endif
- void *task_dup
+ void *task_dup
#if OMP_TASKGRAPH_EXPERIMENTAL
- ,
- kmp_taskgraph_record_t *taskgraph_rec =
- nullptr
+ ,
+ kmp_taskgraph_record_t *taskgraph_rec = nullptr
#endif
- ) {
+) {
KMP_COUNT_BLOCK(OMP_TASKLOOP);
KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
@@ -5166,7 +5147,7 @@ static void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
kmp_size_t rec_index = -1;
// Record the task in the taskgraph.
kmp_taskgraph_node_t *node =
- __kmp_taskgraph_node_alloc(taskgraph_rec, next_task, &rec_index);
+ __kmp_taskgraph_node_alloc(taskgraph_rec, next_task, &rec_index);
kmp_taskgroup_t *taskgroup = current_task->td_taskgroup;
if (taskgroup->taskgraph.reduce_input) {
node->reduce_input = taskgroup->taskgraph.reduce_input;
@@ -5183,8 +5164,8 @@ static void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
if (nogroup)
taskgraph_rec->record_map[rec_index].u.unresolved.cfg_successor = -1;
else if (taskloop_prev_idx != -1)
- taskgraph_rec->record_map[taskloop_prev_idx].u.unresolved.cfg_successor =
- rec_index;
+ taskgraph_rec->record_map[taskloop_prev_idx]
+ .u.unresolved.cfg_successor = rec_index;
if (taskloop_first_idx == -1)
taskloop_first_idx = rec_index;
taskloop_prev_idx = rec_index;
@@ -5200,7 +5181,7 @@ static void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
}
#endif // OMPT_OPTIONAL
#else
- __kmp_omp_task(gtid, next_task, true); // schedule new task
+ __kmp_omp_task(gtid, next_task, true); // schedule new task
#endif
#if OMP_TASKGRAPH_EXPERIMENTAL
}
@@ -5213,14 +5194,14 @@ static void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
// Create a node to act as an "end group" marker.
kmp_size_t endgroup_idx = -1;
kmp_taskgraph_node_t *endgrpnode =
- __kmp_taskgraph_node_alloc(taskgraph_rec, nullptr, &endgroup_idx);
+ __kmp_taskgraph_node_alloc(taskgraph_rec, nullptr, &endgroup_idx);
endgrpnode->taskloop_task = true;
// Point all the cfg_successor indices to this node now.
for (kmp_int32 looptask = taskloop_first_idx; looptask != -1;) {
kmp_int32 next_task =
- taskgraph_rec->record_map[looptask].u.unresolved.cfg_successor;
+ taskgraph_rec->record_map[looptask].u.unresolved.cfg_successor;
taskgraph_rec->record_map[looptask].u.unresolved.cfg_successor =
- endgroup_idx;
+ endgroup_idx;
looptask = next_task;
}
}
@@ -5467,7 +5448,7 @@ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
,
kmp_taskgraph_record_t *taskgraph_rec = nullptr
#endif
- ) {
+) {
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
KMP_DEBUG_ASSERT(task != NULL);
if (nogroup == 0) {
@@ -5600,11 +5581,12 @@ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
OMPT_GET_RETURN_ADDRESS(0),
#endif
task_dup, taskgraph_rec);
- // check if clause value next
- // Also require GOMP_taskloop to reduce to linear (taskdata->td_flags.native)
+ // check if clause value next
+ // Also require GOMP_taskloop to reduce to linear
+ // (taskdata->td_flags.native)
} else
#endif
- if (if_val == 0) { // if(0) specified, mark task as serial
+ if (if_val == 0) { // if(0) specified, mark task as serial
taskdata->td_flags.task_serial = 1;
taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
// always start serial tasks linearly
@@ -5758,11 +5740,12 @@ bool __kmpc_omp_has_task_team(kmp_int32 gtid) {
#if OMP_TASKGRAPH_EXPERIMENTAL
-static kmp_taskgraph_record_t*
-__kmp_taskgraph_alloc(kmp_int32 gtid, kmp_int32 graph_id) {
+static kmp_taskgraph_record_t *__kmp_taskgraph_alloc(kmp_int32 gtid,
+ kmp_int32 graph_id) {
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskgraph_record_t *new_rec =
- (kmp_taskgraph_record_t *)__kmp_fast_allocate(thread, sizeof(kmp_taskgraph_record_t));
+ (kmp_taskgraph_record_t *)__kmp_fast_allocate(
+ thread, sizeof(kmp_taskgraph_record_t));
new_rec->status = KMP_TDG_RECORDING;
new_rec->gtid = gtid;
new_rec->graph_id = graph_id;
@@ -5782,8 +5765,10 @@ __kmp_taskgraph_alloc(kmp_int32 gtid, kmp_int32 graph_id) {
// Clone a (new) task that has had its private variables and shared variables
// initialised already.
static kmp_task_t *__kmp_taskgraph_clone_task(kmp_info_t *thread,
- kmp_taskgraph_record_t *taskgraph, kmp_task_t *orig,
- size_t sizeof_kmp_task_t, size_t sizeof_shareds) {
+ kmp_taskgraph_record_t *taskgraph,
+ kmp_task_t *orig,
+ size_t sizeof_kmp_task_t,
+ size_t sizeof_shareds) {
// FIXME: This should use a "taskdup" function like taskloops in cases where
// private variables are not trivially copyable. For now, do it by plain
// bitwise copy.
@@ -5794,7 +5779,8 @@ static kmp_task_t *__kmp_taskgraph_clone_task(kmp_info_t *thread,
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(orig);
size_t shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(kmp_uint64));
- kmp_taskdata_t *copy_td = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset + sizeof_shareds);
+ kmp_taskdata_t *copy_td = (kmp_taskdata_t *)__kmp_fast_allocate(
+ thread, shareds_offset + sizeof_shareds);
KMP_MEMCPY(copy_td, taskdata, shareds_offset + sizeof_shareds);
// Tasks cloned for a taskgraph always have this field set.
copy_td->owning_taskgraph = taskgraph;
@@ -5813,10 +5799,11 @@ static kmp_task_t *__kmp_taskgraph_clone_task(kmp_info_t *thread,
// entry: Pointer to the entry function
// args: Pointer to the function arguments
void __kmpc_taskgraph(ident_t *loc_ref, kmp_int32 gtid,
- std::atomic<void*> *tdg_handle, kmp_uint32 graph_id,
+ std::atomic<void *> *tdg_handle, kmp_uint32 graph_id,
kmp_int32 graph_reset, kmp_int32 nogroup,
void (*entry)(void *), void *args) {
- kmp_taskgraph_record_t *record = (kmp_taskgraph_record_t*)KMP_ATOMIC_LD_ACQ(tdg_handle);
+ kmp_taskgraph_record_t *record =
+ (kmp_taskgraph_record_t *)KMP_ATOMIC_LD_ACQ(tdg_handle);
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskgroup_t *taskgroup;
@@ -5834,11 +5821,11 @@ void __kmpc_taskgraph(ident_t *loc_ref, kmp_int32 gtid,
if (!record) {
record = __kmp_taskgraph_alloc(gtid, graph_id);
- // Another thread may have allocated the taskgraph already. Check that here.
+ // Another thread may have allocated the taskgraph already. Check that
+ // here.
kmp_taskgraph_record_t *other =
- (kmp_taskgraph_record_t *)KMP_COMPARE_AND_STORE_RET64(tdg_handle,
- nullptr,
- record);
+ (kmp_taskgraph_record_t *)KMP_COMPARE_AND_STORE_RET64(tdg_handle,
+ nullptr, record);
if (other != nullptr) {
__kmp_fast_free(thread, record);
record = other;
@@ -5863,7 +5850,7 @@ void __kmpc_taskgraph(ident_t *loc_ref, kmp_int32 gtid,
else if (status == KMP_TDG_READY) {
kmp_taskdata *current_taskdata = thread->th.th_current_task;
KG_TRACE(10, ("Replay taskgraph %p from task %p\n", record,
- KMP_TASKDATA_TO_TASK(current_taskdata)));
+ KMP_TASKDATA_TO_TASK(current_taskdata)));
__kmp_acquire_lock(&record->map_lock, gtid);
__kmp_replay_taskgraph(gtid, current_taskdata, record, graph_id, taskgroup);
__kmpc_end_taskgroup(loc_ref, gtid);
@@ -5893,9 +5880,8 @@ kmp_uint32 __kmpc_taskgraph_task(ident_t *loc_ref, kmp_int32 gtid,
if (rec) {
kmp_taskgraph_status_t status = KMP_ATOMIC_LD_ACQ(&rec->status);
if (status == KMP_TDG_RECORDING) {
- kmp_task_t *cloned_task =
- __kmp_taskgraph_clone_task(thread, rec, new_task, sizeof_kmp_task_t,
- sizeof_shareds);
+ kmp_task_t *cloned_task = __kmp_taskgraph_clone_task(
+ thread, rec, new_task, sizeof_kmp_task_t, sizeof_shareds);
kmp_taskgraph_node_t *node = __kmp_taskgraph_node_alloc(rec, cloned_task);
if (taskgroup->taskgraph.reduce_input) {
node->reduce_input = taskgroup->taskgraph.reduce_input;
@@ -5904,25 +5890,28 @@ kmp_uint32 __kmpc_taskgraph_task(ident_t *loc_ref, kmp_int32 gtid,
#if defined(DEBUG_TASKGRAPH)
fprintf(stderr, "__kmpc_taskgraph_task: record task here!\n");
fprintf(stderr, "private size: %d, shared size: %d\n",
- (int)(sizeof_kmp_task_t - sizeof(kmp_task_t)), (int)sizeof_shareds);
- fprintf(stderr, "ndeps: %d\n", (int) ndeps);
+ (int)(sizeof_kmp_task_t - sizeof(kmp_task_t)),
+ (int)sizeof_shareds);
+ fprintf(stderr, "ndeps: %d\n", (int)ndeps);
fprintf(stderr, "gtid: %d rec->gtid: %d\n", gtid, rec->gtid);
- fprintf(stderr, "taskgroup: %p\n", thread->th.th_current_task->td_taskgroup);
+ fprintf(stderr, "taskgroup: %p\n",
+ thread->th.th_current_task->td_taskgroup);
kmp_taskdata_t *parent = thread->th.th_current_task->td_parent;
while (parent) {
- fprintf(stderr, " parent: %p (taskgroup %p)\n", parent, parent->td_taskgroup);
+ fprintf(stderr, " parent: %p (taskgroup %p)\n", parent,
+ parent->td_taskgroup);
parent = parent->td_parent;
}
#endif
node->u.unresolved.ndeps = ndeps;
- node->u.unresolved.dep_list =
- (kmp_depend_info_t *)__kmp_thread_malloc(thread,
- ndeps * sizeof(kmp_depend_info_t));
+ node->u.unresolved.dep_list = (kmp_depend_info_t *)__kmp_thread_malloc(
+ thread, ndeps * sizeof(kmp_depend_info_t));
KMP_MEMCPY(node->u.unresolved.dep_list, dep_list,
ndeps * sizeof(kmp_depend_info_t));
} else if (status == KMP_TDG_READY) {
#ifdef DEBUG_TASKGRAPH
- fprintf(stderr, "non-taskgraph task entry point for task in finalized taskgraph");
+ fprintf(stderr,
+ "non-taskgraph task entry point for task in finalized taskgraph");
#endif
return 0;
}
@@ -5937,15 +5926,15 @@ kmp_uint32 __kmpc_taskgraph_task(ident_t *loc_ref, kmp_int32 gtid,
if (ndeps == 0)
res = __kmpc_omp_task(loc_ref, gtid, new_task);
else
- res = __kmpc_omp_task_with_deps(loc_ref, gtid, new_task, ndeps, dep_list,
- 0, nullptr);
+ res = __kmpc_omp_task_with_deps(loc_ref, gtid, new_task, ndeps, dep_list, 0,
+ nullptr);
return res;
}
-void
-__kmpc_taskgraph_taskwait(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
- kmp_depend_info_t *dep_list, kmp_int32 has_no_wait) {
+void __kmpc_taskgraph_taskwait(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_int32 ndeps, kmp_depend_info_t *dep_list,
+ kmp_int32 has_no_wait) {
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskgroup_t *taskgroup = thread->th.th_current_task->td_taskgroup;
kmp_taskgraph_record_t *rec = __kmp_taskgraph_or_parent_recording(taskgroup);
@@ -5956,17 +5945,20 @@ __kmpc_taskgraph_taskwait(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
kmp_taskgraph_node_t *node = __kmp_taskgraph_node_alloc(rec, nullptr);
#ifdef DEBUG_TASKGRAPH
fprintf(stderr, "__kmpc_taskgraph_taskwait: record taskwait here!\n");
- fprintf(stderr, "ndeps: %d\n", (int) ndeps);
+ fprintf(stderr, "ndeps: %d\n", (int)ndeps);
#endif
node->u.unresolved.ndeps = ndeps;
- node->u.unresolved.dep_list = (kmp_depend_info_t *)__kmp_thread_malloc(thread, ndeps * sizeof(kmp_depend_info_t));
- KMP_MEMCPY(node->u.unresolved.dep_list, dep_list, ndeps * sizeof(kmp_depend_info_t));
+ node->u.unresolved.dep_list = (kmp_depend_info_t *)__kmp_thread_malloc(
+ thread, ndeps * sizeof(kmp_depend_info_t));
+ KMP_MEMCPY(node->u.unresolved.dep_list, dep_list,
+ ndeps * sizeof(kmp_depend_info_t));
// TODO: Record has_no_wait somewhere?
- //if (has_no_wait)
+ // if (has_no_wait)
// return;
} else if (status == KMP_TDG_READY) {
#ifdef DEBUG_TASKGRAPH
- fprintf(stderr, "non-taskgraph taskwait entry point for taskwait in finalized taskgraph\n");
+ fprintf(stderr, "non-taskgraph taskwait entry point for taskwait in "
+ "finalized taskgraph\n");
#endif
return;
}
@@ -5976,14 +5968,14 @@ __kmpc_taskgraph_taskwait(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
has_no_wait);
}
-kmp_uint32
-__kmpc_taskgraph_taskloop(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task,
- kmp_int32 flags, size_t sizeof_kmp_task_t,
- void *shareds, size_t sizeof_shareds,
- kmp_int32 if_val, kmp_uint64 *lb, kmp_uint64 *ub,
- kmp_int64 st, kmp_int32 nogroup, kmp_int32 sched,
- kmp_uint64 grainsize, kmp_int32 modifier,
- void *task_dup) {
+kmp_uint32 __kmpc_taskgraph_taskloop(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_task_t *new_task, kmp_int32 flags,
+ size_t sizeof_kmp_task_t, void *shareds,
+ size_t sizeof_shareds, kmp_int32 if_val,
+ kmp_uint64 *lb, kmp_uint64 *ub,
+ kmp_int64 st, kmp_int32 nogroup,
+ kmp_int32 sched, kmp_uint64 grainsize,
+ kmp_int32 modifier, void *task_dup) {
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskgroup_t *taskgroup = thread->th.th_current_task->td_taskgroup;
kmp_taskgraph_record_t *rec = __kmp_taskgraph_or_parent_recording(taskgroup);
@@ -5995,7 +5987,8 @@ __kmpc_taskgraph_taskloop(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task
sched, grainsize, modifier, task_dup, rec);
else if (status == KMP_TDG_READY) {
#ifdef DEBUG_TASKGRAPH
- fprintf(stderr, "non-taskgraph taskloop entry point for taskloop in finalized taskgraph\n");
+ fprintf(stderr, "non-taskgraph taskloop entry point for taskloop in "
+ "finalized taskgraph\n");
#endif
return 0;
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/194047
More information about the llvm-branch-commits mailing list