[Openmp-commits] [openmp] f5e50b2 - [OpenMP] Optimized trivial multiple edges from task dependency graph
Joachim Jenke via Openmp-commits
openmp-commits at lists.llvm.org
Tue Nov 21 09:39:16 PST 2023
Author: Joachim Jenke
Date: 2023-11-21T18:36:12+01:00
New Revision: f5e50b21da0cb543064b2d0b9304ce0b368cf2bb
URL: https://github.com/llvm/llvm-project/commit/f5e50b21da0cb543064b2d0b9304ce0b368cf2bb
DIFF: https://github.com/llvm/llvm-project/commit/f5e50b21da0cb543064b2d0b9304ce0b368cf2bb.diff
LOG: [OpenMP] Optimized trivial multiple edges from task dependency graph
>From "3.1 Reducing the number of edges" of this [[ https://hal.science/hal-04136674v1/ | paper ]] - Optimization (b)
Task (dependency) nodes have a `successors` list built upon passed dependency.
Given the following code, B will be added to A's successors list building the graph `A` -> `B`
```
// A
# pragma omp task depend(out: x)
{}
// B
# pragma omp task depend(in: x)
{}
```
In the following code, B is currently added twice to A's successor list
```
// A
# pragma omp task depend(out: x, y)
{}
// B
# pragma omp task depend(in: x, y)
{}
```
This patch removes such dupplicates by checking lastly inserted task in `A` successor list.
Authored by: Romain Pereira (rpereira-dev)
Differential Revision: https://reviews.llvm.org/D158544
Added:
openmp/runtime/test/tasking/kmp_task_deps.h
openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c
openmp/runtime/test/tasking/kmp_task_deps_multiple_edges_inoutset.c
Modified:
openmp/runtime/src/kmp.h
openmp/runtime/src/kmp_taskdeps.cpp
Removed:
################################################################################
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index f95d008f2c6a006..d34adf7cbf8af3c 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -4237,6 +4237,11 @@ KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps,
kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
kmp_depend_info_t *noalias_dep_list);
+
+KMP_EXPORT kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task);
+
+KMP_EXPORT kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task);
+
KMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid,
kmp_int32 ndeps,
kmp_depend_info_t *dep_list,
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 3b39f503973635b..f7529481393f978 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -284,6 +284,16 @@ static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
}
+kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task) {
+ kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
+ return td->td_depnode ? &(td->td_depnode->dn) : NULL;
+}
+
+kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task) {
+ kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
+ return td->td_depnode->dn.successors;
+}
+
static inline kmp_int32
__kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
kmp_task_t *task, kmp_depnode_t *node,
@@ -307,16 +317,18 @@ __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
if (dep->dn.task) {
KMP_ACQUIRE_DEPNODE(gtid, dep);
if (dep->dn.task) {
+ if (!dep->dn.successors || dep->dn.successors->node != node) {
#if OMPX_TASKGRAPH
- if (!(__kmp_tdg_is_recording(tdg_status)) && task)
+ if (!(__kmp_tdg_is_recording(tdg_status)) && task)
#endif
- __kmp_track_dependence(gtid, dep, node, task);
- dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
- KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
- "%p\n",
- gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
- KMP_TASK_TO_TASKDATA(task)));
- npredecessors++;
+ __kmp_track_dependence(gtid, dep, node, task);
+ dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
+ KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
+ "%p\n",
+ gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
+ KMP_TASK_TO_TASKDATA(task)));
+ npredecessors++;
+ }
}
KMP_RELEASE_DEPNODE(gtid, dep);
}
@@ -324,6 +336,7 @@ __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
return npredecessors;
}
+// Add the edge 'sink' -> 'source' in the task dependency graph
static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
kmp_info_t *thread,
kmp_task_t *task,
@@ -346,29 +359,31 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
// synchronously add source to sink' list of successors
KMP_ACQUIRE_DEPNODE(gtid, sink);
if (sink->dn.task) {
+ if (!sink->dn.successors || sink->dn.successors->node != source) {
#if OMPX_TASKGRAPH
- if (!(__kmp_tdg_is_recording(tdg_status)) && task)
+ if (!(__kmp_tdg_is_recording(tdg_status)) && task)
#endif
- __kmp_track_dependence(gtid, sink, source, task);
- sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source);
- KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
+ __kmp_track_dependence(gtid, sink, source, task);
+ sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source);
+ KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
"%p\n",
gtid, KMP_TASK_TO_TASKDATA(sink->dn.task),
KMP_TASK_TO_TASKDATA(task)));
#if OMPX_TASKGRAPH
- if (__kmp_tdg_is_recording(tdg_status)) {
- kmp_taskdata_t *tdd = KMP_TASK_TO_TASKDATA(sink->dn.task);
- if (tdd->is_taskgraph) {
- if (tdd->td_flags.onced)
- // decrement npredecessors if sink->dn.task belongs to a taskgraph
- // and
- // 1) the task is reset to its initial state (by kmp_free_task) or
- // 2) the task is complete but not yet reset
- npredecessors--;
+ if (__kmp_tdg_is_recording(tdg_status)) {
+ kmp_taskdata_t *tdd = KMP_TASK_TO_TASKDATA(sink->dn.task);
+ if (tdd->is_taskgraph) {
+ if (tdd->td_flags.onced)
+ // decrement npredecessors if sink->dn.task belongs to a taskgraph
+ // and
+ // 1) the task is reset to its initial state (by kmp_free_task) or
+ // 2) the task is complete but not yet reset
+ npredecessors--;
+ }
}
- }
#endif
npredecessors++;
+ }
}
KMP_RELEASE_DEPNODE(gtid, sink);
}
diff --git a/openmp/runtime/test/tasking/kmp_task_deps.h b/openmp/runtime/test/tasking/kmp_task_deps.h
new file mode 100644
index 000000000000000..5a1f2b0806a8a5c
--- /dev/null
+++ b/openmp/runtime/test/tasking/kmp_task_deps.h
@@ -0,0 +1,56 @@
+#ifndef KMP_TASK_DEPS_H
+#define KMP_TASK_DEPS_H
+
+#include <stddef.h> /* size_t */
+
+// ---------------------------------------------------------------------------
+// internal data to emulate compiler codegen
+typedef struct DEP {
+ size_t addr;
+ size_t len;
+ unsigned char flags;
+} dep;
+
+typedef struct task {
+ void **shareds;
+ void *entry;
+ int part_id;
+ void *destr_thunk;
+ int priority;
+ long long device_id;
+ int f_priv;
+} kmp_task_t;
+typedef int (*entry_t)(int, kmp_task_t *);
+typedef struct ID {
+ int reserved_1;
+ int flags;
+ int reserved_2;
+ int reserved_3;
+ char *psource;
+} id;
+
+#define TIED 1
+
+struct kmp_depnode_list;
+
+typedef struct kmp_base_depnode {
+ struct kmp_depnode_list *successors;
+ /* [...] more stuff down here */
+} kmp_base_depnode_t;
+
+typedef struct kmp_depnode_list {
+ struct kmp_base_depnode *node;
+ struct kmp_depnode_list *next;
+} kmp_depnode_list_t;
+
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+kmp_task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags, size_t sz,
+ size_t shar, entry_t rtn);
+int __kmpc_omp_task_with_deps(id *loc, int gtid, kmp_task_t *task, int nd,
+ dep *dep_lst, int nd_noalias,
+ dep *noalias_dep_lst);
+kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task);
+kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task);
+int __kmpc_global_thread_num(id *);
+
+#endif /* KMP_TASK_DEPS_H */
diff --git a/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c b/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c
new file mode 100644
index 000000000000000..e04ebf0f394000a
--- /dev/null
+++ b/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c
@@ -0,0 +1,67 @@
+// REQUIRES: linux
+// RUN: %libomp-compile && env OMP_NUM_THREADS='2' %libomp-run
+
+#include <assert.h>
+#include <omp.h>
+
+#include "kmp_task_deps.h"
+
+// the test
+int main(void) {
+ volatile int done = 0;
+
+#pragma omp parallel num_threads(2)
+ {
+ while (omp_get_thread_num() != 0 && !done)
+ ;
+
+#pragma omp single
+ {
+ kmp_task_t *A, *B;
+ kmp_depnode_list_t *A_succ;
+ kmp_base_depnode_t *B_node;
+ dep deps[2];
+ int gtid;
+ int x, y;
+
+ gtid = __kmpc_global_thread_num(&loc);
+
+ // A - out(x, y)
+ A = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+ deps[0].addr = (size_t)&x;
+ deps[0].len = 0;
+ deps[0].flags = 2; // OUT
+
+ deps[1].addr = (size_t)&y;
+ deps[1].len = 0;
+ deps[1].flags = 2; // OUT
+
+ __kmpc_omp_task_with_deps(&loc, gtid, A, 2, deps, 0, 0);
+
+ // B - in(x, y)
+ B = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+ deps[0].addr = (size_t)&x;
+ deps[0].len = 0;
+ deps[0].flags = 1; // IN
+
+ deps[1].addr = (size_t)&y;
+ deps[1].len = 0;
+ deps[1].flags = 1; // IN
+
+ __kmpc_omp_task_with_deps(&loc, gtid, B, 2, deps, 0, 0);
+
+ // Retrieve TDG nodes
+ A_succ = __kmpc_task_get_successors(A);
+ B_node = __kmpc_task_get_depnode(B);
+
+ // 'B' should only be added once to 'A' successors list
+ assert(A_succ->node == B_node);
+ assert(A_succ->next == NULL);
+
+#pragma omp taskwait
+
+ done = 1;
+ }
+ }
+ return 0;
+}
diff --git a/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges_inoutset.c b/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges_inoutset.c
new file mode 100644
index 000000000000000..65f1ed8920baefe
--- /dev/null
+++ b/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges_inoutset.c
@@ -0,0 +1,137 @@
+// REQUIRES: linux
+// RUN: %libomp-compile && env OMP_NUM_THREADS='2' %libomp-run
+
+#include <assert.h>
+#include <omp.h>
+
+#include "kmp_task_deps.h"
+
+// Expected dependency graph (directed from top to bottom)
+//
+// A B C // inoutset(x), inoutset(x, y), inoutset(y)
+// | \ | / |
+// D E F // in(x), in(x, y), in(y)
+// \ /
+// G // out(y)
+
+// the test
+int main(void) {
+ volatile int done = 0;
+
+#pragma omp parallel num_threads(2)
+ {
+ while (omp_get_thread_num() != 0 && !done)
+ ;
+
+#pragma omp single
+ {
+ kmp_task_t *A, *B, *C, *D, *E, *F, *G;
+ kmp_depnode_list_t *A_succ, *B_succ, *C_succ, *E_succ, *F_succ;
+ kmp_base_depnode_t *D_node, *E_node, *F_node, *G_node;
+ dep deps[2];
+ int gtid;
+ int x, y;
+
+ gtid = __kmpc_global_thread_num(&loc);
+
+ deps[0].addr = (size_t)&x;
+ deps[0].len = 0;
+ deps[0].flags = 8; // INOUTSET
+
+ deps[1].addr = (size_t)&y;
+ deps[1].len = 0;
+ deps[1].flags = 8; // INOUTSET
+
+ // A inoutset(x)
+ A = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+ __kmpc_omp_task_with_deps(&loc, gtid, A, 1, deps + 0, 0, 0);
+
+ // B inoutset(x, y)
+ B = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+ __kmpc_omp_task_with_deps(&loc, gtid, B, 2, deps + 0, 0, 0);
+
+ // C inoutset(y)
+ C = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+ __kmpc_omp_task_with_deps(&loc, gtid, C, 1, deps + 1, 0, 0);
+
+ deps[0].flags = 1; // IN
+ deps[1].flags = 1; // IN
+
+ // D in(x)
+ D = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+ __kmpc_omp_task_with_deps(&loc, gtid, D, 1, deps + 0, 0, 0);
+
+ // E in(x, y)
+ E = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+ __kmpc_omp_task_with_deps(&loc, gtid, E, 2, deps + 0, 0, 0);
+
+ // F in(y)
+ F = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+ __kmpc_omp_task_with_deps(&loc, gtid, F, 1, deps + 1, 0, 0);
+
+ deps[1].flags = 2; // OUT
+
+ // G out(y)
+ G = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+ __kmpc_omp_task_with_deps(&loc, gtid, G, 1, deps + 1, 0, 0);
+
+ // Retrieve TDG nodes and check edges
+ A_succ = __kmpc_task_get_successors(A);
+ B_succ = __kmpc_task_get_successors(B);
+ C_succ = __kmpc_task_get_successors(C);
+ E_succ = __kmpc_task_get_successors(E);
+ F_succ = __kmpc_task_get_successors(F);
+
+ D_node = __kmpc_task_get_depnode(D);
+ E_node = __kmpc_task_get_depnode(E);
+ F_node = __kmpc_task_get_depnode(F);
+
+ G_node = __kmpc_task_get_depnode(G);
+
+ // A -> D and A -> E
+ assert(A_succ && A_succ->next && !A_succ->next->next);
+ assert((A_succ->node == D_node && A_succ->next->node == E_node) ||
+ (A_succ->node == E_node && A_succ->next->node == D_node));
+
+ // B -> D and B -> E and B -> F
+ // valid lists are
+ // (D, E, F)
+ // (D, F, E)
+ // (E, D, F)
+ // (E, F, D)
+ // (F, D, E)
+ // (F, E, D)
+ assert(B_succ && B_succ->next && B_succ->next->next &&
+ !B_succ->next->next->next);
+ assert((B_succ->node == D_node && B_succ->next->node == E_node &&
+ B_succ->next->next->node == F_node) ||
+ (B_succ->node == D_node && B_succ->next->node == F_node &&
+ B_succ->next->next->node == E_node) ||
+ (B_succ->node == E_node && B_succ->next->node == D_node &&
+ B_succ->next->next->node == F_node) ||
+ (B_succ->node == E_node && B_succ->next->node == F_node &&
+ B_succ->next->next->node == D_node) ||
+ (B_succ->node == F_node && B_succ->next->node == D_node &&
+ B_succ->next->next->node == E_node) ||
+ (B_succ->node == F_node && B_succ->next->node == E_node &&
+ B_succ->next->next->node == D_node));
+
+ // C -> E and C -> F
+ assert(C_succ && C_succ->next && !C_succ->next->next);
+ assert((C_succ->node == E_node && C_succ->next->node == F_node) ||
+ (C_succ->node == F_node && C_succ->next->node == E_node));
+
+ // E -> G and F -> G
+ assert(E_succ && !E_succ->next);
+ assert(E_succ->node == G_node);
+
+ assert(F_succ && !F_succ->next);
+ assert(F_succ->node == G_node);
+
+#pragma omp taskwait
+
+ done = 1;
+ }
+ }
+ return 0;
+}
More information about the Openmp-commits
mailing list