[Openmp-commits] [openmp] f5e50b2 - [OpenMP] Optimized trivial multiple edges from task dependency graph

Joachim Jenke via Openmp-commits openmp-commits at lists.llvm.org
Tue Nov 21 09:39:16 PST 2023


Author: Joachim Jenke
Date: 2023-11-21T18:36:12+01:00
New Revision: f5e50b21da0cb543064b2d0b9304ce0b368cf2bb

URL: https://github.com/llvm/llvm-project/commit/f5e50b21da0cb543064b2d0b9304ce0b368cf2bb
DIFF: https://github.com/llvm/llvm-project/commit/f5e50b21da0cb543064b2d0b9304ce0b368cf2bb.diff

LOG: [OpenMP] Optimized trivial multiple edges from task dependency graph

From "3.1 Reducing the number of edges" of this [[ https://hal.science/hal-04136674v1/ | paper ]] - Optimization (b)

Task (dependency) nodes have a `successors` list built from the dependences passed to each task.
Given the following code, B will be added to A's successors list, building the graph `A` -> `B`
```
// A
 # pragma omp task depend(out: x)
{}

// B
 # pragma omp task depend(in: x)
{}
```

In the following code, B is currently added twice to A's successor list
```
// A
 # pragma omp task depend(out: x, y)
{}

// B
 # pragma omp task depend(in: x, y)
{}
```

This patch removes such duplicates by checking the most recently inserted task in `A`'s successor list.

Authored by: Romain Pereira (rpereira-dev)
Differential Revision: https://reviews.llvm.org/D158544

Added: 
    openmp/runtime/test/tasking/kmp_task_deps.h
    openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c
    openmp/runtime/test/tasking/kmp_task_deps_multiple_edges_inoutset.c

Modified: 
    openmp/runtime/src/kmp.h
    openmp/runtime/src/kmp_taskdeps.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index f95d008f2c6a006..d34adf7cbf8af3c 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -4237,6 +4237,11 @@ KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps(
     ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps,
     kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
     kmp_depend_info_t *noalias_dep_list);
+
+KMP_EXPORT kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task);
+
+KMP_EXPORT kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task);
+
 KMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid,
                                      kmp_int32 ndeps,
                                      kmp_depend_info_t *dep_list,

diff  --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 3b39f503973635b..f7529481393f978 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -284,6 +284,16 @@ static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
 #endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
 }
 
+kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task) {
+  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
+  return td->td_depnode ? &(td->td_depnode->dn) : NULL;
+}
+
+kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task) {
+  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
+  return td->td_depnode->dn.successors;
+}
+
 static inline kmp_int32
 __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
                              kmp_task_t *task, kmp_depnode_t *node,
@@ -307,16 +317,18 @@ __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
     if (dep->dn.task) {
       KMP_ACQUIRE_DEPNODE(gtid, dep);
       if (dep->dn.task) {
+        if (!dep->dn.successors || dep->dn.successors->node != node) {
 #if OMPX_TASKGRAPH
-        if (!(__kmp_tdg_is_recording(tdg_status)) && task)
+          if (!(__kmp_tdg_is_recording(tdg_status)) && task)
 #endif
-          __kmp_track_dependence(gtid, dep, node, task);
-        dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
-        KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
-                      "%p\n",
-                      gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
-                      KMP_TASK_TO_TASKDATA(task)));
-        npredecessors++;
+            __kmp_track_dependence(gtid, dep, node, task);
+          dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
+          KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
+                        "%p\n",
+                        gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
+                        KMP_TASK_TO_TASKDATA(task)));
+          npredecessors++;
+        }
       }
       KMP_RELEASE_DEPNODE(gtid, dep);
     }
@@ -324,6 +336,7 @@ __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
   return npredecessors;
 }
 
+// Add the edge 'sink' -> 'source' in the task dependency graph
 static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
                                                      kmp_info_t *thread,
                                                      kmp_task_t *task,
@@ -346,29 +359,31 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
     // synchronously add source to sink' list of successors
     KMP_ACQUIRE_DEPNODE(gtid, sink);
     if (sink->dn.task) {
+      if (!sink->dn.successors || sink->dn.successors->node != source) {
 #if OMPX_TASKGRAPH
-      if (!(__kmp_tdg_is_recording(tdg_status)) && task)
+        if (!(__kmp_tdg_is_recording(tdg_status)) && task)
 #endif
-        __kmp_track_dependence(gtid, sink, source, task);
-      sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source);
-      KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
+          __kmp_track_dependence(gtid, sink, source, task);
+        sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source);
+        KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
                     "%p\n",
                     gtid, KMP_TASK_TO_TASKDATA(sink->dn.task),
                     KMP_TASK_TO_TASKDATA(task)));
 #if OMPX_TASKGRAPH
-      if (__kmp_tdg_is_recording(tdg_status)) {
-        kmp_taskdata_t *tdd = KMP_TASK_TO_TASKDATA(sink->dn.task);
-        if (tdd->is_taskgraph) {
-          if (tdd->td_flags.onced)
-            // decrement npredecessors if sink->dn.task belongs to a taskgraph
-            // and
-            //  1) the task is reset to its initial state (by kmp_free_task) or
-            //  2) the task is complete but not yet reset
-            npredecessors--;
+        if (__kmp_tdg_is_recording(tdg_status)) {
+          kmp_taskdata_t *tdd = KMP_TASK_TO_TASKDATA(sink->dn.task);
+          if (tdd->is_taskgraph) {
+            if (tdd->td_flags.onced)
+              // decrement npredecessors if sink->dn.task belongs to a taskgraph
+              // and
+              //  1) the task is reset to its initial state (by kmp_free_task) or
+              //  2) the task is complete but not yet reset
+              npredecessors--;
+          }
         }
-      }
 #endif
       npredecessors++;
+      }
     }
     KMP_RELEASE_DEPNODE(gtid, sink);
   }

diff  --git a/openmp/runtime/test/tasking/kmp_task_deps.h b/openmp/runtime/test/tasking/kmp_task_deps.h
new file mode 100644
index 000000000000000..5a1f2b0806a8a5c
--- /dev/null
+++ b/openmp/runtime/test/tasking/kmp_task_deps.h
@@ -0,0 +1,56 @@
+#ifndef KMP_TASK_DEPS_H
+#define KMP_TASK_DEPS_H
+
+#include <stddef.h> /* size_t */
+
+// ---------------------------------------------------------------------------
+// internal data to emulate compiler codegen
+typedef struct DEP {
+  size_t addr;
+  size_t len;
+  unsigned char flags;
+} dep;
+
+typedef struct task {
+  void **shareds;
+  void *entry;
+  int part_id;
+  void *destr_thunk;
+  int priority;
+  long long device_id;
+  int f_priv;
+} kmp_task_t;
+typedef int (*entry_t)(int, kmp_task_t *);
+typedef struct ID {
+  int reserved_1;
+  int flags;
+  int reserved_2;
+  int reserved_3;
+  char *psource;
+} id;
+
+#define TIED 1
+
+struct kmp_depnode_list;
+
+typedef struct kmp_base_depnode {
+  struct kmp_depnode_list *successors;
+  /* [...] more stuff down here */
+} kmp_base_depnode_t;
+
+typedef struct kmp_depnode_list {
+  struct kmp_base_depnode *node;
+  struct kmp_depnode_list *next;
+} kmp_depnode_list_t;
+
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+kmp_task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags, size_t sz,
+                                  size_t shar, entry_t rtn);
+int __kmpc_omp_task_with_deps(id *loc, int gtid, kmp_task_t *task, int nd,
+                              dep *dep_lst, int nd_noalias,
+                              dep *noalias_dep_lst);
+kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task);
+kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task);
+int __kmpc_global_thread_num(id *);
+
+#endif /* KMP_TASK_DEPS_H */

diff  --git a/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c b/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c
new file mode 100644
index 000000000000000..e04ebf0f394000a
--- /dev/null
+++ b/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c
@@ -0,0 +1,67 @@
+// REQUIRES: linux
+// RUN: %libomp-compile && env OMP_NUM_THREADS='2' %libomp-run
+
+#include <assert.h>
+#include <omp.h>
+
+#include "kmp_task_deps.h"
+
+// the test
+int main(void) {
+  volatile int done = 0;
+
+#pragma omp parallel num_threads(2)
+  {
+    while (omp_get_thread_num() != 0 && !done)
+      ;
+
+#pragma omp single
+    {
+      kmp_task_t *A, *B;
+      kmp_depnode_list_t *A_succ;
+      kmp_base_depnode_t *B_node;
+      dep deps[2];
+      int gtid;
+      int x, y;
+
+      gtid = __kmpc_global_thread_num(&loc);
+
+      // A - out(x, y)
+      A = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+      deps[0].addr = (size_t)&x;
+      deps[0].len = 0;
+      deps[0].flags = 2; // OUT
+
+      deps[1].addr = (size_t)&y;
+      deps[1].len = 0;
+      deps[1].flags = 2; // OUT
+
+      __kmpc_omp_task_with_deps(&loc, gtid, A, 2, deps, 0, 0);
+
+      // B - in(x, y)
+      B = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+      deps[0].addr = (size_t)&x;
+      deps[0].len = 0;
+      deps[0].flags = 1; // IN
+
+      deps[1].addr = (size_t)&y;
+      deps[1].len = 0;
+      deps[1].flags = 1; // IN
+
+      __kmpc_omp_task_with_deps(&loc, gtid, B, 2, deps, 0, 0);
+
+      // Retrieve TDG nodes
+      A_succ = __kmpc_task_get_successors(A);
+      B_node = __kmpc_task_get_depnode(B);
+
+      // 'B' should only be added once to 'A' successors list
+      assert(A_succ->node == B_node);
+      assert(A_succ->next == NULL);
+
+#pragma omp taskwait
+
+      done = 1;
+    }
+  }
+  return 0;
+}

diff  --git a/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges_inoutset.c b/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges_inoutset.c
new file mode 100644
index 000000000000000..65f1ed8920baefe
--- /dev/null
+++ b/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges_inoutset.c
@@ -0,0 +1,137 @@
+// REQUIRES: linux
+// RUN: %libomp-compile && env OMP_NUM_THREADS='2' %libomp-run
+
+#include <assert.h>
+#include <omp.h>
+
+#include "kmp_task_deps.h"
+
+//  Expected dependency graph (directed from top to bottom)
+//
+//          A   B   C       // inoutset(x), inoutset(x, y), inoutset(y)
+//          | \ | / |
+//          D   E   F       // in(x), in(x, y), in(y)
+//               \ /
+//                G         // out(y)
+
+// the test
+int main(void) {
+  volatile int done = 0;
+
+#pragma omp parallel num_threads(2)
+  {
+    while (omp_get_thread_num() != 0 && !done)
+      ;
+
+#pragma omp single
+    {
+      kmp_task_t *A, *B, *C, *D, *E, *F, *G;
+      kmp_depnode_list_t *A_succ, *B_succ, *C_succ, *E_succ, *F_succ;
+      kmp_base_depnode_t *D_node, *E_node, *F_node, *G_node;
+      dep deps[2];
+      int gtid;
+      int x, y;
+
+      gtid = __kmpc_global_thread_num(&loc);
+
+      deps[0].addr = (size_t)&x;
+      deps[0].len = 0;
+      deps[0].flags = 8; // INOUTSET
+
+      deps[1].addr = (size_t)&y;
+      deps[1].len = 0;
+      deps[1].flags = 8; // INOUTSET
+
+      // A inoutset(x)
+      A = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+      __kmpc_omp_task_with_deps(&loc, gtid, A, 1, deps + 0, 0, 0);
+
+      // B inoutset(x, y)
+      B = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+      __kmpc_omp_task_with_deps(&loc, gtid, B, 2, deps + 0, 0, 0);
+
+      // C inoutset(y)
+      C = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+      __kmpc_omp_task_with_deps(&loc, gtid, C, 1, deps + 1, 0, 0);
+
+      deps[0].flags = 1; // IN
+      deps[1].flags = 1; // IN
+
+      // D in(x)
+      D = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+      __kmpc_omp_task_with_deps(&loc, gtid, D, 1, deps + 0, 0, 0);
+
+      // E in(x, y)
+      E = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+      __kmpc_omp_task_with_deps(&loc, gtid, E, 2, deps + 0, 0, 0);
+
+      // F in(y)
+      F = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+      __kmpc_omp_task_with_deps(&loc, gtid, F, 1, deps + 1, 0, 0);
+
+      deps[1].flags = 2; // OUT
+
+      // G out(y)
+      G = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
+      __kmpc_omp_task_with_deps(&loc, gtid, G, 1, deps + 1, 0, 0);
+
+      // Retrieve TDG nodes and check edges
+      A_succ = __kmpc_task_get_successors(A);
+      B_succ = __kmpc_task_get_successors(B);
+      C_succ = __kmpc_task_get_successors(C);
+      E_succ = __kmpc_task_get_successors(E);
+      F_succ = __kmpc_task_get_successors(F);
+
+      D_node = __kmpc_task_get_depnode(D);
+      E_node = __kmpc_task_get_depnode(E);
+      F_node = __kmpc_task_get_depnode(F);
+
+      G_node = __kmpc_task_get_depnode(G);
+
+      // A -> D and A -> E
+      assert(A_succ && A_succ->next && !A_succ->next->next);
+      assert((A_succ->node == D_node && A_succ->next->node == E_node) ||
+             (A_succ->node == E_node && A_succ->next->node == D_node));
+
+      // B -> D and B -> E and B -> F
+      // valid lists are
+      //  (D, E, F)
+      //  (D, F, E)
+      //  (E, D, F)
+      //  (E, F, D)
+      //  (F, D, E)
+      //  (F, E, D)
+      assert(B_succ && B_succ->next && B_succ->next->next &&
+             !B_succ->next->next->next);
+      assert((B_succ->node == D_node && B_succ->next->node == E_node &&
+              B_succ->next->next->node == F_node) ||
+             (B_succ->node == D_node && B_succ->next->node == F_node &&
+              B_succ->next->next->node == E_node) ||
+             (B_succ->node == E_node && B_succ->next->node == D_node &&
+              B_succ->next->next->node == F_node) ||
+             (B_succ->node == E_node && B_succ->next->node == F_node &&
+              B_succ->next->next->node == D_node) ||
+             (B_succ->node == F_node && B_succ->next->node == D_node &&
+              B_succ->next->next->node == E_node) ||
+             (B_succ->node == F_node && B_succ->next->node == E_node &&
+              B_succ->next->next->node == D_node));
+
+      // C -> E and C -> F
+      assert(C_succ && C_succ->next && !C_succ->next->next);
+      assert((C_succ->node == E_node && C_succ->next->node == F_node) ||
+             (C_succ->node == F_node && C_succ->next->node == E_node));
+
+      // E -> G and F -> G
+      assert(E_succ && !E_succ->next);
+      assert(E_succ->node == G_node);
+
+      assert(F_succ && !F_succ->next);
+      assert(F_succ->node == G_node);
+
+#pragma omp taskwait
+
+      done = 1;
+    }
+  }
+  return 0;
+}


        


More information about the Openmp-commits mailing list