[Openmp-commits] [openmp] 610fea6 - [OpenMP] libomp: fixed implementation of OMP 5.1 inoutset task dependence type

via Openmp-commits openmp-commits at lists.llvm.org
Wed Jun 16 04:48:57 PDT 2021


Author: AndreyChurbanov
Date: 2021-06-16T14:47:29+03:00
New Revision: 610fea65e296e5e9aad9c90903c2e40eaa4f03ac

URL: https://github.com/llvm/llvm-project/commit/610fea65e296e5e9aad9c90903c2e40eaa4f03ac
DIFF: https://github.com/llvm/llvm-project/commit/610fea65e296e5e9aad9c90903c2e40eaa4f03ac.diff

LOG: [OpenMP] libomp: fixed implementation of OMP 5.1 inoutset task dependence type

Refactored the dependence-processing code and added the new inoutset dependence type.
The compiler can set the dependence flag to 0x8 when calling __kmpc_omp_task_with_deps.
All dependence flags the library receives so far, and the corresponding dependence types:
1 - IN, 2 - OUT, 3 - INOUT, 4 - MUTEXINOUTSET, 8 - INOUTSET.

Differential Revision: https://reviews.llvm.org/D97085

Added: 
    openmp/runtime/test/tasking/omp51_task_dep_inoutset.c

Modified: 
    openmp/runtime/src/kmp.h
    openmp/runtime/src/kmp_taskdeps.cpp
    openmp/runtime/src/kmp_taskdeps.h
    openmp/runtime/test/tasking/hidden_helper_task/common.h
    openmp/runtime/test/tasking/hidden_helper_task/depend.cpp
    openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp

Removed: 
    


################################################################################
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 4795224270646..c42761c3cff96 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2252,15 +2252,24 @@ typedef union kmp_depnode kmp_depnode_t;
 typedef struct kmp_depnode_list kmp_depnode_list_t;
 typedef struct kmp_dephash_entry kmp_dephash_entry_t;
 
+#define KMP_DEP_IN 0x1
+#define KMP_DEP_OUT 0x2
+#define KMP_DEP_INOUT 0x3
+#define KMP_DEP_MTX 0x4
+#define KMP_DEP_SET 0x8
 // Compiler sends us this info:
 typedef struct kmp_depend_info {
   kmp_intptr_t base_addr;
   size_t len;
-  struct {
-    bool in : 1;
-    bool out : 1;
-    bool mtx : 1;
-  } flags;
+  union {
+    kmp_uint8 flag;
+    struct {
+      unsigned in : 1;
+      unsigned out : 1;
+      unsigned mtx : 1;
+      unsigned set : 1;
+    } flags;
+  };
 } kmp_depend_info_t;
 
 // Internal structures to work with task dependencies:
@@ -2294,9 +2303,9 @@ union KMP_ALIGN_CACHE kmp_depnode {
 struct kmp_dephash_entry {
   kmp_intptr_t addr;
   kmp_depnode_t *last_out;
-  kmp_depnode_list_t *last_ins;
-  kmp_depnode_list_t *last_mtxs;
-  kmp_int32 last_flag;
+  kmp_depnode_list_t *last_set;
+  kmp_depnode_list_t *prev_set;
+  kmp_uint8 last_flag;
   kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */
   kmp_dephash_entry_t *next_in_bucket;
 };

diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 89a7dbcaddf47..162fb38e1eedd 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -149,9 +149,6 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
   return h;
 }
 
-#define ENTRY_LAST_INS 0
-#define ENTRY_LAST_MTXS 1
-
 static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
                                              kmp_dephash_t **hash,
                                              kmp_intptr_t addr) {
@@ -178,9 +175,9 @@ static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
 #endif
     entry->addr = addr;
     entry->last_out = NULL;
-    entry->last_ins = NULL;
-    entry->last_mtxs = NULL;
-    entry->last_flag = ENTRY_LAST_INS;
+    entry->last_set = NULL;
+    entry->prev_set = NULL;
+    entry->last_flag = 0;
     entry->mtx_lock = NULL;
     entry->next_in_bucket = h->buckets[bucket];
     h->buckets[bucket] = entry;
@@ -313,96 +310,81 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
     kmp_dephash_entry_t *info =
         __kmp_dephash_find(thread, hash, dep->base_addr);
     kmp_depnode_t *last_out = info->last_out;
-    kmp_depnode_list_t *last_ins = info->last_ins;
-    kmp_depnode_list_t *last_mtxs = info->last_mtxs;
-
-    if (dep->flags.out) { // out --> clean lists of ins and mtxs if any
-      if (last_ins || last_mtxs) {
-        if (info->last_flag == ENTRY_LAST_INS) { // INS were last
-          npredecessors +=
-              __kmp_depnode_link_successor(gtid, thread, task, node, last_ins);
-        } else { // MTXS were last
-          npredecessors +=
-              __kmp_depnode_link_successor(gtid, thread, task, node, last_mtxs);
-        }
-        __kmp_depnode_list_free(thread, last_ins);
-        __kmp_depnode_list_free(thread, last_mtxs);
-        info->last_ins = NULL;
-        info->last_mtxs = NULL;
+    kmp_depnode_list_t *last_set = info->last_set;
+    kmp_depnode_list_t *prev_set = info->prev_set;
+
+    if (dep->flags.out) { // out or inout --> clean lists if any
+      if (last_set) {
+        npredecessors +=
+            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
+        __kmp_depnode_list_free(thread, last_set);
+        __kmp_depnode_list_free(thread, prev_set);
+        info->last_set = NULL;
+        info->prev_set = NULL;
+        info->last_flag = 0; // no sets in this dephash entry
       } else {
         npredecessors +=
             __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
       }
       __kmp_node_deref(thread, last_out);
-      if (dep_barrier) {
+      if (!dep_barrier) {
+        info->last_out = __kmp_node_ref(node);
+      } else {
         // if this is a sync point in the serial sequence, then the previous
         // outputs are guaranteed to be completed after the execution of this
         // task so the previous output nodes can be cleared.
         info->last_out = NULL;
-      } else {
-        info->last_out = __kmp_node_ref(node);
       }
-    } else if (dep->flags.in) {
-      // in --> link node to either last_out or last_mtxs, clean earlier deps
-      if (last_mtxs) {
-        npredecessors +=
-            __kmp_depnode_link_successor(gtid, thread, task, node, last_mtxs);
-        __kmp_node_deref(thread, last_out);
-        info->last_out = NULL;
-        if (info->last_flag == ENTRY_LAST_MTXS && last_ins) { // MTXS were last
-          // clean old INS before creating new list
-          __kmp_depnode_list_free(thread, last_ins);
-          info->last_ins = NULL;
-        }
-      } else {
+    } else { // either IN or MTX or SET
+      if (info->last_flag == 0 || info->last_flag == dep->flag) {
+        // last_set either didn't exist or of same dep kind
         // link node as successor of the last_out if any
         npredecessors +=
             __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
-      }
-      info->last_flag = ENTRY_LAST_INS;
-      info->last_ins = __kmp_add_node(thread, info->last_ins, node);
-    } else {
-      KMP_DEBUG_ASSERT(dep->flags.mtx == 1);
-      // mtx --> link node to either last_out or last_ins, clean earlier deps
-      if (last_ins) {
+        // link node as successor of all nodes in the prev_set if any
+        npredecessors +=
+            __kmp_depnode_link_successor(gtid, thread, task, node, prev_set);
+      } else { // last_set is of different dep kind, make it prev_set
+        // link node as successor of all nodes in the last_set
         npredecessors +=
-            __kmp_depnode_link_successor(gtid, thread, task, node, last_ins);
+            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
+        // clean last_out if any
         __kmp_node_deref(thread, last_out);
         info->last_out = NULL;
-        if (info->last_flag == ENTRY_LAST_INS && last_mtxs) { // INS were last
-          // clean old MTXS before creating new list
-          __kmp_depnode_list_free(thread, last_mtxs);
-          info->last_mtxs = NULL;
-        }
-      } else {
-        // link node as successor of the last_out if any
-        npredecessors +=
-            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
-      }
-      info->last_flag = ENTRY_LAST_MTXS;
-      info->last_mtxs = __kmp_add_node(thread, info->last_mtxs, node);
-      if (info->mtx_lock == NULL) {
-        info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
-        __kmp_init_lock(info->mtx_lock);
+        // clean prev_set if any
+        __kmp_depnode_list_free(thread, prev_set);
+        // move last_set to prev_set, new last_set will be allocated
+        info->prev_set = last_set;
+        info->last_set = NULL;
       }
-      KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS);
-      kmp_int32 m;
-      // Save lock in node's array
-      for (m = 0; m < MAX_MTX_DEPS; ++m) {
-        // sort pointers in decreasing order to avoid potential livelock
-        if (node->dn.mtx_locks[m] < info->mtx_lock) {
-          KMP_DEBUG_ASSERT(node->dn.mtx_locks[node->dn.mtx_num_locks] == NULL);
-          for (int n = node->dn.mtx_num_locks; n > m; --n) {
-            // shift right all lesser non-NULL pointers
-            KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL);
-            node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1];
+      info->last_flag = dep->flag; // store dep kind of the last_set
+      info->last_set = __kmp_add_node(thread, info->last_set, node);
+
+      // check if we are processing MTX dependency
+      if (dep->flag == KMP_DEP_MTX) {
+        if (info->mtx_lock == NULL) {
+          info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
+          __kmp_init_lock(info->mtx_lock);
+        }
+        KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS);
+        kmp_int32 m;
+        // Save lock in node's array
+        for (m = 0; m < MAX_MTX_DEPS; ++m) {
+          // sort pointers in decreasing order to avoid potential livelock
+          if (node->dn.mtx_locks[m] < info->mtx_lock) {
+            KMP_DEBUG_ASSERT(!node->dn.mtx_locks[node->dn.mtx_num_locks]);
+            for (int n = node->dn.mtx_num_locks; n > m; --n) {
+              // shift right all lesser non-NULL pointers
+              KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL);
+              node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1];
+            }
+            node->dn.mtx_locks[m] = info->mtx_lock;
+            break;
           }
-          node->dn.mtx_locks[m] = info->mtx_lock;
-          break;
         }
+        KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS); // must break from loop
+        node->dn.mtx_num_locks++;
       }
-      KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS); // must break from loop
-      node->dn.mtx_num_locks++;
     }
   }
   KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
@@ -433,27 +415,25 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
   // TODO: Different algorithm for large dep_list ( > 10 ? )
   for (i = 0; i < ndeps; i++) {
     if (dep_list[i].base_addr != 0) {
+      KMP_DEBUG_ASSERT(
+          dep_list[i].flag == KMP_DEP_IN || dep_list[i].flag == KMP_DEP_OUT ||
+          dep_list[i].flag == KMP_DEP_INOUT ||
+          dep_list[i].flag == KMP_DEP_MTX || dep_list[i].flag == KMP_DEP_SET);
       for (int j = i + 1; j < ndeps; j++) {
         if (dep_list[i].base_addr == dep_list[j].base_addr) {
-          dep_list[i].flags.in |= dep_list[j].flags.in;
-          dep_list[i].flags.out |=
-              (dep_list[j].flags.out ||
-               (dep_list[i].flags.in && dep_list[j].flags.mtx) ||
-               (dep_list[i].flags.mtx && dep_list[j].flags.in));
-          dep_list[i].flags.mtx =
-              dep_list[i].flags.mtx | dep_list[j].flags.mtx &&
-              !dep_list[i].flags.out;
+          if (dep_list[i].flag != dep_list[j].flag) {
+            // two different dependences on same address work identical to OUT
+            dep_list[i].flag = KMP_DEP_OUT;
+          }
           dep_list[j].base_addr = 0; // Mark j element as void
         }
       }
-      if (dep_list[i].flags.mtx) {
+      if (dep_list[i].flag == KMP_DEP_MTX) {
         // limit number of mtx deps to MAX_MTX_DEPS per node
         if (n_mtxs < MAX_MTX_DEPS && task != NULL) {
           ++n_mtxs;
         } else {
-          dep_list[i].flags.in = 1; // downgrade mutexinoutset to inout
-          dep_list[i].flags.out = 1;
-          dep_list[i].flags.mtx = 0;
+          dep_list[i].flag = KMP_DEP_OUT; // downgrade mutexinoutset to inout
         }
       }
     }
@@ -562,6 +542,8 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
         ompt_deps[i].dependence_type = ompt_dependence_type_in;
       else if (dep_list[i].flags.mtx)
         ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
+      else if (dep_list[i].flags.set)
+        ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
     }
     for (i = 0; i < ndeps_noalias; i++) {
       ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
@@ -574,6 +556,8 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
       else if (noalias_dep_list[i].flags.mtx)
         ompt_deps[ndeps + i].dependence_type =
             ompt_dependence_type_mutexinoutset;
+      else if (noalias_dep_list[i].flags.set)
+        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
     }
     ompt_callbacks.ompt_callback(ompt_callback_dependences)(
         &(new_taskdata->ompt_task_info.task_data), ompt_deps, ompt_ndeps);
@@ -723,6 +707,8 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
       else if (dep_list[i].flags.mtx)
         ompt_deps[ndeps + i].dependence_type =
             ompt_dependence_type_mutexinoutset;
+      else if (dep_list[i].flags.set)
+        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
     }
     for (i = 0; i < ndeps_noalias; i++) {
       ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
@@ -735,6 +721,8 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
       else if (noalias_dep_list[i].flags.mtx)
         ompt_deps[ndeps + i].dependence_type =
             ompt_dependence_type_mutexinoutset;
+      else if (noalias_dep_list[i].flags.set)
+        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
     }
     ompt_callbacks.ompt_callback(ompt_callback_dependences)(
         taskwait_task_data, ompt_deps, ompt_ndeps);

diff --git a/openmp/runtime/src/kmp_taskdeps.h b/openmp/runtime/src/kmp_taskdeps.h
index e4953610470a3..2f103453eb992 100644
--- a/openmp/runtime/src/kmp_taskdeps.h
+++ b/openmp/runtime/src/kmp_taskdeps.h
@@ -58,8 +58,8 @@ static inline void __kmp_dephash_free_entries(kmp_info_t *thread,
       kmp_dephash_entry_t *next;
       for (kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next) {
         next = entry->next_in_bucket;
-        __kmp_depnode_list_free(thread, entry->last_ins);
-        __kmp_depnode_list_free(thread, entry->last_mtxs);
+        __kmp_depnode_list_free(thread, entry->last_set);
+        __kmp_depnode_list_free(thread, entry->prev_set);
         __kmp_node_deref(thread, entry->last_out);
         if (entry->mtx_lock) {
           __kmp_destroy_lock(entry->mtx_lock);

diff --git a/openmp/runtime/test/tasking/hidden_helper_task/common.h b/openmp/runtime/test/tasking/hidden_helper_task/common.h
index 0761091e11df7..3f9a77d8d23f4 100644
--- a/openmp/runtime/test/tasking/hidden_helper_task/common.h
+++ b/openmp/runtime/test/tasking/hidden_helper_task/common.h
@@ -13,11 +13,14 @@ using kmp_intptr_t = intptr_t;
 typedef struct kmp_depend_info {
   kmp_intptr_t base_addr;
   size_t len;
-  struct {
-    bool in : 1;
-    bool out : 1;
-    bool mtx : 1;
-  } flags;
+  union {
+    unsigned char flag;
+    struct {
+      bool in : 1;
+      bool out : 1;
+      bool mtx : 1;
+    } flags;
+  };
 } kmp_depend_info_t;
 
 typedef union kmp_cmplrdata {

diff --git a/openmp/runtime/test/tasking/hidden_helper_task/depend.cpp b/openmp/runtime/test/tasking/hidden_helper_task/depend.cpp
index 3eb28607e4629..4bc27c1d406d0 100644
--- a/openmp/runtime/test/tasking/hidden_helper_task/depend.cpp
+++ b/openmp/runtime/test/tasking/hidden_helper_task/depend.cpp
@@ -65,7 +65,7 @@ int main(int argc, char *argv[]) {
 
     kmp_depend_info_t depinfo1;
     depinfo1.base_addr = reinterpret_cast<intptr_t>(&data);
-    depinfo1.flags.out = 1;
+    depinfo1.flag = 2; // OUT
     depinfo1.len = 4;
 
     __kmpc_omp_task_with_deps(nullptr, gtid, task1, 1, &depinfo1, 0, nullptr);
@@ -80,8 +80,7 @@ int main(int argc, char *argv[]) {
 
     kmp_depend_info_t depinfo2;
     depinfo2.base_addr = reinterpret_cast<intptr_t>(&data);
-    depinfo2.flags.in = 1;
-    depinfo2.flags.out = 1;
+    depinfo2.flag = 3; // INOUT
     depinfo2.len = 4;
 
     __kmpc_omp_task_with_deps(nullptr, gtid, task2, 1, &depinfo2, 0, nullptr);
@@ -96,8 +95,7 @@ int main(int argc, char *argv[]) {
 
     kmp_depend_info_t depinfo3;
     depinfo3.base_addr = reinterpret_cast<intptr_t>(&data);
-    depinfo3.flags.in = 1;
-    depinfo3.flags.out = 1;
+    depinfo3.flag = 3; // INOUT
     depinfo3.len = 4;
 
     __kmpc_omp_task_with_deps(nullptr, gtid, task3, 1, &depinfo3, 0, nullptr);
@@ -112,8 +110,7 @@ int main(int argc, char *argv[]) {
 
     kmp_depend_info_t depinfo4;
     depinfo4.base_addr = reinterpret_cast<intptr_t>(&data);
-    depinfo4.flags.in = 1;
-    depinfo4.flags.out = 1;
+    depinfo4.flag = 3; // INOUT
     depinfo4.len = 4;
 
     __kmpc_omp_task_with_deps(nullptr, gtid, task4, 1, &depinfo4, 0, nullptr);

diff --git a/openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp b/openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp
index d5af89553caf0..8cec95be0306e 100644
--- a/openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp
+++ b/openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp
@@ -81,8 +81,7 @@ int main(int argc, char *argv[]) {
 
     kmp_depend_info_t depinfo1;
     depinfo1.base_addr = reinterpret_cast<intptr_t>(&depvar);
-    depinfo1.flags.in = 1;
-    depinfo1.flags.out = 1;
+    depinfo1.flag = 3; // INOUT
     depinfo1.len = 4;
 
     __kmpc_omp_task_with_deps(nullptr, gtid, task1, 1, &depinfo1, 0, nullptr);
@@ -96,8 +95,7 @@ int main(int argc, char *argv[]) {
 
     kmp_depend_info_t depinfo2;
     depinfo2.base_addr = reinterpret_cast<intptr_t>(&depvar);
-    depinfo2.flags.in = 1;
-    depinfo2.flags.out = 1;
+    depinfo2.flag = 3; // INOUT
     depinfo2.len = 4;
 
     __kmpc_omp_task_with_deps(nullptr, gtid, task2, 1, &depinfo2, 0, nullptr);
@@ -111,8 +109,7 @@ int main(int argc, char *argv[]) {
 
     kmp_depend_info_t depinfo3;
     depinfo3.base_addr = reinterpret_cast<intptr_t>(&depvar);
-    depinfo3.flags.in = 1;
-    depinfo3.flags.out = 1;
+    depinfo3.flag = 3; // INOUT
     depinfo3.len = 4;
 
     __kmpc_omp_task_with_deps(nullptr, gtid, task3, 1, &depinfo3, 0, nullptr);

diff --git a/openmp/runtime/test/tasking/omp51_task_dep_inoutset.c b/openmp/runtime/test/tasking/omp51_task_dep_inoutset.c
new file mode 100644
index 0000000000000..a7787ff3da53a
--- /dev/null
+++ b/openmp/runtime/test/tasking/omp51_task_dep_inoutset.c
@@ -0,0 +1,258 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-cxx-compile-and-run
+// UNSUPPORTED: gcc
+
+// Tests OMP 5.0 task dependences "mutexinoutset" and 5.1 "inoutset",
+// emulates compiler codegen for new dep kinds
+// Mutually exclusive tasks get same input dependency info array
+//
+// Task tree created:
+//      task0 - task1 (in)
+//             \
+//        task2 - task3 (inoutset)
+//             /
+//      task4 - task5 (in)
+//           /
+//  task6 <-->task7  (mutexinoutset)
+//       \    /
+//       task8 (in)
+//
+#include <stdio.h>
+#include <omp.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#define mysleep(n) Sleep(n)
+#else
+#include <unistd.h>
+#define mysleep(n) usleep((n)*1000)
+#endif
+
+// to check the # of concurrent tasks (must be 1 for MTX, <3 for other kinds)
+static int volatile checker = 0;
+static int err = 0;
+#ifndef DELAY
+#define DELAY 100
+#endif
+
+// ---------------------------------------------------------------------------
+// internal data to emulate compiler codegen
+typedef struct DEP {
+  size_t addr;
+  size_t len;
+  unsigned char flags;
+} dep;
+typedef struct task {
+  void** shareds;
+  void* entry;
+  int part_id;
+  void* destr_thunk;
+  int priority;
+  long long device_id;
+  int f_priv;
+} task_t;
+#define TIED 1
+typedef int(*entry_t)(int, task_t*);
+typedef struct ID {
+  int reserved_1;
+  int flags;
+  int reserved_2;
+  int reserved_3;
+  char *psource;
+} id;
+// thunk routine for tasks with MTX dependency
+int thunk_m(int gtid, task_t* ptask) {
+  int th = omp_get_thread_num();
+  #pragma omp atomic
+    ++checker;
+  printf("task _%d, th %d\n", ptask->f_priv, th);
+  if (checker != 1) { // no more than 1 task at a time
+    err++;
+    printf("Error1, checker %d != 1\n", checker);
+  }
+  mysleep(DELAY);
+  if (checker != 1) { // no more than 1 task at a time
+    err++;
+    printf("Error2, checker %d != 1\n", checker);
+  }
+  #pragma omp atomic
+    --checker;
+  return 0;
+}
+// thunk routine for tasks with inoutset dependency
+int thunk_s(int gtid, task_t* ptask) {
+  int th = omp_get_thread_num();
+  #pragma omp atomic
+    ++checker;
+  printf("task _%d, th %d\n", ptask->f_priv, th);
+  if (checker > 2) { // no more than 2 tasks concurrently
+    err++;
+    printf("Error1, checker %d > 2\n", checker);
+  }
+  mysleep(DELAY);
+  if (checker > 2) { // no more than 2 tasks concurrently
+    err++;
+    printf("Error2, checker %d > 2\n", checker);
+  }
+  #pragma omp atomic
+    --checker;
+  return 0;
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+int __kmpc_global_thread_num(id*);
+extern task_t* __kmpc_omp_task_alloc(id *loc, int gtid, int flags,
+                                     size_t sz, size_t shar, entry_t rtn);
+int
+__kmpc_omp_task_with_deps(id *loc, int gtid, task_t *task, int nd, dep *dep_lst,
+                          int nd_noalias, dep *noalias_dep_lst);
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of internal data
+// ---------------------------------------------------------------------------
+
+int main()
+{
+  int i1,i2,i3;
+  omp_set_num_threads(4);
+  omp_set_dynamic(0);
+  #pragma omp parallel
+  {
+    #pragma omp single nowait
+    {
+      dep sdep[2];
+      task_t *ptr;
+      int gtid = __kmpc_global_thread_num(&loc);
+      int t = omp_get_thread_num();
+      #pragma omp task depend(in: i1, i2)
+      { int th = omp_get_thread_num();
+        printf("task 0_%d, th %d\n", t, th);
+        #pragma omp atomic
+          ++checker;
+        if (checker > 2) { // no more than 2 tasks concurrently
+          err++;
+          printf("Error1, checker %d > 2\n", checker);
+        }
+        mysleep(DELAY);
+        if (checker > 2) { // no more than 2 tasks concurrently
+          err++;
+          printf("Error1, checker %d > 2\n", checker);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+      #pragma omp task depend(in: i1, i2)
+      { int th = omp_get_thread_num();
+        printf("task 1_%d, th %d\n", t, th);
+        #pragma omp atomic
+          ++checker;
+        if (checker > 2) { // no more than 2 tasks concurrently
+          err++;
+          printf("Error1, checker %d > 2\n", checker);
+        }
+        mysleep(DELAY);
+        if (checker > 2) { // no more than 2 tasks concurrently
+          err++;
+          printf("Error1, checker %d > 2\n", checker);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+// compiler codegen start
+      // task2
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_s);
+      sdep[0].addr = (size_t)&i1;
+      sdep[0].len = 0;   // not used
+      sdep[0].flags = 1; // IN
+      sdep[1].addr = (size_t)&i2;
+      sdep[1].len = 0;   // not used
+      sdep[1].flags = 8; // INOUTSET
+      ptr->f_priv = t + 10; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+
+      // task3
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_s);
+      ptr->f_priv = t + 20; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+// compiler codegen end
+      t = omp_get_thread_num();
+      #pragma omp task depend(in: i1, i2)
+      { int th = omp_get_thread_num();
+        printf("task 4_%d, th %d\n", t, th);
+        #pragma omp atomic
+          ++checker;
+        if (checker > 2) { // no more than 2 tasks concurrently
+          err++;
+          printf("Error1, checker %d > 2\n", checker);
+        }
+        mysleep(DELAY);
+        if (checker > 2) { // no more than 2 tasks concurrently
+          err++;
+          printf("Error1, checker %d > 2\n", checker);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+      #pragma omp task depend(in: i1, i2)
+      { int th = omp_get_thread_num();
+        printf("task 5_%d, th %d\n", t, th);
+        #pragma omp atomic
+          ++checker;
+        if (checker > 2) { // no more than 2 tasks concurrently
+          err++;
+          printf("Error1, checker %d > 2\n", checker);
+        }
+        mysleep(DELAY);
+        if (checker > 2) { // no more than 2 tasks concurrently
+          err++;
+          printf("Error1, checker %d > 2\n", checker);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+// compiler codegen start
+      // task6
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      sdep[0].addr = (size_t)&i1;
+      sdep[0].len = 0;   // not used
+      sdep[0].flags = 4; // MUTEXINOUTSET
+      sdep[1].addr = (size_t)&i3;
+      sdep[1].len = 0;   // not used
+      sdep[1].flags = 4; // MUTEXINOUTSET
+      ptr->f_priv = t + 30; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+
+      // task7
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      ptr->f_priv = t + 40; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+// compiler codegen end
+      #pragma omp task depend(in: i3)
+      { int th = omp_get_thread_num();
+        printf("task 8_%d, th %d\n", t, th);
+        #pragma omp atomic
+          ++checker;
+        if (checker != 1) { // last task should run exclusively
+          err++;
+          printf("Error1, checker %d != 1\n", checker); }
+        mysleep(DELAY);
+        if (checker != 1) { // last task should run exclusively
+          err++;
+          printf("Error1, checker %d != 1\n", checker); }
+        #pragma omp atomic
+          --checker;
+      }
+    } // single
+  } // parallel
+  if (err == 0) {
+    printf("passed\n");
+    return 0;
+  } else {
+    printf("failed\n");
+    return 1;
+  }
+}


        


More information about the Openmp-commits mailing list