[Openmp-commits] [openmp] d40108e - [OpenMP] libomp: runtime part of omp_all_memory task dependence implementation.

via Openmp-commits openmp-commits at lists.llvm.org
Wed Sep 8 06:55:41 PDT 2021


Author: AndreyChurbanov
Date: 2021-09-08T16:55:32+03:00
New Revision: d40108e0af08389a791c7b6783f416486068be96

URL: https://github.com/llvm/llvm-project/commit/d40108e0af08389a791c7b6783f416486068be96
DIFF: https://github.com/llvm/llvm-project/commit/d40108e0af08389a791c7b6783f416486068be96.diff

LOG: [OpenMP] libomp: runtime part of omp_all_memory task dependence implementation.

The new omp_all_memory task dependence type is implemented.
The library recognizes the new type via either
(dependence_address == NULL && dependence_flag == 0x80)
or
(dependence_address == SIZE_MAX).
A task with the new dependence type depends on every preceding task
that has a dependence of any type (it acts as a kind of dependence barrier).

Differential Revision: https://reviews.llvm.org/D108574

Added: 
    openmp/runtime/test/tasking/kmp_task_depend_all.c
    openmp/runtime/test/tasking/kmp_taskwait_depend_all.c

Modified: 
    openmp/runtime/src/kmp.h
    openmp/runtime/src/kmp_taskdeps.cpp
    openmp/runtime/src/kmp_taskdeps.h

Removed: 
    


################################################################################
diff  --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index b120a19276557..90973d0667ea0 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2255,22 +2255,26 @@ typedef union kmp_depnode kmp_depnode_t;
 typedef struct kmp_depnode_list kmp_depnode_list_t;
 typedef struct kmp_dephash_entry kmp_dephash_entry_t;
 
+// macros for checking dep flag as an integer
 #define KMP_DEP_IN 0x1
 #define KMP_DEP_OUT 0x2
 #define KMP_DEP_INOUT 0x3
 #define KMP_DEP_MTX 0x4
 #define KMP_DEP_SET 0x8
+#define KMP_DEP_ALL 0x80
 // Compiler sends us this info:
 typedef struct kmp_depend_info {
   kmp_intptr_t base_addr;
   size_t len;
   union {
-    kmp_uint8 flag;
-    struct {
+    kmp_uint8 flag; // flag as an unsigned char
+    struct { // flag as a set of 8 bits
       unsigned in : 1;
       unsigned out : 1;
       unsigned mtx : 1;
       unsigned set : 1;
+      unsigned unused : 3; // pads the four bits above up to seven bits
+      unsigned all : 1; // omp_all_memory; 8th bit, meant to overlay KMP_DEP_ALL (0x80) -- NOTE(review): relies on LSB-first bit-field allocation
     } flags;
   };
 } kmp_depend_info_t;
@@ -2316,6 +2320,7 @@ struct kmp_dephash_entry {
 typedef struct kmp_dephash {
   kmp_dephash_entry_t **buckets;
   size_t size;
+  kmp_depnode_t *last_all;
   size_t generation;
   kmp_uint32 nelements;
   kmp_uint32 nconflicts;

diff  --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index dd3e7688d33f7..87b02b99d2e0e 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -86,6 +86,7 @@ static kmp_dephash_t *__kmp_dephash_extend(kmp_info_t *thread,
   h->buckets = (kmp_dephash_entry **)(h + 1);
   h->generation = gen;
   h->nconflicts = 0;
+  h->last_all = current_dephash->last_all;
 
   // make sure buckets are properly initialized
   for (size_t i = 0; i < new_size; i++) {
@@ -142,6 +143,7 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
   h->nelements = 0;
   h->nconflicts = 0;
   h->buckets = (kmp_dephash_entry **)(h + 1);
+  h->last_all = NULL;
 
   for (size_t i = 0; i < h_size; i++)
     h->buckets[i] = 0;
@@ -174,7 +176,10 @@ static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
         thread, sizeof(kmp_dephash_entry_t));
 #endif
     entry->addr = addr;
-    entry->last_out = NULL;
+    if (!h->last_all) // no predecessor task with omp_all_memory dependence
+      entry->last_out = NULL;
+    else // else link the omp_all_memory depnode to the new entry
+      entry->last_out = __kmp_node_ref(h->last_all);
     entry->last_set = NULL;
     entry->prev_set = NULL;
     entry->last_flag = 0;
@@ -290,6 +295,63 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
   return npredecessors;
 }
 
+static inline kmp_int32
+__kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *h,
+                      bool dep_barrier, kmp_task_t *task) { // omp_all_memory handling for `node`; returns the predecessor count
+  KA_TRACE(30, ("__kmp_process_dep_all: T#%d processing dep_all, "
+                "dep_barrier = %d\n",
+                gtid, dep_barrier));
+  kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_int32 npredecessors = 0; // running total of the link_successor results
+
+  // link to the previous omp_all_memory node (h->last_all), if any
+  npredecessors +=
+      __kmp_depnode_link_successor(gtid, thread, task, node, h->last_all);
+  __kmp_node_deref(thread, h->last_all); // presumably drops the reference the hash held on the old node
+  if (!dep_barrier) {
+    h->last_all = __kmp_node_ref(node); // later entries and tasks chain after this node
+  } else {
+    // if this is a sync point in the serial sequence, then the previous
+    // outputs are guaranteed to be completed after the execution of this
+    // task so the previous output nodes can be cleared.
+    h->last_all = NULL;
+  }
+
+  // process all regular dependences: every entry of every bucket
+  for (size_t i = 0; i < h->size; i++) {
+    kmp_dephash_entry_t *info = h->buckets[i];
+    if (!info) // skip empty slots in dephash
+      continue;
+    for (; info; info = info->next_in_bucket) {
+      // for each entry the omp_all_memory works as OUT dependence
+      kmp_depnode_t *last_out = info->last_out;
+      kmp_depnode_list_t *last_set = info->last_set;
+      kmp_depnode_list_t *prev_set = info->prev_set;
+      if (last_set) { // a set list is recorded: link to it rather than to last_out
+        npredecessors +=
+            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
+        __kmp_depnode_list_free(thread, last_set);
+        __kmp_depnode_list_free(thread, prev_set);
+        info->last_set = NULL;
+        info->prev_set = NULL;
+        info->last_flag = 0; // no sets in this dephash entry
+      } else {
+        npredecessors +=
+            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
+      }
+      __kmp_node_deref(thread, last_out); // old last_out is replaced (or cleared) just below
+      if (!dep_barrier) {
+        info->last_out = __kmp_node_ref(node); // this node becomes the entry's last output
+      } else {
+        info->last_out = NULL; // barrier case: same reasoning as for h->last_all above
+      }
+    }
+  }
+  KA_TRACE(30, ("__kmp_process_dep_all: T#%d found %d predecessors\n", gtid,
+                npredecessors));
+  return npredecessors;
+}
+
 template <bool filter>
 static inline kmp_int32
 __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
@@ -417,7 +479,7 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
                              kmp_depend_info_t *dep_list,
                              kmp_int32 ndeps_noalias,
                              kmp_depend_info_t *noalias_dep_list) {
-  int i, n_mtxs = 0;
+  int i, n_mtxs = 0, dep_all = 0;
 #if KMP_DEBUG
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
 #endif
@@ -429,7 +491,7 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
   // Filter deps in dep_list
   // TODO: Different algorithm for large dep_list ( > 10 ? )
   for (i = 0; i < ndeps; i++) {
-    if (dep_list[i].base_addr != 0) {
+    if (dep_list[i].base_addr != 0 && dep_list[i].base_addr != KMP_SIZE_T_MAX) {
       KMP_DEBUG_ASSERT(
           dep_list[i].flag == KMP_DEP_IN || dep_list[i].flag == KMP_DEP_OUT ||
           dep_list[i].flag == KMP_DEP_INOUT ||
@@ -451,6 +513,13 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
           dep_list[i].flag = KMP_DEP_OUT; // downgrade mutexinoutset to inout
         }
       }
+    } else if (dep_list[i].flag == KMP_DEP_ALL ||
+               dep_list[i].base_addr == KMP_SIZE_T_MAX) {
+      // omp_all_memory dependence can be marked by compiler by either
+      // (addr=0 && flag=0x80) (flag KMP_DEP_ALL), or (addr=-1).
+      // omp_all_memory overrides all other dependences if any
+      dep_all = 1;
+      break;
     }
   }
 
@@ -464,10 +533,14 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
   // the end
   int npredecessors;
 
-  npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, ndeps,
-                                           dep_list, task);
-  npredecessors += __kmp_process_deps<false>(
-      gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
+  if (!dep_all) { // regular dependences
+    npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier,
+                                             ndeps, dep_list, task);
+    npredecessors += __kmp_process_deps<false>(
+        gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
+  } else { // omp_all_memory dependence
+    npredecessors = __kmp_process_dep_all(gtid, node, *hash, dep_barrier, task);
+  }
 
   node->dn.task = task;
   KMP_MB();

diff  --git a/openmp/runtime/src/kmp_taskdeps.h b/openmp/runtime/src/kmp_taskdeps.h
index 02ceae9aa6a45..99f182bbd050f 100644
--- a/openmp/runtime/src/kmp_taskdeps.h
+++ b/openmp/runtime/src/kmp_taskdeps.h
@@ -73,6 +73,8 @@ static inline void __kmp_dephash_free_entries(kmp_info_t *thread,
       h->buckets[i] = 0;
     }
   }
+  __kmp_node_deref(thread, h->last_all);
+  h->last_all = NULL;
 }
 
 static inline void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) {

diff  --git a/openmp/runtime/test/tasking/kmp_task_depend_all.c b/openmp/runtime/test/tasking/kmp_task_depend_all.c
new file mode 100644
index 0000000000000..9a2999657abdc
--- /dev/null
+++ b/openmp/runtime/test/tasking/kmp_task_depend_all.c
@@ -0,0 +1,298 @@
+// RUN: %libomp-compile-and-run
+// The runtime currently does not get dependency information from GCC.
+// UNSUPPORTED: gcc
+
+// Tests OMP 5.x task dependence "omp_all_memory",
+// emulates compiler codegen versions for new dep kind
+//
+// Task tree created:
+//      task0 - task1 (in: i1, i2)
+//             \
+//        task2 (inoutset: i2), (in: i1)
+//             /
+//        task3 (omp_all_memory) via flag=0x80
+//             /
+//      task4 - task5 (in: i1, i2)
+//           /
+//       task6 (omp_all_memory) via addr=-1
+//           /
+//       task7 (omp_all_memory) via flag=0x80
+//           /
+//       task8 (in: i3)
+//
+#include <stdio.h>
+#include <omp.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#define mysleep(n) Sleep(n)
+#else
+#include <unistd.h>
+#define mysleep(n) usleep((n)*1000)
+#endif
+
+// number of concurrently running tasks (must be 1 for all_memory/inoutset tasks, <3 for other kinds)
+static int checker = 0;
+static int err = 0;
+#ifndef DELAY
+#define DELAY 100
+#endif
+
+// ---------------------------------------------------------------------------
+// internal data to emulate compiler codegen
+typedef struct DEP { // one dependence entry as passed to __kmpc_omp_task_with_deps
+  size_t addr; // address of the dependence object; 0 or (size_t)-1 for omp_all_memory
+  size_t len; // always set to 0 by this test ("not used")
+  unsigned char flags; // 1 = IN, 2 = OUT, 4 = MUTEXINOUTSET, 8 = INOUTSET, DEP_ALL_MEM = omp_all_memory
+} dep;
+#define DEP_ALL_MEM 0x80
+typedef struct task { // task object returned by __kmpc_omp_task_alloc; presumably mirrors the runtime's layout -- TODO confirm
+  void** shareds;
+  void* entry;
+  int part_id;
+  void* destr_thunk;
+  int priority;
+  long long device_id;
+  int f_priv; // the single first-private value; set before submission and printed by the thunks
+} task_t;
+#define TIED 1
+typedef int(*entry_t)(int, task_t*);
+typedef struct ID { // location descriptor handed to every __kmpc_* call in this test
+  int reserved_1;
+  int flags; // the `loc` instance in this file sets it to 2
+  int reserved_2;
+  int reserved_3;
+  char *psource; // location string, ";file;func;0;0;;" in this test
+} id;
+// thunk routine for tasks with omp_all_memory dependence (tasks 3, 6 and 7)
+int thunk_m(int gtid, task_t* ptask) {
+  int lcheck, th; // lcheck: snapshot of checker; th: executing thread number
+  #pragma omp atomic capture
+    lcheck = ++checker; // register as running; expected to be the only running task
+  th = omp_get_thread_num();
+  printf("task m_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
+  if (lcheck != 1) { // no more than 1 task at a time
+    err++;
+    printf("Error m1, checker %d != 1\n", lcheck);
+  }
+  mysleep(DELAY); // stay registered long enough for an overlapping task to show up
+  #pragma omp atomic read
+    lcheck = checker; // must still be equal to 1
+  if (lcheck != 1) {
+    err++;
+    printf("Error m2, checker %d != 1\n", lcheck);
+  }
+  #pragma omp atomic
+    --checker; // unregister before returning
+  return 0;
+}
+// thunk routine for the task with inoutset dependency (task 2)
+int thunk_s(int gtid, task_t* ptask) {
+  int lcheck, th; // lcheck: snapshot of checker; th: executing thread number
+  #pragma omp atomic capture
+    lcheck = ++checker; // 1
+  th = omp_get_thread_num();
+  printf("task 2_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
+  if (lcheck != 1) { // no more than 1 task at a time
+    err++;
+    printf("Error s1, checker %d != 1\n", lcheck);
+  }
+  mysleep(DELAY); // stay registered long enough for an overlapping task to show up
+  #pragma omp atomic read
+    lcheck = checker; // must still be equal to 1
+  if (lcheck != 1) {
+    err++;
+    printf("Error s2, checker %d != 1\n", lcheck);
+  }
+  #pragma omp atomic
+    --checker; // unregister before returning
+  return 0;
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+int __kmpc_global_thread_num(id*);
+task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags,
+                              size_t sz, size_t shar, entry_t rtn);
+int __kmpc_omp_task_with_deps(id *loc, int gtid, task_t *task, int ndeps,
+                              dep *dep_lst, int nd_noalias, dep *noalias_lst);
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of internal data
+// ---------------------------------------------------------------------------
+
+int main()
+{
+  int i1,i2,i3; // dependence objects: only their addresses are used, never their values
+  omp_set_num_threads(8);
+  omp_set_dynamic(0);
+  #pragma omp parallel
+  {
+    #pragma omp single nowait
+    {
+      dep sdep[2]; // dependence list, refilled for each hand-built task
+      task_t *ptr;
+      int gtid = __kmpc_global_thread_num(&loc); // thread id passed to the __kmpc_* calls below
+      int t = omp_get_thread_num(); // number of the generating thread; tags every task's output
+      #pragma omp task depend(in: i1, i2)
+      { // task 0
+        int lcheck, th;
+        #pragma omp atomic capture
+          lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 0_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently; NOTE(review): unguarded, unlike the atomic err++ for Error2
+          printf("Error1, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+          lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          #pragma omp atomic
+            err++;
+          printf("Error2, checker %d, not 1 or 2\n", lcheck);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+      #pragma omp task depend(in: i1, i2)
+      { // task 1
+        int lcheck, th;
+        #pragma omp atomic capture
+          lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 1_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error3, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+          lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error4, checker %d, not 1 or 2\n", lcheck);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+// compiler codegen start
+      // task2
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_s);
+      sdep[0].addr = (size_t)&i1;
+      sdep[0].len = 0;   // not used
+      sdep[0].flags = 1; // IN
+      sdep[1].addr = (size_t)&i2;
+      sdep[1].len = 0;   // not used
+      sdep[1].flags = 8; // INOUTSET
+      ptr->f_priv = t + 10; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+
+      // task3
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      sdep[0].addr = (size_t)&i1; // to be ignored: the omp_all_memory entry below overrides it
+      sdep[0].len = 0;   // not used
+      sdep[0].flags = 1; // IN
+      sdep[1].addr = 0;
+      sdep[1].len = 0;   // not used
+      sdep[1].flags = DEP_ALL_MEM; // omp_all_memory
+      ptr->f_priv = t + 20; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+// compiler codegen end
+      #pragma omp task depend(in: i1, i2)
+      { // task 4
+        int lcheck, th;
+        #pragma omp atomic capture
+          lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 4_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error5, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+          lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error6, checker %d, not 1 or 2\n", lcheck);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+      #pragma omp task depend(in: i1, i2)
+      { // task 5
+        int lcheck, th;
+        #pragma omp atomic capture
+          lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 5_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error7, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+          lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error8, checker %d, not 1 or 2\n", lcheck);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+// compiler codegen start
+      // task6
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      sdep[0].addr = (size_t)(-1); // omp_all_memory
+      sdep[0].len = 0;   // not used
+      sdep[0].flags = 2; // OUT (addr == -1 alone marks omp_all_memory)
+      ptr->f_priv = t + 30; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 1, sdep, 0, 0);
+
+      // task7
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      sdep[0].addr = 0;
+      sdep[0].len = 0;   // not used
+      sdep[0].flags = DEP_ALL_MEM; // omp_all_memory
+      sdep[1].addr = (size_t)&i3; // to be ignored: the omp_all_memory entry above overrides it
+      sdep[1].len = 0;   // not used
+      sdep[1].flags = 4; // MUTEXINOUTSET
+      ptr->f_priv = t + 40; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+// compiler codegen end
+      #pragma omp task depend(in: i3)
+      { // task 8
+        int lcheck, th;
+        #pragma omp atomic capture
+          lcheck = ++checker; // 1
+        th = omp_get_thread_num();
+        printf("task 8_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck != 1) {
+          err++;
+          printf("Error9, checker %d, != 1\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+          lcheck = checker;
+        if (lcheck != 1) {
+          err++;
+          printf("Error10, checker %d, != 1\n", lcheck);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+    } // single
+  } // parallel
+  if (err == 0 && checker == 0) { // no check failed and every task unregistered itself
+    printf("passed\n");
+    return 0;
+  } else {
+    printf("failed, err = %d, checker = %d\n", err, checker);
+    return 1;
+  }
+}

diff  --git a/openmp/runtime/test/tasking/kmp_taskwait_depend_all.c b/openmp/runtime/test/tasking/kmp_taskwait_depend_all.c
new file mode 100644
index 0000000000000..98ce1f8347f37
--- /dev/null
+++ b/openmp/runtime/test/tasking/kmp_taskwait_depend_all.c
@@ -0,0 +1,334 @@
+// RUN: %libomp-compile-and-run
+// The runtime currently does not get dependency information from GCC.
+// UNSUPPORTED: gcc
+
+// Tests OMP 5.x task dependence "omp_all_memory",
+// emulates compiler codegen versions for new dep kind
+//
+// Task tree created:
+//      task0 - task1 (in: i1, i2)
+//             \
+//        task2 (inoutset: i2), (in: i1)
+//             /
+//        task3 (omp_all_memory) via flag=0x80
+//             /
+//      task4 - task5 (in: i1, i2)
+//           /
+//       task6 (omp_all_memory) via addr=-1
+//           /
+//       task7 (omp_all_memory) via flag=0x80
+//           /
+//       task8 (in: i3)
+//           /
+//       task9 - no dependences
+//           /
+//       taskwait (omp_all_memory) (should not wait for task9, see prints)
+//
+#include <stdio.h>
+#include <omp.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#define mysleep(n) Sleep(n)
+#else
+#include <unistd.h>
+#define mysleep(n) usleep((n)*1000)
+#endif
+
+// number of concurrently running tasks (must be 1 for all_memory/inoutset tasks, <3 for other kinds)
+static int checker = 0;
+static int err = 0;
+static int taskwait_flag = 0;
+#ifndef DELAY
+// set delay interval in ms for dependent tasks
+#define DELAY 100
+#endif
+
+// ---------------------------------------------------------------------------
+// internal data to emulate compiler codegen
+typedef struct DEP { // one dependence entry as passed to the __kmpc_*_deps entry points
+  size_t addr; // address of the dependence object; 0 or (size_t)-1 for omp_all_memory
+  size_t len; // always set to 0 by this test ("not used")
+  unsigned char flags; // 1 = IN, 2 = OUT, 4 = MUTEXINOUTSET, 8 = INOUTSET, DEP_ALL_MEM = omp_all_memory
+} dep;
+#define DEP_ALL_MEM 0x80
+typedef struct task { // task object returned by __kmpc_omp_task_alloc; presumably mirrors the runtime's layout -- TODO confirm
+  void** shareds;
+  void* entry;
+  int part_id;
+  void* destr_thunk;
+  int priority;
+  long long device_id;
+  int f_priv; // the single first-private value; set before submission and printed by the thunks
+} task_t;
+#define TIED 1
+typedef int(*entry_t)(int, task_t*);
+typedef struct ID { // location descriptor handed to every __kmpc_* call in this test
+  int reserved_1;
+  int flags; // the `loc` instance in this file sets it to 2
+  int reserved_2;
+  int reserved_3;
+  char *psource; // location string, ";file;func;0;0;;" in this test
+} id;
+// thunk routine for tasks with omp_all_memory dependence (tasks 3, 6 and 7)
+int thunk_m(int gtid, task_t* ptask) {
+  int lcheck, th; // lcheck: snapshot of checker; th: executing thread number
+  #pragma omp atomic capture
+    lcheck = ++checker; // register as running; expected to be the only running task
+  th = omp_get_thread_num();
+  printf("task m_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
+  if (lcheck != 1) { // no more than 1 task at a time
+    err++;
+    printf("Error m1, checker %d != 1\n", lcheck);
+  }
+  mysleep(DELAY); // stay registered long enough for an overlapping task to show up
+  #pragma omp atomic read
+    lcheck = checker; // must still be equal to 1
+  if (lcheck != 1) {
+    err++;
+    printf("Error m2, checker %d != 1\n", lcheck);
+  }
+  #pragma omp atomic
+    --checker; // unregister before returning
+  return 0;
+}
+// thunk routine for the task with inoutset dependency (task 2)
+int thunk_s(int gtid, task_t* ptask) {
+  int lcheck, th; // lcheck: snapshot of checker; th: executing thread number
+  #pragma omp atomic capture
+    lcheck = ++checker; // 1
+  th = omp_get_thread_num();
+  printf("task 2_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
+  if (lcheck != 1) { // no more than 1 task at a time
+    err++;
+    printf("Error s1, checker %d != 1\n", lcheck);
+  }
+  mysleep(DELAY); // stay registered long enough for an overlapping task to show up
+  #pragma omp atomic read
+    lcheck = checker; // must still be equal to 1
+  if (lcheck != 1) {
+    err++;
+    printf("Error s2, checker %d != 1\n", lcheck);
+  }
+  #pragma omp atomic
+    --checker; // unregister before returning
+  return 0;
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+int __kmpc_global_thread_num(id*);
+task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags,
+                              size_t sz, size_t shar, entry_t rtn);
+int __kmpc_omp_task_with_deps(id *loc, int gtid, task_t *task, int ndeps,
+                              dep *dep_lst, int nd_noalias, dep *noalias_lst);
+void __kmpc_omp_wait_deps(id *loc, int gtid, int ndeps, dep *dep_lst,
+                          int ndeps_noalias, dep *noalias_dep_lst);
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of internal data
+// ---------------------------------------------------------------------------
+
+int main()
+{
+  int i1,i2,i3; // dependence objects: only their addresses are used, never their values
+  omp_set_num_threads(8);
+  omp_set_dynamic(0);
+  #pragma omp parallel
+  {
+    #pragma omp single nowait
+    {
+      dep sdep[2]; // dependence list, refilled for each hand-built task
+      task_t *ptr;
+      int gtid = __kmpc_global_thread_num(&loc); // thread id passed to the __kmpc_* calls below
+      int t = omp_get_thread_num(); // number of the generating thread; tags every task's output
+      // Create the longest-running task first to ensure it is stolen.
+      // The test may hang if this task is created last and then
+      // executed by the thread that executes the taskwait.
+      #pragma omp task
+      { // task 9 - long running task
+        int flag;
+        int th = omp_get_thread_num();
+        printf("signalled independent task 9_%d, th %d started....\n", t, th);
+        // Wait for taskwait depend() to finish
+        // If the taskwait depend() improperly depends on this task
+        // to finish, then the test will hang and a timeout should trigger
+        while (1) {
+          #pragma omp atomic read
+          flag = taskwait_flag;
+          if (flag == 1)
+            break;
+        }
+        printf("signalled independent task 9_%d, th %d finished....\n", t, th);
+      }
+      #pragma omp task depend(in: i1, i2)
+      { // task 0
+        int lcheck, th;
+        #pragma omp atomic capture
+          lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 0_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently; NOTE(review): unguarded, unlike the atomic err++ for Error2
+          printf("Error1, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+          lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          #pragma omp atomic
+            err++;
+          printf("Error2, checker %d, not 1 or 2\n", lcheck);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+      #pragma omp task depend(in: i1, i2)
+      { // task 1
+        int lcheck, th;
+        #pragma omp atomic capture
+          lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 1_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error3, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+          lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error4, checker %d, not 1 or 2\n", lcheck);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+// compiler codegen start
+      // task2
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_s);
+      sdep[0].addr = (size_t)&i1;
+      sdep[0].len = 0;   // not used
+      sdep[0].flags = 1; // IN
+      sdep[1].addr = (size_t)&i2;
+      sdep[1].len = 0;   // not used
+      sdep[1].flags = 8; // INOUTSET
+      ptr->f_priv = t + 10; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+
+      // task3
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      sdep[0].addr = (size_t)&i1; // to be ignored: the omp_all_memory entry below overrides it
+      sdep[0].len = 0;   // not used
+      sdep[0].flags = 1; // IN
+      sdep[1].addr = 0;
+      sdep[1].len = 0;   // not used
+      sdep[1].flags = DEP_ALL_MEM; // omp_all_memory
+      ptr->f_priv = t + 20; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+// compiler codegen end
+      #pragma omp task depend(in: i1, i2)
+      { // task 4
+        int lcheck, th;
+        #pragma omp atomic capture
+          lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 4_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error5, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+          lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error6, checker %d, not 1 or 2\n", lcheck);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+      #pragma omp task depend(in: i1, i2)
+      { // task 5
+        int lcheck, th;
+        #pragma omp atomic capture
+          lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 5_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error7, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+          lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error8, checker %d, not 1 or 2\n", lcheck);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+// compiler codegen start
+      // task6
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      sdep[0].addr = (size_t)(-1); // omp_all_memory
+      sdep[0].len = 0;   // not used
+      sdep[0].flags = 2; // OUT (addr == -1 alone marks omp_all_memory)
+      ptr->f_priv = t + 30; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 1, sdep, 0, 0);
+
+      // task7
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      sdep[0].addr = 0;
+      sdep[0].len = 0;   // not used
+      sdep[0].flags = DEP_ALL_MEM; // omp_all_memory
+      sdep[1].addr = (size_t)&i3; // to be ignored: the omp_all_memory entry above overrides it
+      sdep[1].len = 0;   // not used
+      sdep[1].flags = 4; // MUTEXINOUTSET
+      ptr->f_priv = t + 40; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+// compiler codegen end
+      #pragma omp task depend(in: i3)
+      { // task 8
+        int lcheck, th;
+        #pragma omp atomic capture
+          lcheck = ++checker; // 1
+        th = omp_get_thread_num();
+        printf("task 8_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck != 1) {
+          err++;
+          printf("Error9, checker %d, != 1\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+          lcheck = checker;
+        if (lcheck != 1) {
+          err++;
+          printf("Error10, checker %d, != 1\n", lcheck);
+        }
+        #pragma omp atomic
+          --checker;
+      }
+      mysleep(1); // wait a bit to ensure at least first task is stolen
+// emulates: #pragma omp taskwait depend(out: omp_all_memory)
+      printf("all 10 tasks generated;\n"
+             "taskwait depend(omp_all_memory: out)  started, th %d\n", t);
+      __kmpc_omp_wait_deps(&loc, gtid, 1, sdep, 0, 0); // sdep[0] still holds task7's omp_all_memory entry (addr=0, flags=0x80)
+      #pragma omp atomic write
+        taskwait_flag = 1; // releases task 9 from its spin loop
+      printf("taskwait depend(omp_all_memory: out)  passed, th %d\n", t);
+      fflush(0);
+    } // single
+  } // parallel
+  if (err == 0 && checker == 0) { // no check failed and every task unregistered itself
+    printf("passed\n");
+    return 0;
+  } else {
+    printf("failed, err = %d, checker = %d\n", err, checker);
+    return 1;
+  }
+}


        


More information about the Openmp-commits mailing list