[Openmp-commits] [openmp] 6ef16f2 - [OpenMP] Add OMPT support for omp_all_memory task dependence

Joachim Jenke via Openmp-commits openmp-commits at lists.llvm.org
Fri Jul 7 04:45:21 PDT 2023


Author: Joachim Jenke
Date: 2023-07-07T13:44:53+02:00
New Revision: 6ef16f2618093e3f3def6e905437013de71785f5

URL: https://github.com/llvm/llvm-project/commit/6ef16f2618093e3f3def6e905437013de71785f5
DIFF: https://github.com/llvm/llvm-project/commit/6ef16f2618093e3f3def6e905437013de71785f5.diff

LOG: [OpenMP] Add OMPT support for omp_all_memory task dependence

omp_all_memory currently has no representation in OMPT.

This patch adds two new dependence-type values, ompt_dependence_type_out_all_memory and ompt_dependence_type_inout_all_memory, as suggested by omp-lang issue #3007.

Differential Revision: https://reviews.llvm.org/D111788

Added: 
    openmp/runtime/test/ompt/tasks/kmp_task_depend_all.c
    openmp/runtime/test/ompt/tasks/omp_task_depend_all.c

Modified: 
    openmp/runtime/src/include/omp-tools.h.var
    openmp/runtime/src/kmp_taskdeps.cpp
    openmp/runtime/test/ompt/callback.h

Removed: 
    


################################################################################
diff  --git a/openmp/runtime/src/include/omp-tools.h.var b/openmp/runtime/src/include/omp-tools.h.var
index 53defaa5d09e6e..a3ec0309db18c1 100644
--- a/openmp/runtime/src/include/omp-tools.h.var
+++ b/openmp/runtime/src/include/omp-tools.h.var
@@ -413,13 +413,15 @@ typedef enum ompt_target_map_flag_t {
 } ompt_target_map_flag_t;
 
 typedef enum ompt_dependence_type_t {
-  ompt_dependence_type_in              = 1,
-  ompt_dependence_type_out             = 2,
-  ompt_dependence_type_inout           = 3,
-  ompt_dependence_type_mutexinoutset   = 4,
-  ompt_dependence_type_source          = 5,
-  ompt_dependence_type_sink            = 6,
-  ompt_dependence_type_inoutset        = 7
+  ompt_dependence_type_in               = 1,
+  ompt_dependence_type_out              = 2,
+  ompt_dependence_type_inout            = 3,
+  ompt_dependence_type_mutexinoutset    = 4,
+  ompt_dependence_type_source           = 5,
+  ompt_dependence_type_sink             = 6,
+  ompt_dependence_type_inoutset         = 7,
+  ompt_dependence_type_out_all_memory   = 34,
+  ompt_dependence_type_inout_all_memory = 35
 } ompt_dependence_type_t;
 
 typedef enum ompt_severity_t {

diff  --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 22119a9d2d453b..3b39f503973635 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -745,7 +745,9 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
 
     for (i = 0; i < ndeps; i++) {
       ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
-      if (dep_list[i].flags.in && dep_list[i].flags.out)
+      if (dep_list[i].base_addr == KMP_SIZE_T_MAX)
+        ompt_deps[i].dependence_type = ompt_dependence_type_out_all_memory;
+      else if (dep_list[i].flags.in && dep_list[i].flags.out)
         ompt_deps[i].dependence_type = ompt_dependence_type_inout;
       else if (dep_list[i].flags.out)
         ompt_deps[i].dependence_type = ompt_dependence_type_out;
@@ -755,10 +757,15 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
         ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
       else if (dep_list[i].flags.set)
         ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
+      else if (dep_list[i].flags.all)
+        ompt_deps[i].dependence_type = ompt_dependence_type_out_all_memory;
     }
     for (i = 0; i < ndeps_noalias; i++) {
       ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
-      if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
+      if (noalias_dep_list[i].base_addr == KMP_SIZE_T_MAX)
+        ompt_deps[ndeps + i].dependence_type =
+            ompt_dependence_type_out_all_memory;
+      else if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
         ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
       else if (noalias_dep_list[i].flags.out)
         ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
@@ -769,6 +776,9 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
             ompt_dependence_type_mutexinoutset;
       else if (noalias_dep_list[i].flags.set)
         ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
+      else if (noalias_dep_list[i].flags.all)
+        ompt_deps[ndeps + i].dependence_type =
+            ompt_dependence_type_out_all_memory;
     }
     ompt_callbacks.ompt_callback(ompt_callback_dependences)(
         &(new_taskdata->ompt_task_info.task_data), ompt_deps, ompt_ndeps);

diff  --git a/openmp/runtime/test/ompt/callback.h b/openmp/runtime/test/ompt/callback.h
index dc228e449725f9..925351ec35d624 100644
--- a/openmp/runtime/test/ompt/callback.h
+++ b/openmp/runtime/test/ompt/callback.h
@@ -47,7 +47,7 @@ static const char* ompt_cancel_flag_t_values[] = {
   "ompt_cancel_discarded_task"
 };
 
-static const char *ompt_dependence_type_t_values[] = {
+static const char *ompt_dependence_type_t_values[36] = {
     "ompt_dependence_type_UNDEFINED",
     "ompt_dependence_type_in", // 1
     "ompt_dependence_type_out", // 2
@@ -55,7 +55,12 @@ static const char *ompt_dependence_type_t_values[] = {
     "ompt_dependence_type_mutexinoutset", // 4
     "ompt_dependence_type_source", // 5
     "ompt_dependence_type_sink", // 6
-    "ompt_dependence_type_inoutset" // 7
+    "ompt_dependence_type_inoutset", // 7
+    "", "", "", "", "", "", // 8-13
+    "", "", "", "", "", "", "", "", "", "", // 14-23
+    "", "", "", "", "", "", "", "", "", "", // 24-33
+    "ompt_dependence_type_out_all_memory", // 34
+    "ompt_dependence_type_inout_all_memory" // 35
 };
 
 static void format_task_type(int type, char *buffer) {

diff  --git a/openmp/runtime/test/ompt/tasks/kmp_task_depend_all.c b/openmp/runtime/test/ompt/tasks/kmp_task_depend_all.c
new file mode 100644
index 00000000000000..a18fe5a726e777
--- /dev/null
+++ b/openmp/runtime/test/ompt/tasks/kmp_task_depend_all.c
@@ -0,0 +1,345 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+
+// RUN: %libomp-compile-and-run
+// The runtime currently does not get dependency information from GCC.
+// UNSUPPORTED: gcc
+
+// Tests OMP 5.x task dependence "omp_all_memory",
+// emulates compiler codegen versions for new dep kind
+//
+// Task tree created:
+//      task0 - task1 (in: i1, i2)
+//             \
+//        task2 (inoutset: i2), (in: i1)
+//             /
+//        task3 (omp_all_memory) via flag=0x80
+//             /
+//      task4 - task5 (in: i1, i2)
+//           /
+//       task6 (omp_all_memory) via addr=-1
+//           /
+//       task7 (omp_all_memory) via flag=0x80
+//           /
+//       task8 (in: i3)
+//
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ndeps=2
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ndeps=2
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ompt_dependence_type_inoutset
+// CHECK-SAME: ndeps=2
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ompt_dependence_type_out_all_memory
+// CHECK-SAME: ndeps=2
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ndeps=2
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ndeps=2
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_out_all_memory
+// CHECK-SAME: ndeps=1
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_out_all_memory
+// CHECK-SAME: ompt_dependence_type_mutexinoutset
+// CHECK-SAME: ndeps=2
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ndeps=1
+
+#include "callback.h"
+#include <stdio.h>
+#include <omp.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#define mysleep(n) Sleep(n)
+#else
+#include <unistd.h>
+#define mysleep(n) usleep((n)*1000)
+#endif
+
+// to check the # of concurrent tasks (must be 1 for MTX, <3 for other kinds)
+static int checker = 0;
+static int err = 0;
+#ifndef DELAY
+#define DELAY 100
+#endif
+
+// ---------------------------------------------------------------------------
+// internal data to emulate compiler codegen
+typedef struct DEP {
+  size_t addr;
+  size_t len;
+  unsigned char flags;
+} dep;
+#define DEP_ALL_MEM 0x80
+typedef struct task {
+  void **shareds;
+  void *entry;
+  int part_id;
+  void *destr_thunk;
+  int priority;
+  long long device_id;
+  int f_priv;
+} task_t;
+#define TIED 1
+typedef int (*entry_t)(int, task_t *);
+typedef struct ID {
+  int reserved_1;
+  int flags;
+  int reserved_2;
+  int reserved_3;
+  char *psource;
+} id;
+// thunk routine for tasks with ALL dependency
+int thunk_m(int gtid, task_t *ptask) {
+  int lcheck, th;
+#pragma omp atomic capture
+  lcheck = ++checker;
+  th = omp_get_thread_num();
+  printf("task m_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
+  if (lcheck != 1) { // no more than 1 task at a time
+    err++;
+    printf("Error m1, checker %d != 1\n", lcheck);
+  }
+  mysleep(DELAY);
+#pragma omp atomic read
+  lcheck = checker; // must still be equal to 1
+  if (lcheck != 1) {
+    err++;
+    printf("Error m2, checker %d != 1\n", lcheck);
+  }
+#pragma omp atomic
+  --checker;
+  return 0;
+}
+// thunk routine for tasks with inoutset dependency
+int thunk_s(int gtid, task_t *ptask) {
+  int lcheck, th;
+#pragma omp atomic capture
+  lcheck = ++checker; // 1
+  th = omp_get_thread_num();
+  printf("task 2_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
+  if (lcheck != 1) { // no more than 1 task at a time
+    err++;
+    printf("Error s1, checker %d != 1\n", lcheck);
+  }
+  mysleep(DELAY);
+#pragma omp atomic read
+  lcheck = checker; // must still be equal to 1
+  if (lcheck != 1) {
+    err++;
+    printf("Error s2, checker %d != 1\n", lcheck);
+  }
+#pragma omp atomic
+  --checker;
+  return 0;
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+int __kmpc_global_thread_num(id *);
+task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags, size_t sz,
+                              size_t shar, entry_t rtn);
+int __kmpc_omp_task_with_deps(id *loc, int gtid, task_t *task, int ndeps,
+                              dep *dep_lst, int nd_noalias, dep *noalias_lst);
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of internal data
+// ---------------------------------------------------------------------------
+
+int main() {
+  int i1, i2, i3;
+  omp_set_num_threads(8);
+  omp_set_dynamic(0);
+#pragma omp parallel
+  {
+#pragma omp single nowait
+    {
+      dep sdep[2];
+      task_t *ptr;
+      int gtid = __kmpc_global_thread_num(&loc);
+      int t = omp_get_thread_num();
+#pragma omp task depend(in : i1, i2)
+      { // task 0
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 0_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error1, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+#pragma omp atomic
+          err++;
+          printf("Error2, checker %d, not 1 or 2\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+#pragma omp task depend(in : i1, i2)
+      { // task 1
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 1_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error3, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error4, checker %d, not 1 or 2\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+      // compiler codegen start
+      // task2
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_s);
+      sdep[0].addr = (size_t)&i1;
+      sdep[0].len = 0; // not used
+      sdep[0].flags = 1; // IN
+      sdep[1].addr = (size_t)&i2;
+      sdep[1].len = 0; // not used
+      sdep[1].flags = 8; // INOUTSET
+      ptr->f_priv = t + 10; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+
+      // task3
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      sdep[0].addr = (size_t)&i1; // to be ignored
+      sdep[0].len = 0; // not used
+      sdep[0].flags = 1; // IN
+      sdep[1].addr = 0;
+      sdep[1].len = 0; // not used
+      sdep[1].flags = DEP_ALL_MEM; // omp_all_memory
+      ptr->f_priv = t + 20; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+      // compiler codegen end
+#pragma omp task depend(in : i1, i2)
+      { // task 4
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 4_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error5, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error6, checker %d, not 1 or 2\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+#pragma omp task depend(in : i1, i2)
+      { // task 5
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 5_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error7, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error8, checker %d, not 1 or 2\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+      // compiler codegen start
+      // task6
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      sdep[0].addr = (size_t)(-1); // omp_all_memory
+      sdep[0].len = 0; // not used
+      sdep[0].flags = 2; // OUT
+      ptr->f_priv = t + 30; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 1, sdep, 0, 0);
+
+      // task7
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      sdep[0].addr = 0;
+      sdep[0].len = 0; // not used
+      sdep[0].flags = DEP_ALL_MEM; // omp_all_memory
+      sdep[1].addr = (size_t)&i3; // to be ignored
+      sdep[1].len = 0; // not used
+      sdep[1].flags = 4; // MUTEXINOUTSET
+      ptr->f_priv = t + 40; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+      // compiler codegen end
+#pragma omp task depend(in : i3)
+      { // task 8
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker; // 1
+        th = omp_get_thread_num();
+        printf("task 8_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck != 1) {
+          err++;
+          printf("Error9, checker %d, != 1\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker;
+        if (lcheck != 1) {
+          err++;
+          printf("Error10, checker %d, != 1\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+    } // single
+  } // parallel
+  if (err == 0 && checker == 0) {
+    printf("passed\n");
+    return 0;
+  } else {
+    printf("failed, err = %d, checker = %d\n", err, checker);
+    return 1;
+  }
+}

diff  --git a/openmp/runtime/test/ompt/tasks/omp_task_depend_all.c b/openmp/runtime/test/ompt/tasks/omp_task_depend_all.c
new file mode 100644
index 00000000000000..eff6ea5444b511
--- /dev/null
+++ b/openmp/runtime/test/ompt/tasks/omp_task_depend_all.c
@@ -0,0 +1,381 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+
+// The runtime currently does not get dependency information from GCC.
+// UNSUPPORTED: gcc
+
+// Tests OMP 5.x task dependence "omp_all_memory" via real task pragmas using
+// depend(inout : ompx_all_memory[0]); only task2 emulates compiler codegen
+//
+// Task tree created:
+//      task0 - task1 (in: i1, i2)
+//             \
+//        task2 (inoutset: i2), (in: i1)
+//             /
+//        task3 (omp_all_memory) via ompx_all_memory[0] (all-ones address)
+//             /
+//      task4 - task5 (in: i1, i2)
+//           /
+//       task6 (omp_all_memory) via addr=-1
+//           /
+//       task7 (omp_all_memory) via ompx_all_memory[0] (all-ones address)
+//           /
+//       task8 (in: i3)
+//
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ndeps=2
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ndeps=2
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ompt_dependence_type_inoutset
+// CHECK-SAME: ndeps=2
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ompt_dependence_type_out_all_memory
+// CHECK-SAME: ndeps=2
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ndeps=2
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ndeps=2
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_out_all_memory
+// CHECK-SAME: ndeps=1
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_out_all_memory
+// CHECK-SAME: ompt_dependence_type_mutexinoutset
+// CHECK-SAME: ndeps=2
+
+// CHECK: ompt_event_dependences:
+// CHECK-SAME: ompt_dependence_type_in
+// CHECK-SAME: ndeps=1
+
+#include "callback.h"
+#include <stdio.h>
+#include <omp.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#define mysleep(n) Sleep(n)
+#else
+#include <unistd.h>
+#define mysleep(n) usleep((n)*1000)
+#endif
+
+// to check the # of concurrent tasks (must be 1 for MTX, <3 for other kinds)
+static int checker = 0;
+static int err = 0;
+#ifndef DELAY
+#define DELAY 100
+#endif
+
+// ---------------------------------------------------------------------------
+// internal data to emulate compiler codegen
+typedef struct DEP {
+  size_t addr;
+  size_t len;
+  unsigned char flags;
+} dep;
+#define DEP_ALL_MEM 0x80
+typedef struct task {
+  void **shareds;
+  void *entry;
+  int part_id;
+  void *destr_thunk;
+  int priority;
+  long long device_id;
+  int f_priv;
+} task_t;
+#define TIED 1
+typedef int (*entry_t)(int, task_t *);
+typedef struct ID {
+  int reserved_1;
+  int flags;
+  int reserved_2;
+  int reserved_3;
+  char *psource;
+} id;
+// thunk routine for tasks with ALL dependency
+int thunk_m(int gtid, task_t *ptask) {
+  int lcheck, th;
+#pragma omp atomic capture
+  lcheck = ++checker;
+  th = omp_get_thread_num();
+  printf("task m_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
+  if (lcheck != 1) { // no more than 1 task at a time
+    err++;
+    printf("Error m1, checker %d != 1\n", lcheck);
+  }
+  mysleep(DELAY);
+#pragma omp atomic read
+  lcheck = checker; // must still be equal to 1
+  if (lcheck != 1) {
+    err++;
+    printf("Error m2, checker %d != 1\n", lcheck);
+  }
+#pragma omp atomic
+  --checker;
+  return 0;
+}
+// thunk routine for tasks with inoutset dependency
+int thunk_s(int gtid, task_t *ptask) {
+  int lcheck, th;
+#pragma omp atomic capture
+  lcheck = ++checker; // 1
+  th = omp_get_thread_num();
+  printf("task 2_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
+  if (lcheck != 1) { // no more than 1 task at a time
+    err++;
+    printf("Error s1, checker %d != 1\n", lcheck);
+  }
+  mysleep(DELAY);
+#pragma omp atomic read
+  lcheck = checker; // must still be equal to 1
+  if (lcheck != 1) {
+    err++;
+    printf("Error s2, checker %d != 1\n", lcheck);
+  }
+#pragma omp atomic
+  --checker;
+  return 0;
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+int __kmpc_global_thread_num(id *);
+task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags, size_t sz,
+                              size_t shar, entry_t rtn);
+int __kmpc_omp_task_with_deps(id *loc, int gtid, task_t *task, int ndeps,
+                              dep *dep_lst, int nd_noalias, dep *noalias_lst);
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of internal data
+// ---------------------------------------------------------------------------
+
+int main() {
+  char *ompx_all_memory = (void *)0xffffffffffffffff;
+  int i1, i2, i3;
+  omp_set_num_threads(8);
+  omp_set_dynamic(0);
+#pragma omp parallel
+  {
+#pragma omp single nowait
+    {
+      dep sdep[2];
+      task_t *ptr;
+      int gtid = __kmpc_global_thread_num(&loc);
+      int t = omp_get_thread_num();
+#pragma omp task depend(in : i1, i2)
+      { // task 0
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 0_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error1, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+#pragma omp atomic
+          err++;
+          printf("Error2, checker %d, not 1 or 2\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+#pragma omp task depend(in : i1, i2)
+      { // task 1
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 1_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error3, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error4, checker %d, not 1 or 2\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+      // compiler codegen start
+      // task2
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_s);
+      sdep[0].addr = (size_t)&i1;
+      sdep[0].len = 0; // not used
+      sdep[0].flags = 1; // IN
+      sdep[1].addr = (size_t)&i2;
+      sdep[1].len = 0; // not used
+      sdep[1].flags = 8; // INOUTSET
+      ptr->f_priv = t + 10; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+
+// task3
+#pragma omp task depend(in : i1) depend(inout : ompx_all_memory[0])
+      {
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker;
+        th = omp_get_thread_num();
+        printf("task m_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck != 1) { // no more than 1 task at a time
+          err++;
+          printf("Error m1, checker %d != 1\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // must still be equal to 1
+        if (lcheck != 1) {
+          err++;
+          printf("Error m2, checker %d != 1\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+      // end of task3 (the emulated compiler codegen ended after task2)
+#pragma omp task depend(in : i1, i2)
+      { // task 4
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 4_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error5, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error6, checker %d, not 1 or 2\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+#pragma omp task depend(in : i1, i2)
+      { // task 5
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 5_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error7, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error8, checker %d, not 1 or 2\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+// task6
+#pragma omp task depend(inout : ompx_all_memory[0])
+      {
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker;
+        th = omp_get_thread_num();
+        printf("task m_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck != 1) { // no more than 1 task at a time
+          err++;
+          printf("Error m1, checker %d != 1\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // must still be equal to 1
+        if (lcheck != 1) {
+          err++;
+          printf("Error m2, checker %d != 1\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+// task7
+#pragma omp task depend(inout : ompx_all_memory[0]) depend(mutexinoutset : i3)
+      {
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker;
+        th = omp_get_thread_num();
+        printf("task m_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck != 1) { // no more than 1 task at a time
+          err++;
+          printf("Error m1, checker %d != 1\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // must still be equal to 1
+        if (lcheck != 1) {
+          err++;
+          printf("Error m2, checker %d != 1\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+#pragma omp task depend(in : i3)
+      { // task 8
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker; // 1
+        th = omp_get_thread_num();
+        printf("task 8_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck != 1) {
+          err++;
+          printf("Error9, checker %d, != 1\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker;
+        if (lcheck != 1) {
+          err++;
+          printf("Error10, checker %d, != 1\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+    } // single
+  } // parallel
+  if (err == 0 && checker == 0) {
+    printf("passed\n");
+    return 0;
+  } else {
+    printf("failed, err = %d, checker = %d\n", err, checker);
+    return 1;
+  }
+}


        


More information about the Openmp-commits mailing list