[Openmp-commits] [openmp] 73d411d - [OpenMP][Tools] Add omp_all_memory support for Archer

Joachim Jenke via Openmp-commits openmp-commits at lists.llvm.org
Fri Jul 7 04:56:10 PDT 2023

Author: Joachim Jenke
Date: 2023-07-07T13:55:46+02:00
New Revision: 73d411d1b2c9c842bbaa167606720fed6ffc2243

URL: https://github.com/llvm/llvm-project/commit/73d411d1b2c9c842bbaa167606720fed6ffc2243
DIFF: https://github.com/llvm/llvm-project/commit/73d411d1b2c9c842bbaa167606720fed6ffc2243.diff

LOG: [OpenMP][Tools] Add omp_all_memory support for Archer

The semantics of depend(out:omp_all_memory) are quite similar to taskwait in
that it separates all tasks (with dependency) created before an
all_memory-task from all tasks (with dependency) created after an
all_memory-task. Only a single such task can execute at a time. Similar to
taskwait, we have a CV (AllMemory[1]) in the generating task to express the
dependency sink semantics of an all_memory-task. In addition, AllMemory[0]
describes the dependency source semantics of an all_memory-task. All tasks
with dependency create an HB-arc towards the sink and terminate an HB-arc
from the source.

Since we expect that not many applications will use such a dependency, the
support for handling the synchronization semantics is off by default and
can be turned on using ARCHER_OPTIONS="all_memory=1". The most costly part
is the precautionary posting of an HB-arc towards the sink, which represents
a potentially contentious write from all concurrently executing sibling tasks.
A warning is printed at runtime when the option is off while such a dependency
is observed. In most cases the lazy activation will still lead to false alerts.

Differential Revision: https://reviews.llvm.org/D111895




diff  --git a/openmp/tools/archer/README.md b/openmp/tools/archer/README.md
index 0b02c638607e04..b52626116d0f9c 100644
--- a/openmp/tools/archer/README.md
+++ b/openmp/tools/archer/README.md
@@ -131,6 +131,18 @@ statement in main!)</td>
+<td class="org-left">all_memory</td>
+<td class="org-right">0</td>
+<td class="org-left">Turn on tracking and analysis of omp_all_memory
+dependencies. Archer will activate the support automatically when
+such dependency is seen during execution. At this time the analysis
+already missed synchronization semantics, which will lead to false
+reports in most cases.</td>
 <td class="org-left">report_data_leak</td>

diff  --git a/openmp/tools/archer/ompt-tsan.cpp b/openmp/tools/archer/ompt-tsan.cpp
index 1bc5e57c72f34b..cd921347ce1d04 100644
--- a/openmp/tools/archer/ompt-tsan.cpp
+++ b/openmp/tools/archer/ompt-tsan.cpp
@@ -63,6 +63,7 @@ class ArcherFlags {
   int enabled{1};
   int report_data_leak{0};
   int ignore_serial{0};
+  std::atomic<int> all_memory{0};
   ArcherFlags(const char *env) {
     if (env) {
@@ -70,6 +71,7 @@ class ArcherFlags {
       std::string token;
       std::string str(env);
       std::istringstream iss(str);
+      int tmp_int;
       while (std::getline(iss, token, ' '))
@@ -89,6 +91,10 @@ class ArcherFlags {
         if (sscanf(it->c_str(), "ignore_serial=%d", &ignore_serial))
+        if (sscanf(it->c_str(), "all_memory=%d", &tmp_int)) {
+          all_memory = tmp_int;
+          continue;
+        }
         std::cerr << "Illegal values for ARCHER_OPTIONS variable: " << token
                   << std::endl;
@@ -451,6 +457,9 @@ struct TaskData final : DataPoolEntry<TaskData> {
   /// this task.
   ompt_tsan_clockid Taskwait{0};
+  /// Child tasks use its address to model omp_all_memory dependencies
+  ompt_tsan_clockid AllMemory[2]{0};
   /// Whether this task is currently executing a barrier.
   bool InBarrier{false};
@@ -506,10 +515,16 @@ struct TaskData final : DataPoolEntry<TaskData> {
   bool isInitial() { return TaskType & ompt_task_initial; }
   bool isTarget() { return TaskType & ompt_task_target; }
+  void setAllMemoryDep() { AllMemory[0] = 1; }
+  bool hasAllMemoryDep() { return AllMemory[0]; }
   void *GetTaskPtr() { return &Task; }
   void *GetTaskwaitPtr() { return &Taskwait; }
+  void *GetLastAllMemoryPtr() { return AllMemory; }
+  void *GetNextAllMemoryPtr() { return AllMemory + 1; }
   TaskData *Init(TaskData *parent, int taskType) {
     TaskType = taskType;
     Parent = parent;
@@ -855,13 +870,30 @@ static void freeTask(TaskData *task) {
+// LastAllMemoryPtr marks the beginning of an all_memory epoch
+// NextAllMemoryPtr marks the end of an all_memory epoch
+// All tasks with depend begin execution after LastAllMemoryPtr
+// and end before NextAllMemoryPtr
 static void releaseDependencies(TaskData *task) {
+  if (archer_flags->all_memory) {
+    if (task->hasAllMemoryDep()) {
+      TsanHappensBefore(task->Parent->GetLastAllMemoryPtr());
+      TsanHappensBefore(task->Parent->GetNextAllMemoryPtr());
+    } else if (task->DependencyCount)
+      TsanHappensBefore(task->Parent->GetNextAllMemoryPtr());
+  }
   for (unsigned i = 0; i < task->DependencyCount; i++) {
 static void acquireDependencies(TaskData *task) {
+  if (archer_flags->all_memory) {
+    if (task->hasAllMemoryDep())
+      TsanHappensAfter(task->Parent->GetNextAllMemoryPtr());
+    else if (task->DependencyCount)
+      TsanHappensAfter(task->Parent->GetLastAllMemoryPtr());
+  }
   for (unsigned i = 0; i < task->DependencyCount; i++) {
@@ -983,13 +1015,28 @@ static void ompt_tsan_dependences(ompt_data_t *task_data,
     Data->Dependencies =
         (TaskDependency *)malloc(sizeof(TaskDependency) * ndeps);
     Data->DependencyCount = ndeps;
-    for (int i = 0; i < ndeps; i++) {
+    for (int i = 0, d = 0; i < ndeps; i++, d++) {
+      if (deps[i].dependence_type == ompt_dependence_type_out_all_memory ||
+          deps[i].dependence_type == ompt_dependence_type_inout_all_memory) {
+        Data->setAllMemoryDep();
+        Data->DependencyCount--;
+        if (!archer_flags->all_memory) {
+          printf("The application uses omp_all_memory, but Archer was\n"
+                 "started to not consider omp_all_memory. This can lead\n"
+                 "to false data race alerts.\n"
+                 "Include all_memory=1 in ARCHER_OPTIONS to consider\n"
+                 "omp_all_memory from the beginning.\n");
+          archer_flags->all_memory = 1;
+        }
+        d--;
+        continue;
+      }
       auto ret = Data->Parent->DependencyMap->insert(
           std::make_pair(deps[i].variable.ptr, nullptr));
       if (ret.second) {
         ret.first->second = DependencyData::New();
-      new ((void *)(Data->Dependencies + i))
+      new ((void *)(Data->Dependencies + d))
           TaskDependency(ret.first->second, deps[i].dependence_type);

diff  --git a/openmp/tools/archer/tests/task/omp_task_depend_all.c b/openmp/tools/archer/tests/task/omp_task_depend_all.c
new file mode 100644
index 00000000000000..7a8063cc925d75
--- /dev/null
+++ b/openmp/tools/archer/tests/task/omp_task_depend_all.c
@@ -0,0 +1,350 @@
+// RUN--: %libarcher-compile-and-run | FileCheck %s --check-prefix=NOENV
+// RUN: %libarcher-compile && env ARCHER_OPTIONS="all_memory=1" \
+// RUN:   %libarcher-run | FileCheck %s --check-prefix=ENV
+// REQUIRES: tsan
+// The runtime currently does not get dependency information from GCC.
+// Tests OMP 5.x task dependence "omp_all_memory",
+// emulates compiler codegen versions for new dep kind
+// Task tree created:
+//      task0 - task1 (in: i1, i2)
+//             \
+//        task2 (inoutset: i2), (in: i1)
+//             /
+//        task3 (omp_all_memory) via flag=0x80
+//             /
+//      task4 - task5 (in: i1, i2)
+//           /
+//       task6 (omp_all_memory) via addr=-1
+//           /
+//       task7 (omp_all_memory) via flag=0x80
+//           /
+//       task8 (in: i3)
+#include <omp.h>
+#include <stdio.h>
+#ifdef _WIN32
+#include <windows.h>
+#define mysleep(n) Sleep(n)
+#include <unistd.h>
+#define mysleep(n) usleep((n)*1000)
+// to check the # of concurrent tasks (must be 1 for MTX, <3 for other kinds)
+static int checker = 0;
+static int err = 0;
+#ifndef DELAY
+#define DELAY 100
+// ---------------------------------------------------------------------------
+// internal data to emulate compiler codegen
+typedef struct DEP {
+  size_t addr;
+  size_t len;
+  unsigned char flags;
+} dep;
+#define DEP_ALL_MEM 0x80
+typedef struct task {
+  void **shareds;
+  void *entry;
+  int part_id;
+  void *destr_thunk;
+  int priority;
+  long long device_id;
+  int f_priv;
+} task_t;
+#define TIED 1
+typedef int (*entry_t)(int, task_t *);
+typedef struct ID {
+  int reserved_1;
+  int flags;
+  int reserved_2;
+  int reserved_3;
+  char *psource;
+} id;
+// thunk routine for tasks with ALL dependency
+int thunk_m(int gtid, task_t *ptask) {
+  int lcheck, th;
+#pragma omp atomic capture
+  lcheck = ++checker;
+  th = omp_get_thread_num();
+  printf("task m_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
+  if (lcheck != 1) { // no more than 1 task at a time
+    err++;
+    printf("Error m1, checker %d != 1\n", lcheck);
+  }
+  mysleep(DELAY);
+#pragma omp atomic read
+  lcheck = checker; // must still be equal to 1
+  if (lcheck != 1) {
+    err++;
+    printf("Error m2, checker %d != 1\n", lcheck);
+  }
+#pragma omp atomic
+  --checker;
+  return 0;
+// thunk routine for tasks with inoutset dependency
+int thunk_s(int gtid, task_t *ptask) {
+  int lcheck, th;
+#pragma omp atomic capture
+  lcheck = ++checker; // 1
+  th = omp_get_thread_num();
+  printf("task 2_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
+  if (lcheck != 1) { // no more than 1 task at a time
+    err++;
+    printf("Error s1, checker %d != 1\n", lcheck);
+  }
+  mysleep(DELAY);
+#pragma omp atomic read
+  lcheck = checker; // must still be equal to 1
+  if (lcheck != 1) {
+    err++;
+    printf("Error s2, checker %d != 1\n", lcheck);
+  }
+#pragma omp atomic
+  --checker;
+  return 0;
+#ifdef __cplusplus
+extern "C" {
+int __kmpc_global_thread_num(id *);
+task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags, size_t sz,
+                              size_t shar, entry_t rtn);
+int __kmpc_omp_task_with_deps(id *loc, int gtid, task_t *task, int ndeps,
+                              dep *dep_lst, int nd_noalias, dep *noalias_lst);
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+#ifdef __cplusplus
+} // extern "C"
+// End of internal data
+// ---------------------------------------------------------------------------
+int main() {
+  char *ompx_all_memory = (void *)0xffffffffffffffff;
+  int i1, i2, i3;
+  omp_set_num_threads(8);
+  omp_set_dynamic(0);
+#pragma omp parallel
+  {
+#pragma omp single nowait
+    {
+      dep sdep[2];
+      task_t *ptr;
+      int gtid = __kmpc_global_thread_num(&loc);
+      int t = omp_get_thread_num();
+#pragma omp task depend(in : i1, i2)
+      { // task 0
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 0_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error1, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+#pragma omp atomic
+          err++;
+          printf("Error2, checker %d, not 1 or 2\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+#pragma omp task depend(in : i1, i2)
+      { // task 1
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 1_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error3, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error4, checker %d, not 1 or 2\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+      // compiler codegen start
+      // task2
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_s);
+      sdep[0].addr = (size_t)&i1;
+      sdep[0].len = 0;   // not used
+      sdep[0].flags = 1; // IN
+      sdep[1].addr = (size_t)&i2;
+      sdep[1].len = 0;      // not used
+      sdep[1].flags = 8;    // INOUTSET
+      ptr->f_priv = t + 10; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+// task3
+#pragma omp task depend(in : i1) depend(inout : ompx_all_memory[0])
+      {
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker;
+        th = omp_get_thread_num();
+        printf("task 3_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck != 1) { // no more than 1 task at a time
+          err++;
+          printf("Error m1, checker %d != 1\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // must still be equal to 1
+        if (lcheck != 1) {
+          err++;
+          printf("Error m2, checker %d != 1\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+      // compiler codegen end
+#pragma omp task depend(in : i1, i2)
+      { // task 4
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 4_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error5, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error6, checker %d, not 1 or 2\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+#pragma omp task depend(in : i1, i2)
+      { // task 5
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 5_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error7, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error8, checker %d, not 1 or 2\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+// task6
+#pragma omp task depend(inout : ompx_all_memory[0])
+      {
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker;
+        th = omp_get_thread_num();
+        printf("task 6_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck != 1) { // no more than 1 task at a time
+          err++;
+          printf("Error m1, checker %d != 1\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // must still be equal to 1
+        if (lcheck != 1) {
+          err++;
+          printf("Error m2, checker %d != 1\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+// task7
+#pragma omp task depend(inout : ompx_all_memory[0]) depend(mutexinoutset : i3)
+      {
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker;
+        th = omp_get_thread_num();
+        printf("task 7_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck != 1) { // no more than 1 task at a time
+          err++;
+          printf("Error m1, checker %d != 1\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker; // must still be equal to 1
+        if (lcheck != 1) {
+          err++;
+          printf("Error m2, checker %d != 1\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+#pragma omp task depend(in : i3)
+      { // task 8
+        int lcheck, th;
+#pragma omp atomic capture
+        lcheck = ++checker; // 1
+        th = omp_get_thread_num();
+        printf("task 8_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck != 1) {
+          err++;
+          printf("Error9, checker %d, != 1\n", lcheck);
+        }
+        mysleep(DELAY);
+#pragma omp atomic read
+        lcheck = checker;
+        if (lcheck != 1) {
+          err++;
+          printf("Error10, checker %d, != 1\n", lcheck);
+        }
+#pragma omp atomic
+        --checker;
+      }
+    } // single
+  }   // parallel
+  if (err == 0 && checker == 0) {
+    printf("passed\n");
+    return 0;
+  } else {
+    printf("failed, err = %d, checker = %d\n", err, checker);
+    return 1;
+  }
+// NOENV-NOT: ThreadSanitizer: data race
+// NOENV-NOT: ThreadSanitizer: reported
+// NOENV: omp_all_memory
+// NOENV-NOT: ThreadSanitizer: data race
+// NOENV-NOT: ThreadSanitizer: reported
+// NOENV: passed
+// ENV-NOT: ThreadSanitizer: data race
+// ENV-NOT: ThreadSanitizer: reported
+// ENV: passed


More information about the Openmp-commits mailing list