[Openmp-commits] [openmp] 10995c7 - [OpenMP][OMPT] Fix and add event callbacks for detached tasks

Joachim Protze via Openmp-commits openmp-commits at lists.llvm.org
Tue Jun 2 00:53:58 PDT 2020


Author: Joachim Protze
Date: 2020-06-02T09:52:40+02:00
New Revision: 10995c77b4766ad2d416919854228fd7a03db5ef

URL: https://github.com/llvm/llvm-project/commit/10995c77b4766ad2d416919854228fd7a03db5ef
DIFF: https://github.com/llvm/llvm-project/commit/10995c77b4766ad2d416919854228fd7a03db5ef.diff

LOG: [OpenMP][OMPT] Fix and add event callbacks for detached tasks

The OpenMP spec defines the task-fulfill event for a call to omp_fulfill_event.
If the task has not yet finished execution, ompt_task_early_fulfill is used;
otherwise, ompt_task_late_fulfill is used.
If a task has not completed when its execution finishes (i.e., the task goes
into detached mode), ompt_task_detach must be used instead of
ompt_task_complete when the next task is scheduled.

Tests for both cases are included; they only work with clang-11+.

Reviewed By: hbae

Differential revision: https://reviews.llvm.org/D80843

Added: 
    openmp/runtime/test/ompt/tasks/task_early_fulfill.c
    openmp/runtime/test/ompt/tasks/task_late_fulfill.c

Modified: 
    openmp/runtime/src/kmp_tasking.cpp
    openmp/runtime/test/ompt/callback.h

Removed: 
    


################################################################################
diff  --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index a8da6146064c..001992475028 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -577,24 +577,20 @@ static inline void __ompt_task_start(kmp_task_t *task,
 
 // __ompt_task_finish:
 //   Build and trigger final task-schedule event
-static inline void
-__ompt_task_finish(kmp_task_t *task, kmp_taskdata_t *resumed_task,
-                   ompt_task_status_t status = ompt_task_complete) {
-  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
-  if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
-      taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
-    status = ompt_task_cancel;
-  }
-
-  /* let OMPT know that we're returning to the callee task */
+static inline void __ompt_task_finish(kmp_task_t *task,
+                                      kmp_taskdata_t *resumed_task,
+                                      ompt_task_status_t status) {
   if (ompt_enabled.ompt_callback_task_schedule) {
+    kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+    if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
+        taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
+      status = ompt_task_cancel;
+    }
+
+    /* let OMPT know that we're returning to the callee task */
     ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
         &(taskdata->ompt_task_info.task_data), status,
-        &((resumed_task ? resumed_task
-                        : (taskdata->ompt_task_info.scheduling_parent
-                               ? taskdata->ompt_task_info.scheduling_parent
-                               : taskdata->td_parent))
-              ->ompt_task_info.task_data));
+        (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
   }
 }
 #endif
@@ -803,6 +799,10 @@ static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
 // gtid: global thread ID for calling thread
 // task: task to be finished
 // resumed_task: task to be resumed.  (may be NULL if task is serialized)
+//
+// template<ompt>: effectively ompt_enabled.enabled!=0
+// the version with ompt=false is inlined, allowing to optimize away all ompt
+// code in this case
 template <bool ompt>
 static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
                               kmp_taskdata_t *resumed_task) {
@@ -849,10 +849,6 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
       return;
     }
   }
-#if OMPT_SUPPORT
-  if (ompt)
-    __ompt_task_finish(task, resumed_task);
-#endif
 
   // Check mutexinoutset dependencies, release locks
   kmp_depnode_t *node = taskdata->td_depnode;
@@ -907,8 +903,18 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
         // task finished execution
         KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
         taskdata->td_flags.executing = 0; // suspend the finishing task
+
+#if OMPT_SUPPORT
+        // For a detached task, which is not completed, we switch back
+        // the omp_fulfill_event signals completion
+        // locking is necessary to avoid a race with ompt_task_late_fulfill
+        if (ompt)
+          __ompt_task_finish(task, resumed_task, ompt_task_detach);
+#endif
+
         // no access to taskdata after this point!
         // __kmp_fulfill_event might free taskdata at any time from now
+
         taskdata->td_flags.proxy = TASK_PROXY; // proxify!
         detach = true;
       }
@@ -919,6 +925,12 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
   if (!detach) {
     taskdata->td_flags.complete = 1; // mark the task as completed
 
+#if OMPT_SUPPORT
+    // This is not a detached task, we are done here
+    if (ompt)
+      __ompt_task_finish(task, resumed_task, ompt_task_complete);
+#endif
+
     // Only need to keep track of count if team parallel and tasking not
     // serialized, or task is detachable and event has already been fulfilled 
     if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
@@ -3867,12 +3879,26 @@ void __kmp_fulfill_event(kmp_event_t *event) {
     // point.
     // We need to take the lock to avoid races
     __kmp_acquire_tas_lock(&event->lock, gtid);
-    if (taskdata->td_flags.proxy == TASK_PROXY)
+    if (taskdata->td_flags.proxy == TASK_PROXY) {
       detached = true;
+    } else {
+#if OMPT_SUPPORT
+      // The OMPT event must occur under mutual exclusion,
+      // otherwise the tool might access ptask after free
+      if (UNLIKELY(ompt_enabled.enabled))
+        __ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
+#endif
+    }
     event->type = KMP_EVENT_UNINITIALIZED;
     __kmp_release_tas_lock(&event->lock, gtid);
 
     if (detached) {
+#if OMPT_SUPPORT
+      // We free ptask afterwards and know the task is finished,
+      // so locking is not necessary
+      if (UNLIKELY(ompt_enabled.enabled))
+        __ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
+#endif
       // If the task detached complete the proxy task
       if (gtid >= 0) {
         kmp_team_t *team = taskdata->td_team;

diff  --git a/openmp/runtime/test/ompt/callback.h b/openmp/runtime/test/ompt/callback.h
index cd507ad541ea..2bc5e39f2706 100644
--- a/openmp/runtime/test/ompt/callback.h
+++ b/openmp/runtime/test/ompt/callback.h
@@ -734,9 +734,13 @@ on_ompt_callback_task_schedule(
     ompt_task_status_t prior_task_status,
     ompt_data_t *second_task_data)
 {
-  printf("%" PRIu64 ": ompt_event_task_schedule: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n", ompt_get_thread_data()->value, first_task_data->value, second_task_data->value, ompt_task_status_t_values[prior_task_status], prior_task_status);
-  if(prior_task_status == ompt_task_complete)
-  {
+  printf("%" PRIu64 ": ompt_event_task_schedule: first_task_id=%" PRIu64
+         ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n",
+         ompt_get_thread_data()->value, first_task_data->value,
+         (second_task_data ? second_task_data->value : -1),
+         ompt_task_status_t_values[prior_task_status], prior_task_status);
+  if (prior_task_status == ompt_task_complete ||
+      prior_task_status == ompt_task_late_fulfill) {
     printf("%" PRIu64 ": ompt_event_task_end: task_id=%" PRIu64 "\n", ompt_get_thread_data()->value, first_task_data->value);
   }
 }

diff  --git a/openmp/runtime/test/ompt/tasks/task_early_fulfill.c b/openmp/runtime/test/ompt/tasks/task_early_fulfill.c
new file mode 100644
index 000000000000..c1cef5850b77
--- /dev/null
+++ b/openmp/runtime/test/ompt/tasks/task_early_fulfill.c
@@ -0,0 +1,68 @@
+// RUN: %libomp-compile -fopenmp-version=50 && env OMP_NUM_THREADS='3' \
+// RUN:    %libomp-run | %sort-threads | FileCheck %s
+
+// Checked gcc 9.2 still does not support detach clause on task construct.
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8, gcc-9
+// clang supports detach clause since version 11.
+// UNSUPPORTED: clang-10, clang-9, clang-8, clang-7
+// icc compiler does not support detach clause.
+// UNSUPPORTED: icc
+
+#include "callback.h"
+#include <omp.h>
+
+int main() {
+#pragma omp parallel
+#pragma omp master
+  {
+    omp_event_handle_t event;
+#pragma omp task detach(event) if (0)
+    { omp_fulfill_event(event); }
+#pragma omp taskwait
+  }
+  return 0;
+}
+
+// Check if libomp supports the callbacks for this test.
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
+// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
+// CHECK-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]],
+// CHECK-SAME: parent_task_frame.exit=[[NULL]],
+// CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}},
+// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]],
+// CHECK-SAME: requested_team_size=3,
+
+// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
+// CHECK-SAME: parallel_id=[[PARALLEL_ID]],
+// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+
+// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
+// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID]],
+// CHECK-SAME: parent_task_frame.exit=0x{{[0-f]+}},
+// CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}},
+// CHECK-SAME: new_task_id=[[TASK_ID:[0-9]+]],
+
+// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
+// CHECK-SAME: first_task_id=[[IMPLICIT_TASK_ID]],
+// CHECK-SAME: second_task_id=[[TASK_ID]],
+// CHECK-SAME: prior_task_status=ompt_task_switch=7
+
+// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
+// CHECK-SAME: first_task_id=[[TASK_ID]],
+// CHECK-SAME: second_task_id=18446744073709551615,
+// CHECK-SAME: prior_task_status=ompt_task_early_fulfill=5
+
+// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
+// CHECK-SAME: first_task_id=[[TASK_ID]],
+// CHECK-SAME: second_task_id=[[IMPLICIT_TASK_ID]],
+// CHECK-SAME: prior_task_status=ompt_task_complete=1

diff  --git a/openmp/runtime/test/ompt/tasks/task_late_fulfill.c b/openmp/runtime/test/ompt/tasks/task_late_fulfill.c
new file mode 100644
index 000000000000..05e50dfbd270
--- /dev/null
+++ b/openmp/runtime/test/ompt/tasks/task_late_fulfill.c
@@ -0,0 +1,76 @@
+// RUN: %libomp-compile -fopenmp-version=50 && env OMP_NUM_THREADS='3' \
+// RUN:    %libomp-run | %sort-threads | FileCheck %s
+
+// Checked gcc 9.2 still does not support detach clause on task construct.
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8, gcc-9
+// clang supports detach clause since version 11.
+// UNSUPPORTED: clang-10, clang-9, clang-8, clang-7
+// icc compiler does not support detach clause.
+// UNSUPPORTED: icc
+
+#include "callback.h"
+#include <omp.h>
+
+int main() {
+#pragma omp parallel
+#pragma omp master
+  {
+    omp_event_handle_t event;
+    omp_event_handle_t *f_event;
+#pragma omp task detach(event) depend(out : f_event) shared(f_event) if (0)
+    {
+      printf("task 1\n");
+      f_event = &event;
+    }
+#pragma omp task depend(in : f_event)
+    { printf("task 2\n"); }
+    printf("calling omp_fulfill_event\n");
+    omp_fulfill_event(*f_event);
+#pragma omp taskwait
+  }
+  return 0;
+}
+
+// Check if libomp supports the callbacks for this test.
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+
+// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+
+// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
+// CHECK-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]],
+// CHECK-SAME: parent_task_frame.exit=[[NULL]],
+// CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}},
+// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]],
+// CHECK-SAME: requested_team_size=3,
+
+// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
+// CHECK-SAME: parallel_id=[[PARALLEL_ID]],
+// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+
+// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
+// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID]],
+// CHECK-SAME: parent_task_frame.exit=0x{{[0-f]+}},
+// CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}},
+// CHECK-SAME: new_task_id=[[TASK_ID:[0-9]+]],
+
+// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
+// CHECK-SAME: first_task_id=[[IMPLICIT_TASK_ID]],
+// CHECK-SAME: second_task_id=[[TASK_ID]],
+// CHECK-SAME: prior_task_status=ompt_task_switch=7
+
+// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
+// CHECK-SAME: first_task_id=[[TASK_ID]],
+// CHECK-SAME: second_task_id=[[IMPLICIT_TASK_ID]],
+// CHECK-SAME: prior_task_status=ompt_task_detach=4
+
+// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule:
+// CHECK-SAME: first_task_id=[[TASK_ID]],
+// CHECK-SAME: second_task_id=18446744073709551615,
+// CHECK-SAME: prior_task_status=ompt_task_late_fulfill=6


        


More information about the Openmp-commits mailing list