[Openmp-commits] [openmp] [OMPT] Fix issue #84180 (PR #84215)

PEREIRA Romain via Openmp-commits openmp-commits at lists.llvm.org
Thu Mar 28 06:49:50 PDT 2024


https://github.com/rpereira-dev updated https://github.com/llvm/llvm-project/pull/84215

>From 8c98c3c507896a3162739e182fc496176a0ea6a5 Mon Sep 17 00:00:00 2001
From: Romain PEREIRA <romain.pereira at inria.fr>
Date: Wed, 6 Mar 2024 19:00:05 +0100
Subject: [PATCH 1/2] Removed extra 'ompt_callback_task_schedule' callback
 after discarding

---
 openmp/runtime/src/kmp_tasking.cpp | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 6e8b948efa064f..148018cc4af4b5 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1030,13 +1030,14 @@ static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
 // gtid: global thread ID for calling thread
 // task: task to be finished
 // resumed_task: task to be resumed.  (may be NULL if task is serialized)
+// discard: nonzero if 'task' was discarded (no OMPT task-complete event)
 //
 // template<ompt>: effectively ompt_enabled.enabled!=0
 // the version with ompt=false is inlined, allowing to optimize away all ompt
 // code in this case
 template <bool ompt>
 static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
-                              kmp_taskdata_t *resumed_task) {
+                              kmp_taskdata_t *resumed_task, int discard) {
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_task_team_t *task_team =
@@ -1170,7 +1171,7 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
 
 #if OMPT_SUPPORT
     // This is not a detached task, we are done here
-    if (ompt)
+    if (ompt && !discard)
       __ompt_task_finish(task, resumed_task, ompt_task_complete);
 #endif
     // TODO: What would be the balance between the conditions in the function
@@ -1254,7 +1255,7 @@ static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
                 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
   KMP_DEBUG_ASSERT(gtid >= 0);
   // this routine will provide task to resume
-  __kmp_task_finish<ompt>(gtid, task, NULL);
+  __kmp_task_finish<ompt>(gtid, task, NULL, 0);
 
   KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
@@ -1304,8 +1305,8 @@ void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
   KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
                 loc_ref, KMP_TASK_TO_TASKDATA(task)));
 
-  __kmp_task_finish<false>(gtid, task,
-                           NULL); // Not sure how to find task to resume
+  __kmp_task_finish<false>(gtid, task, NULL,
+                           0); // Not sure how to find task to resume
 
   KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
                 loc_ref, KMP_TASK_TO_TASKDATA(task)));
@@ -1923,7 +1924,7 @@ __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
     KMP_FSYNC_CANCEL(taskdata); // destroy self (just executed)
     KMP_FSYNC_RELEASING(taskdata->td_parent); // releasing parent
 #endif
-  }
+  } /* !discard */
 
 #if OMPD_SUPPORT
   if (ompd_state & OMPD_ENABLE_BP)
@@ -1938,10 +1939,10 @@ __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
       if (taskdata->td_flags.tiedness == TASK_TIED) {
         taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
       }
-      __kmp_task_finish<true>(gtid, task, current_task);
+      __kmp_task_finish<true>(gtid, task, current_task, discard);
     } else
 #endif
-      __kmp_task_finish<false>(gtid, task, current_task);
+      __kmp_task_finish<false>(gtid, task, current_task, discard);
   }
 
   KA_TRACE(
@@ -4969,7 +4970,7 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
   // free the pattern task and exit
   __kmp_task_start(gtid, task, current_task); // make internal bookkeeping
   // do not execute the pattern task, just do internal bookkeeping
-  __kmp_task_finish<false>(gtid, task, current_task);
+  __kmp_task_finish<false>(gtid, task, current_task, 0);
 }
 
 // Structure to keep taskloop parameters for auxiliary task
@@ -5247,7 +5248,7 @@ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
     // free the pattern task and exit
     __kmp_task_start(gtid, task, current_task);
     // do not execute anything for zero-trip loop
-    __kmp_task_finish<false>(gtid, task, current_task);
+    __kmp_task_finish<false>(gtid, task, current_task, 0);
     return;
   }
 

>From 645cd7fd9f16b01ba54e458ad3fc7a8f29d099e6 Mon Sep 17 00:00:00 2001
From: Romain PEREIRA <romain.pereira at inria.fr>
Date: Thu, 28 Mar 2024 12:32:26 +0100
Subject: [PATCH 2/2] Added checks for task_schedule events in the callback.h
 tools

---
 openmp/runtime/test/ompt/callback.h           | 230 +++++++++++-------
 .../test/ompt/cancel/cancel_taskgroup.c       |  12 +
 .../runtime/test/ompt/misc/api_calls_misc.c   |  14 +-
 .../runtime/test/ompt/misc/api_calls_places.c |  18 +-
 .../ompt/parallel/nested_lwt_thread_num.c     |   2 +-
 .../test/ompt/parallel/repeated_calls.c       |   1 +
 .../runtime/test/ompt/synchronization/lock.c  |   2 +-
 .../test/ompt/synchronization/masked.c        |   2 +-
 .../test/ompt/synchronization/master.c        |   1 +
 .../test/ompt/synchronization/nest_lock.c     |   2 +-
 openmp/runtime/test/ompt/tasks/dependences.c  |   2 +-
 .../ompt/tasks/dependences_mutexinoutset.c    |   2 +-
 .../ompt/tasks/explicit_task_thread_num.c     |   2 +-
 .../runtime/test/ompt/tasks/task_if0-depend.c |   2 +-
 openmp/runtime/test/ompt/tasks/task_memory.c  |   1 +
 openmp/runtime/test/ompt/tasks/task_types.c   |   6 +-
 .../test/ompt/tasks/task_types_serialized.c   |   2 +-
 .../runtime/test/ompt/tasks/taskwait-depend.c |   2 +-
 openmp/runtime/test/ompt/tasks/taskyield.c    |   4 +-
 .../runtime/test/ompt/worksharing/sections.c  |   4 +-
 .../test/ompt/worksharing/sections_dispatch.c |   4 +-
 openmp/runtime/test/ompt/worksharing/single.c |   2 +-
 22 files changed, 193 insertions(+), 124 deletions(-)

diff --git a/openmp/runtime/test/ompt/callback.h b/openmp/runtime/test/ompt/callback.h
index 05b2006a280db5..52181db20ab744 100644
--- a/openmp/runtime/test/ompt/callback.h
+++ b/openmp/runtime/test/ompt/callback.h
@@ -22,6 +22,7 @@
 #define _OMPT_TESTS
 #endif
 
+// String tables naming the values of OMPT enumeration types
 static const char *ompt_thread_t_values[] = {
     "ompt_thread_UNDEFINED", "ompt_thread_initial", "ompt_thread_worker",
     "ompt_thread_other"};
@@ -105,6 +106,41 @@ static ompt_get_proc_id_t ompt_get_proc_id;
 static ompt_enumerate_states_t ompt_enumerate_states;
 static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls;
 
+#ifndef USE_PRIVATE_TOOL
+
+// per-thread data storage
+typedef struct
+{
+    uint64_t tid;       // thread id
+    uint64_t taskid;    // current task id
+} thread_t;
+
+static inline thread_t *
+get_thread(ompt_data_t * thread_data)
+{
+    return ((thread_t *)thread_data->ptr);
+}
+
+static inline uint64_t
+get_thread_id(ompt_data_t * thread_data)
+{
+    return get_thread(thread_data)->tid;
+}
+
+static inline thread_t *
+get_current_thread(void)
+{
+    return get_thread(ompt_get_thread_data());
+}
+
+static inline uint64_t
+get_current_thread_id(void)
+{
+    return get_current_thread()->tid;
+}
+
+# endif /* ! USE_PRIVATE_TOOL */
+
 static void print_ids(int level)
 {
   int task_type, thread_num;
@@ -119,7 +155,7 @@ static void print_ids(int level)
     printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64
            ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p, "
            "task_type=%s=%d, thread_num=%d\n",
-           ompt_get_thread_data()->value, level,
+           get_current_thread_id(), level,
            exists_task ? task_parallel_data->value : 0,
            exists_task ? task_data->value : 0, frame->exit_frame.ptr,
            frame->enter_frame.ptr, buffer, task_type, thread_num);
@@ -129,7 +165,7 @@ static void print_ids(int level)
 
 #define print_frame(level)                                                     \
   printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n",                      \
-         ompt_get_thread_data()->value, level, get_frame_address(level))
+         get_current_thread_id(), level, get_frame_address(level))
 
 // clang (version 5.0 and above) adds an intermediate function call with debug flag (-g)
 #if defined(TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN)
@@ -177,14 +213,14 @@ ompt_label_##id:
 // a MOV instruction for non-void runtime functions which is 3 bytes long.
 #define print_possible_return_addresses(addr) \
   printf("%" PRIu64 ": current_address=%p or %p for non-void functions\n", \
-         ompt_get_thread_data()->value, ((char *)addr) - 1, ((char *)addr) - 4)
+         get_current_thread_id(), ((char *)addr) - 1, ((char *)addr) - 4)
 #elif KMP_ARCH_PPC64
 // On Power the NOP instruction is 4 bytes long. In addition, the compiler
 // inserts a second NOP instruction (another 4 bytes). For non-void runtime
 // functions Clang inserts a STW instruction (but only if compiling under
 // -fno-PIC which will be the default with Clang 8.0, another 4 bytes).
 #define print_possible_return_addresses(addr) \
-  printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \
+  printf("%" PRIu64 ": current_address=%p or %p\n", get_current_thread_id(), \
          ((char *)addr) - 8, ((char *)addr) - 12)
 #elif KMP_ARCH_AARCH64
 // On AArch64 the NOP instruction is 4 bytes long, can be followed by inserted
@@ -195,7 +231,7 @@ ompt_label_##id:
 // the AArch64 backend. See issue #69627.
 #define print_possible_return_addresses(addr)                                  \
   printf("%" PRIu64 ": current_address=%p or %p or %p\n",                      \
-         ompt_get_thread_data()->value, ((char *)addr) - 4,                    \
+         get_current_thread_id(), ((char *)addr) - 4,                    \
          ((char *)addr) - 8, ((char *)addr) - 12)
 #elif KMP_ARCH_RISCV64
 #if __riscv_compressed
@@ -206,7 +242,7 @@ ompt_label_##id:
 // another branch).
 #define print_possible_return_addresses(addr) \
   printf("%" PRIu64 ": current_address=%p or %p\n", \
-         ompt_get_thread_data()->value, ((char *)addr) - 6, ((char *)addr) - 10)
+         get_current_thread_id(), ((char *)addr) - 6, ((char *)addr) - 10)
 #else
 // On RV64G the NOP instruction is 4 byte long. In addition, the compiler
 // inserts a J instruction (targeting the successor basic block), which
@@ -215,7 +251,7 @@ ompt_label_##id:
 // another branch).
 #define print_possible_return_addresses(addr) \
   printf("%" PRIu64 ": current_address=%p or %p\n", \
-         ompt_get_thread_data()->value, ((char *)addr) - 8, ((char *)addr) - 12)
+         get_current_thread_id(), ((char *)addr) - 8, ((char *)addr) - 12)
 #endif
 #elif KMP_ARCH_LOONGARCH64
 // On LoongArch64 the NOP instruction is 4 bytes long, can be followed by
@@ -224,14 +260,14 @@ ompt_label_##id:
 // elsewhere (ie. another branch).
 #define print_possible_return_addresses(addr)                                  \
   printf("%" PRIu64 ": current_address=%p or %p or %p\n",                      \
-         ompt_get_thread_data()->value, ((char *)addr) - 4,                    \
+         get_current_thread_id(), ((char *)addr) - 4,                    \
          ((char *)addr) - 8, ((char *)addr) - 12)
 #elif KMP_ARCH_VE
 // On VE the NOP instruction is 8 byte long. In addition, the compiler inserts
 // a ??? instruction for non-void runtime functions which is ? bytes long.
 #define print_possible_return_addresses(addr)                                  \
   printf("%" PRIu64 ": current_address=%p or %p\n",                            \
-         ompt_get_thread_data()->value, ((char *)addr) - 8,                    \
+         get_current_thread_id(), ((char *)addr) - 8,                    \
          ((char *)addr) - 8)
 #elif KMP_ARCH_S390X
 // On s390x the NOP instruction is 2 bytes long. For non-void runtime
@@ -247,7 +283,7 @@ ompt_label_##id:
 //                addr:
 #define print_possible_return_addresses(addr)                                  \
   printf("%" PRIu64 ": current_address=%p or %p or %p\n",                      \
-         ompt_get_thread_data()->value, ((char *)addr) - 2,                    \
+         get_current_thread_id(), ((char *)addr) - 2,                    \
          ((char *)addr) - 8, ((char *)addr) - 12)
 #else
 #error Unsupported target architecture, cannot determine address offset!
@@ -273,7 +309,7 @@ ompt_label_##id:
 #define print_fuzzy_address_blocks(addr)                                       \
   printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64              \
          " or 0x%" PRIx64 " or 0x%" PRIx64 " (%p)\n",                          \
-         ompt_get_thread_data()->value,                                        \
+         get_current_thread_id(),                                              \
          ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES - 1,                   \
          ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES,                       \
          ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES + 1,                   \
@@ -303,43 +339,43 @@ on_ompt_callback_mutex_acquire(
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_wait_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+             get_current_thread_id(), wait_id, hint, impl, codeptr_ra);
       break;
     case ompt_mutex_test_lock:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_wait_test_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+             get_current_thread_id(), wait_id, hint, impl, codeptr_ra);
       break;
     case ompt_mutex_nest_lock:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+             get_current_thread_id(), wait_id, hint, impl, codeptr_ra);
       break;
     case ompt_mutex_test_nest_lock:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_wait_test_nest_lock: wait_id=%" PRIu64
              ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+             get_current_thread_id(), wait_id, hint, impl, codeptr_ra);
       break;
     case ompt_mutex_critical:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32
              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+             get_current_thread_id(), wait_id, hint, impl, codeptr_ra);
       break;
     case ompt_mutex_atomic:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32
              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+             get_current_thread_id(), wait_id, hint, impl, codeptr_ra);
       break;
     case ompt_mutex_ordered:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32
              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+             get_current_thread_id(), wait_id, hint, impl, codeptr_ra);
       break;
     default:
       break;
@@ -357,43 +393,43 @@ on_ompt_callback_mutex_acquired(
     case ompt_mutex_lock:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, codeptr_ra);
+             get_current_thread_id(), wait_id, codeptr_ra);
       break;
     case ompt_mutex_test_lock:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_acquired_test_lock: wait_id=%" PRIu64
              ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, codeptr_ra);
+             get_current_thread_id(), wait_id, codeptr_ra);
       break;
     case ompt_mutex_nest_lock:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64
              ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, codeptr_ra);
+             get_current_thread_id(), wait_id, codeptr_ra);
       break;
     case ompt_mutex_test_nest_lock:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_acquired_test_nest_lock_first: wait_id=%" PRIu64
              ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, codeptr_ra);
+             get_current_thread_id(), wait_id, codeptr_ra);
       break;
     case ompt_mutex_critical:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_acquired_critical: wait_id=%" PRIu64
              ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, codeptr_ra);
+             get_current_thread_id(), wait_id, codeptr_ra);
       break;
     case ompt_mutex_atomic:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_acquired_atomic: wait_id=%" PRIu64
              ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, codeptr_ra);
+             get_current_thread_id(), wait_id, codeptr_ra);
       break;
     case ompt_mutex_ordered:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_acquired_ordered: wait_id=%" PRIu64
              ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, codeptr_ra);
+             get_current_thread_id(), wait_id, codeptr_ra);
       break;
     default:
       break;
@@ -411,31 +447,31 @@ on_ompt_callback_mutex_released(
     case ompt_mutex_lock:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, codeptr_ra);
+             get_current_thread_id(), wait_id, codeptr_ra);
       break;
     case ompt_mutex_nest_lock:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_release_nest_lock_last: wait_id=%" PRIu64
              ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, codeptr_ra);
+             get_current_thread_id(), wait_id, codeptr_ra);
       break;
     case ompt_mutex_critical:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_release_critical: wait_id=%" PRIu64
              ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, codeptr_ra);
+             get_current_thread_id(), wait_id, codeptr_ra);
       break;
     case ompt_mutex_atomic:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_release_atomic: wait_id=%" PRIu64
              ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, codeptr_ra);
+             get_current_thread_id(), wait_id, codeptr_ra);
       break;
     case ompt_mutex_ordered:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_release_ordered: wait_id=%" PRIu64
              ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, codeptr_ra);
+             get_current_thread_id(), wait_id, codeptr_ra);
       break;
     default:
       break;
@@ -454,13 +490,13 @@ on_ompt_callback_nest_lock(
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64
              ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, codeptr_ra);
+             get_current_thread_id(), wait_id, codeptr_ra);
       break;
     case ompt_scope_end:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_release_nest_lock_prev: wait_id=%" PRIu64
              ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, codeptr_ra);
+             get_current_thread_id(), wait_id, codeptr_ra);
       break;
     case ompt_scope_beginend:
       printf("ompt_scope_beginend should never be passed to %s\n", __func__);
@@ -491,7 +527,7 @@ on_ompt_callback_sync_region(
           printf("%" PRIu64 ":" _TOOL_PREFIX
                  " ompt_event_barrier_begin: parallel_id=%" PRIu64
                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra);
           print_ids(0);
           break;
@@ -499,14 +535,14 @@ on_ompt_callback_sync_region(
           printf("%" PRIu64 ":" _TOOL_PREFIX
                  " ompt_event_taskwait_begin: parallel_id=%" PRIu64
                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra);
           break;
         case ompt_sync_region_taskgroup:
           printf("%" PRIu64 ":" _TOOL_PREFIX
                  " ompt_event_taskgroup_begin: parallel_id=%" PRIu64
                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra);
           break;
         case ompt_sync_region_reduction:
@@ -529,7 +565,7 @@ on_ompt_callback_sync_region(
           printf("%" PRIu64 ":" _TOOL_PREFIX
                  " ompt_event_barrier_end: parallel_id=%" PRIu64
                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
-                 ompt_get_thread_data()->value,
+                 get_current_thread_id(),
                  (parallel_data) ? parallel_data->value : 0, task_data->value,
                  codeptr_ra);
           break;
@@ -537,7 +573,7 @@ on_ompt_callback_sync_region(
           printf("%" PRIu64 ":" _TOOL_PREFIX
                  " ompt_event_taskwait_end: parallel_id=%" PRIu64
                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
-                 ompt_get_thread_data()->value,
+                 get_current_thread_id(),
                  (parallel_data) ? parallel_data->value : 0, task_data->value,
                  codeptr_ra);
           break;
@@ -545,7 +581,7 @@ on_ompt_callback_sync_region(
           printf("%" PRIu64 ":" _TOOL_PREFIX
                  " ompt_event_taskgroup_end: parallel_id=%" PRIu64
                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
-                 ompt_get_thread_data()->value,
+                 get_current_thread_id(),
                  (parallel_data) ? parallel_data->value : 0, task_data->value,
                  codeptr_ra);
           break;
@@ -585,21 +621,21 @@ on_ompt_callback_sync_region_wait(
           printf("%" PRIu64 ":" _TOOL_PREFIX
                  " ompt_event_wait_barrier_begin: parallel_id=%" PRIu64
                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra);
           break;
         case ompt_sync_region_taskwait:
           printf("%" PRIu64 ":" _TOOL_PREFIX
                  " ompt_event_wait_taskwait_begin: parallel_id=%" PRIu64
                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra);
           break;
         case ompt_sync_region_taskgroup:
           printf("%" PRIu64 ":" _TOOL_PREFIX
                  " ompt_event_wait_taskgroup_begin: parallel_id=%" PRIu64
                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra);
           break;
         case ompt_sync_region_reduction:
@@ -622,7 +658,7 @@ on_ompt_callback_sync_region_wait(
           printf("%" PRIu64 ":" _TOOL_PREFIX
                  " ompt_event_wait_barrier_end: parallel_id=%" PRIu64
                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
-                 ompt_get_thread_data()->value,
+                 get_current_thread_id(),
                  (parallel_data) ? parallel_data->value : 0, task_data->value,
                  codeptr_ra);
           break;
@@ -630,7 +666,7 @@ on_ompt_callback_sync_region_wait(
           printf("%" PRIu64 ":" _TOOL_PREFIX
                  " ompt_event_wait_taskwait_end: parallel_id=%" PRIu64
                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
-                 ompt_get_thread_data()->value,
+                 get_current_thread_id(),
                  (parallel_data) ? parallel_data->value : 0, task_data->value,
                  codeptr_ra);
           break;
@@ -638,7 +674,7 @@ on_ompt_callback_sync_region_wait(
           printf("%" PRIu64 ":" _TOOL_PREFIX
                  " ompt_event_wait_taskgroup_end: parallel_id=%" PRIu64
                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
-                 ompt_get_thread_data()->value,
+                 get_current_thread_id(),
                  (parallel_data) ? parallel_data->value : 0, task_data->value,
                  codeptr_ra);
           break;
@@ -665,7 +701,7 @@ static void on_ompt_callback_reduction(ompt_sync_region_t kind,
     printf("%" PRIu64 ":" _TOOL_PREFIX
            " ompt_event_reduction_begin: parallel_id=%" PRIu64
            ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
-           ompt_get_thread_data()->value,
+           get_current_thread_id(),
            (parallel_data) ? parallel_data->value : 0, task_data->value,
            codeptr_ra);
     break;
@@ -673,7 +709,7 @@ static void on_ompt_callback_reduction(ompt_sync_region_t kind,
     printf("%" PRIu64 ":" _TOOL_PREFIX
            " ompt_event_reduction_end: parallel_id=%" PRIu64
            ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
-           ompt_get_thread_data()->value,
+           get_current_thread_id(),
            (parallel_data) ? parallel_data->value : 0, task_data->value,
            codeptr_ra);
     break;
@@ -689,7 +725,7 @@ on_ompt_callback_flush(
     const void *codeptr_ra)
 {
   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_flush: codeptr_ra=%p\n",
-         thread_data->value, codeptr_ra);
+         get_thread_id(thread_data), codeptr_ra);
 }
 
 static void
@@ -718,7 +754,7 @@ on_ompt_callback_cancel(
 
   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_cancel: task_data=%" PRIu64
          ", flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n",
-         ompt_get_thread_data()->value, task_data->value, first_flag_value,
+         get_current_thread_id(), task_data->value, first_flag_value,
          second_flag_value, flags, codeptr_ra);
 }
 
@@ -737,6 +773,7 @@ on_ompt_callback_implicit_task(
       if(task_data->ptr)
         printf("%s\n", "0: task_data initially not null");
       task_data->value = ompt_get_unique_id();
+      get_current_thread()->taskid = task_data->value;
 
       //there is no parallel_begin callback for implicit parallel region
       //thus it is initialized in initial task
@@ -753,25 +790,26 @@ on_ompt_callback_implicit_task(
                " ompt_event_initial_task_begin: parallel_id=%" PRIu64
                ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32
                ", index=%" PRIu32 ", flags=%" PRIu32 "\n",
-               ompt_get_thread_data()->value, parallel_data->value,
+               get_current_thread_id(), parallel_data->value,
                task_data->value, team_size, thread_num, flags);
       } else {
         printf("%" PRIu64 ":" _TOOL_PREFIX
                " ompt_event_implicit_task_begin: parallel_id=%" PRIu64
                ", task_id=%" PRIu64 ", team_size=%" PRIu32
                ", thread_num=%" PRIu32 "\n",
-               ompt_get_thread_data()->value, parallel_data->value,
+               get_current_thread_id(), parallel_data->value,
                task_data->value, team_size, thread_num);
       }
 
       break;
     case ompt_scope_end:
+      get_current_thread()->taskid = 0;
       if(flags & ompt_task_initial){
         printf("%" PRIu64 ":" _TOOL_PREFIX
                " ompt_event_initial_task_end: parallel_id=%" PRIu64
                ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32
                ", index=%" PRIu32 "\n",
-               ompt_get_thread_data()->value,
+               get_current_thread_id(),
                (parallel_data) ? parallel_data->value : 0, task_data->value,
                team_size, thread_num);
       } else {
@@ -779,7 +817,7 @@ on_ompt_callback_implicit_task(
                " ompt_event_implicit_task_end: parallel_id=%" PRIu64
                ", task_id=%" PRIu64 ", team_size=%" PRIu32
                ", thread_num=%" PRIu32 "\n",
-               ompt_get_thread_data()->value,
+               get_current_thread_id(),
                (parallel_data) ? parallel_data->value : 0, task_data->value,
                team_size, thread_num);
       }
@@ -804,13 +842,13 @@ on_ompt_callback_lock_init(
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_init_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+             get_current_thread_id(), wait_id, hint, impl, codeptr_ra);
       break;
     case ompt_mutex_nest_lock:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
+             get_current_thread_id(), wait_id, hint, impl, codeptr_ra);
       break;
     default:
       break;
@@ -828,13 +866,13 @@ on_ompt_callback_lock_destroy(
     case ompt_mutex_lock:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, codeptr_ra);
+             get_current_thread_id(), wait_id, codeptr_ra);
       break;
     case ompt_mutex_nest_lock:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_destroy_nest_lock: wait_id=%" PRIu64
              ", codeptr_ra=%p \n",
-             ompt_get_thread_data()->value, wait_id, codeptr_ra);
+             get_current_thread_id(), wait_id, codeptr_ra);
       break;
     default:
       break;
@@ -866,7 +904,7 @@ on_ompt_callback_work(
                  " ompt_event_loop_begin: parallel_id=%" PRIu64
                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
                  "\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra, count);
           break;
         case ompt_work_sections:
@@ -874,7 +912,7 @@ on_ompt_callback_work(
                  " ompt_event_sections_begin: parallel_id=%" PRIu64
                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
                  "\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra, count);
           break;
         case ompt_work_single_executor:
@@ -882,14 +920,14 @@ on_ompt_callback_work(
                  " ompt_event_single_in_block_begin: parallel_id=%" PRIu64
                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
                  "\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra, count);
           break;
         case ompt_work_single_other:
           printf("%" PRIu64 ":" _TOOL_PREFIX
                  " ompt_event_single_others_begin: parallel_id=%" PRIu64
                  ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra, count);
           break;
         case ompt_work_workshare:
@@ -900,7 +938,7 @@ on_ompt_callback_work(
                  " ompt_event_distribute_begin: parallel_id=%" PRIu64
                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
                  "\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra, count);
           break;
         case ompt_work_taskloop:
@@ -909,7 +947,7 @@ on_ompt_callback_work(
                  " ompt_event_taskloop_begin: parallel_id=%" PRIu64
                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
                  "\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra, count);
           break;
         case ompt_work_scope:
@@ -917,7 +955,7 @@ on_ompt_callback_work(
                  " ompt_event_scope_begin: parallel_id=%" PRIu64
                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
                  "\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra, count);
           break;
       }
@@ -933,28 +971,28 @@ on_ompt_callback_work(
           printf("%" PRIu64 ":" _TOOL_PREFIX
                  " ompt_event_loop_end: parallel_id=%" PRIu64
                  ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra, count);
           break;
         case ompt_work_sections:
           printf("%" PRIu64 ":" _TOOL_PREFIX
                  " ompt_event_sections_end: parallel_id=%" PRIu64
                  ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra, count);
           break;
         case ompt_work_single_executor:
           printf("%" PRIu64 ":" _TOOL_PREFIX
                  " ompt_event_single_in_block_end: parallel_id=%" PRIu64
                  ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra, count);
           break;
         case ompt_work_single_other:
           printf("%" PRIu64 ":" _TOOL_PREFIX
                  " ompt_event_single_others_end: parallel_id=%" PRIu64
                  ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra, count);
           break;
         case ompt_work_workshare:
@@ -965,7 +1003,7 @@ on_ompt_callback_work(
                  " ompt_event_distribute_end: parallel_id=%" PRIu64
                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
                  "\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra, count);
           break;
         case ompt_work_taskloop:
@@ -974,7 +1012,7 @@ on_ompt_callback_work(
                  " ompt_event_taskloop_end: parallel_id=%" PRIu64
                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
                  "\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra, count);
           break;
         case ompt_work_scope:
@@ -982,7 +1020,7 @@ on_ompt_callback_work(
                  " ompt_event_scope_end: parallel_id=%" PRIu64
                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
                  "\n",
-                 ompt_get_thread_data()->value, parallel_data->value,
+                 get_current_thread_id(), parallel_data->value,
                  task_data->value, codeptr_ra, count);
           break;
       }
@@ -1024,7 +1062,7 @@ static void on_ompt_callback_dispatch(
   printf("%" PRIu64 ":" _TOOL_PREFIX
          " %s: parallel_id=%" PRIu64 ", task_id=%" PRIu64
          ", codeptr_ra=%p, chunk_start=%" PRIu64 ", chunk_iterations=%" PRIu64
-         "\n", ompt_get_thread_data()->value, event_name, parallel_data->value,
+         "\n", get_current_thread_id(), event_name, parallel_data->value,
          task_data->value, codeptr_ra,
          dispatch_chunk ? dispatch_chunk->start : 0,
          dispatch_chunk ? dispatch_chunk->iterations : 0);
@@ -1040,14 +1078,14 @@ static void on_ompt_callback_masked(ompt_scope_endpoint_t endpoint,
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_masked_begin: parallel_id=%" PRIu64
              ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
-             ompt_get_thread_data()->value, parallel_data->value,
+             get_current_thread_id(), parallel_data->value,
              task_data->value, codeptr_ra);
       break;
     case ompt_scope_end:
       printf("%" PRIu64 ":" _TOOL_PREFIX
              " ompt_event_masked_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64
              ", codeptr_ra=%p\n",
-             ompt_get_thread_data()->value, parallel_data->value,
+             get_current_thread_id(), parallel_data->value,
              task_data->value, codeptr_ra);
       break;
     case ompt_scope_beginend:
@@ -1071,7 +1109,7 @@ static void on_ompt_callback_parallel_begin(
          ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
          "parallel_id=%" PRIu64 ", requested_%s=%" PRIu32
          ", codeptr_ra=%p, invoker=%d\n",
-         ompt_get_thread_data()->value, event, encountering_task_data->value,
+         get_current_thread_id(), event, encountering_task_data->value,
          encountering_task_frame->exit_frame.ptr,
          encountering_task_frame->enter_frame.ptr, parallel_data->value, size,
          requested_team_size, codeptr_ra, invoker);
@@ -1084,7 +1122,7 @@ static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data,
   const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams";
   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_%s_end: parallel_id=%" PRIu64
          ", task_id=%" PRIu64 ", invoker=%d, codeptr_ra=%p\n",
-         ompt_get_thread_data()->value, event, parallel_data->value,
+         get_current_thread_id(), event, parallel_data->value,
          encountering_task_data->value, invoker, codeptr_ra);
 }
 
@@ -1110,7 +1148,7 @@ on_ompt_callback_task_create(
       ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
       "new_task_id=%" PRIu64
       ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n",
-      ompt_get_thread_data()->value,
+      get_current_thread_id(),
       encountering_task_data ? encountering_task_data->value : 0,
       encountering_task_frame ? encountering_task_frame->exit_frame.ptr : NULL,
       encountering_task_frame ? encountering_task_frame->enter_frame.ptr : NULL,
@@ -1124,17 +1162,29 @@ on_ompt_callback_task_schedule(
     ompt_task_status_t prior_task_status,
     ompt_data_t *second_task_data)
 {
+  if (second_task_data == NULL)
+  {
+    // occurrence of a task-fulfill event.
+  }
+  else
+  {
+    thread_t * thread = get_current_thread();
+    if (thread->taskid != first_task_data->value)
+      printf("0: wrong task_schedule\n");
+    thread->taskid = second_task_data->value;
+  }
+
   printf("%" PRIu64 ":" _TOOL_PREFIX
          " ompt_event_task_schedule: first_task_id=%" PRIu64
          ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n",
-         ompt_get_thread_data()->value, first_task_data->value,
+         get_current_thread_id(), first_task_data->value,
          (second_task_data ? second_task_data->value : -1),
          ompt_task_status_t_values[prior_task_status], prior_task_status);
   if (prior_task_status == ompt_task_complete ||
       prior_task_status == ompt_task_late_fulfill ||
       prior_task_status == ompt_taskwait_complete) {
     printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_task_end: task_id=%" PRIu64
-           "\n", ompt_get_thread_data()->value, first_task_data->value);
+           "\n", get_current_thread_id(), first_task_data->value);
   }
 }
 
@@ -1162,7 +1212,7 @@ on_ompt_callback_dependences(
     progress[-2] = 0;
   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_dependences: task_id=%" PRIu64
          ", deps=[%s], ndeps=%d\n",
-         ompt_get_thread_data()->value, task_data->value, buffer, ndeps);
+         get_current_thread_id(), task_data->value, buffer, ndeps);
 }
 
 static void
@@ -1173,7 +1223,7 @@ on_ompt_callback_task_dependence(
   printf("%" PRIu64 ":" _TOOL_PREFIX
          " ompt_event_task_dependence_pair: first_task_id=%" PRIu64
          ", second_task_id=%" PRIu64 "\n",
-         ompt_get_thread_data()->value, first_task_data->value,
+         get_current_thread_id(), first_task_data->value,
          second_task_data->value);
 }
 
@@ -1184,11 +1234,15 @@ on_ompt_callback_thread_begin(
 {
   if(thread_data->ptr)
     printf("%s\n", "0: thread_data initially not null");
-  thread_data->value = ompt_get_unique_id();
+
+  thread_t * thread = (thread_t *) malloc(sizeof(thread_t));
+  thread->tid = ompt_get_unique_id();
+  thread->taskid = (uint64_t) -1;
+  thread_data->ptr = thread;
   printf("%" PRIu64 ":" _TOOL_PREFIX
          " ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n",
-         ompt_get_thread_data()->value, ompt_thread_t_values[thread_type],
-         thread_type, thread_data->value);
+         get_current_thread_id(), ompt_thread_t_values[thread_type],
+         thread_type, thread->tid);
 }
 
 static void
@@ -1197,7 +1251,7 @@ on_ompt_callback_thread_end(
 {
   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_thread_end: thread_id=%" PRIu64
          "\n",
-         ompt_get_thread_data()->value, thread_data->value);
+         get_current_thread_id(), get_thread_id(thread_data));
 }
 
 static int
@@ -1213,7 +1267,7 @@ on_ompt_callback_control_tool(
          ", modifier=%" PRIu64
          ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, "
          "current_task_frame.reenter=%p \n",
-         ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra,
+         get_current_thread_id(), command, modifier, arg, codeptr_ra,
          omptTaskFrame->exit_frame.ptr, omptTaskFrame->enter_frame.ptr);
 
   // the following would interfere with expected output for OMPT tests, so skip
@@ -1224,7 +1278,7 @@ on_ompt_callback_control_tool(
   while (ompt_get_task_info(task_level, NULL, (ompt_data_t **)&task_data, NULL,
                             NULL, NULL)) {
     printf("%" PRIu64 ":" _TOOL_PREFIX " task level %d: task_id=%" PRIu64 "\n",
-           ompt_get_thread_data()->value, task_level, task_data->value);
+           get_current_thread_id(), task_level, task_data->value);
     task_level++;
   }
 
@@ -1235,7 +1289,7 @@ on_ompt_callback_control_tool(
                                 NULL)) {
     printf("%" PRIu64 ":" _TOOL_PREFIX " parallel level %d: parallel_id=%" PRIu64
            "\n",
-           ompt_get_thread_data()->value, parallel_level, parallel_data->value);
+           get_current_thread_id(), parallel_level, parallel_data->value);
     parallel_level++;
   }
 #endif
@@ -1247,7 +1301,7 @@ static void on_ompt_callback_error(ompt_severity_t severity,
                                    const void *codeptr_ra) {
   printf("%" PRIu64 ": ompt_event_runtime_error: severity=%" PRIu32
          ", message=%s, length=%" PRIu64 ", codeptr_ra=%p\n",
-         ompt_get_thread_data()->value, severity, message, (uint64_t)length,
+         get_current_thread_id(), severity, message, (uint64_t)length,
          codeptr_ra);
 }
 
diff --git a/openmp/runtime/test/ompt/cancel/cancel_taskgroup.c b/openmp/runtime/test/ompt/cancel/cancel_taskgroup.c
index 23e5de7ccfd639..7a8d6d3ea87340 100644
--- a/openmp/runtime/test/ompt/cancel/cancel_taskgroup.c
+++ b/openmp/runtime/test/ompt/cancel/cancel_taskgroup.c
@@ -56,6 +56,17 @@ int main()
       }
     }
     #pragma omp barrier
+
+    # pragma omp taskgroup
+    {
+      # pragma omp task if(0)
+      {
+          # pragma omp cancel taskgroup
+      }
+
+      # pragma omp task // B
+      {}
+    }
   }
 
   // Check if libomp supports the callbacks for this test.
@@ -66,6 +77,7 @@ int main()
   // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
 
   // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+  // CHECK-NOT: 0: wrong task_schedule
   // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_masked_begin:
   // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]],
   // CHECK-SAME: task_id=[[PARENT_TASK_ID:[0-9]+]],
diff --git a/openmp/runtime/test/ompt/misc/api_calls_misc.c b/openmp/runtime/test/ompt/misc/api_calls_misc.c
index 884421e62b1e9d..2e993c8b061bf0 100644
--- a/openmp/runtime/test/ompt/misc/api_calls_misc.c
+++ b/openmp/runtime/test/ompt/misc/api_calls_misc.c
@@ -10,12 +10,12 @@ int main() {
     ompt_callback_t callback;
     ompt_get_callback(ompt_callback_thread_begin, &callback);
     printf("%" PRIu64 ": &on_ompt_callback_thread_begin=%p\n",
-           ompt_get_thread_data()->value, &on_ompt_callback_thread_begin);
+           get_current_thread_id(), &on_ompt_callback_thread_begin);
     printf("%" PRIu64 ": ompt_get_callback() result=%p\n",
-           ompt_get_thread_data()->value, callback);
+           get_current_thread_id(), callback);
 
     // ompt_get_state()
-    printf("%" PRIu64 ": ompt_get_state()=%d\n", ompt_get_thread_data()->value,
+    printf("%" PRIu64 ": ompt_get_state()=%d\n", get_current_thread_id(),
            ompt_get_state(NULL));
 
     // ompt_enumerate_states()
@@ -26,12 +26,12 @@ int main() {
       steps++;
       if (!state_name)
         printf("%" PRIu64 ": state_name is NULL\n",
-               ompt_get_thread_data()->value);
+               get_current_thread_id());
     }
     if (steps >= 1000) {
       // enumeration did not end after 1000 steps
       printf("%" PRIu64 ": states enumeration did not end\n",
-             ompt_get_thread_data()->value);
+             get_current_thread_id());
     }
 
     // ompt_enumerate_mutex_impls()
@@ -43,12 +43,12 @@ int main() {
       steps++;
       if (!impl_name)
         printf("%" PRIu64 ": impl_name is NULL\n",
-               ompt_get_thread_data()->value);
+               get_current_thread_id());
     }
     if (steps >= 1000) {
       // enumeration did not end after 1000 steps
       printf("%" PRIu64 ": mutex_impls enumeration did not end\n",
-             ompt_get_thread_data()->value);
+             get_current_thread_id());
     }
   }
 
diff --git a/openmp/runtime/test/ompt/misc/api_calls_places.c b/openmp/runtime/test/ompt/misc/api_calls_places.c
index 3385c9c62abf80..5ae7a223c268b8 100644
--- a/openmp/runtime/test/ompt/misc/api_calls_places.c
+++ b/openmp/runtime/test/ompt/misc/api_calls_places.c
@@ -7,7 +7,7 @@
 #undef __USE_GNU
 
 void print_list(char *function_name, int size, int list[]) {
-  printf("%" PRIu64 ": %s(0)=(%d", ompt_get_thread_data()->value, function_name,
+  printf("%" PRIu64 ": %s(0)=(%d", get_current_thread_id(), function_name,
          list[0]);
   int i;
   for (i = 1; i < size; i++) {
@@ -20,9 +20,9 @@ int main() {
 #pragma omp parallel num_threads(1)
   {
     printf("%" PRIu64 ": omp_get_num_places()=%d\n",
-           ompt_get_thread_data()->value, omp_get_num_places());
+           get_current_thread_id(), omp_get_num_places());
     printf("%" PRIu64 ": ompt_get_num_places()=%d\n",
-           ompt_get_thread_data()->value, ompt_get_num_places());
+           get_current_thread_id(), ompt_get_num_places());
 
     int omp_ids_size = omp_get_place_num_procs(0);
     int omp_ids[omp_ids_size];
@@ -34,9 +34,9 @@ int main() {
     print_list("ompt_get_place_proc_ids", ompt_ids_size, ompt_ids);
 
     printf("%" PRIu64 ": omp_get_place_num()=%d\n",
-           ompt_get_thread_data()->value, omp_get_place_num());
+           get_current_thread_id(), omp_get_place_num());
     printf("%" PRIu64 ": ompt_get_place_num()=%d\n",
-           ompt_get_thread_data()->value, ompt_get_place_num());
+           get_current_thread_id(), ompt_get_place_num());
 
     int omp_nums_size = omp_get_partition_num_places();
     int omp_nums[omp_nums_size];
@@ -47,15 +47,15 @@ int main() {
     ompt_get_partition_place_nums(ompt_nums_size, ompt_nums);
     print_list("ompt_get_partition_place_nums", ompt_nums_size, ompt_nums);
 
-    printf("%" PRIu64 ": sched_getcpu()=%d\n", ompt_get_thread_data()->value,
+    printf("%" PRIu64 ": sched_getcpu()=%d\n", get_current_thread_id(),
            sched_getcpu());
     printf("%" PRIu64 ": ompt_get_proc_id()=%d\n",
-           ompt_get_thread_data()->value, ompt_get_proc_id());
+           get_current_thread_id(), ompt_get_proc_id());
 
     printf("%" PRIu64 ": omp_get_num_procs()=%d\n",
-           ompt_get_thread_data()->value, omp_get_num_procs());
+           get_current_thread_id(), omp_get_num_procs());
     printf("%" PRIu64 ": ompt_get_num_procs()=%d\n",
-           ompt_get_thread_data()->value, ompt_get_num_procs());
+           get_current_thread_id(), ompt_get_num_procs());
   }
 
   // Check if libomp supports the callbacks for this test.
diff --git a/openmp/runtime/test/ompt/parallel/nested_lwt_thread_num.c b/openmp/runtime/test/ompt/parallel/nested_lwt_thread_num.c
index 63d90052637135..9334d928c9a167 100644
--- a/openmp/runtime/test/ompt/parallel/nested_lwt_thread_num.c
+++ b/openmp/runtime/test/ompt/parallel/nested_lwt_thread_num.c
@@ -21,7 +21,7 @@ void print_task_info_at(int ancestor_level, int id)
     printf("%" PRIu64 ": ancestor_level=%d id=%d task_type=%s=%d "
                       "parallel_id=%" PRIu64 " task_id=%" PRIu64
                       " thread_num=%d\n",
-        ompt_get_thread_data()->value, ancestor_level, id, buffer,
+        get_current_thread_id(), ancestor_level, id, buffer,
         task_type, parallel_data->value, task_data->value, thread_num);
   }
 };
diff --git a/openmp/runtime/test/ompt/parallel/repeated_calls.c b/openmp/runtime/test/ompt/parallel/repeated_calls.c
index 84f85019c1ea24..5fd76bf7cec9b8 100644
--- a/openmp/runtime/test/ompt/parallel/repeated_calls.c
+++ b/openmp/runtime/test/ompt/parallel/repeated_calls.c
@@ -2,6 +2,7 @@
 // REQUIRES: ompt
 
 #define USE_PRIVATE_TOOL 1
+#define get_current_thread_id() ompt_get_thread_data()->value
 #include "callback.h"
 
 __attribute__((noinline))
diff --git a/openmp/runtime/test/ompt/synchronization/lock.c b/openmp/runtime/test/ompt/synchronization/lock.c
index 6a4f88a11cc274..3adaa30bd9a34c 100644
--- a/openmp/runtime/test/ompt/synchronization/lock.c
+++ b/openmp/runtime/test/ompt/synchronization/lock.c
@@ -10,7 +10,7 @@ int main()
     print_ids(0);
 
   omp_lock_t lock;
-  printf("%" PRIu64 ": &lock: %" PRIu64 "\n", ompt_get_thread_data()->value, (ompt_wait_id_t)(uintptr_t) &lock);
+  printf("%" PRIu64 ": &lock: %" PRIu64 "\n", get_current_thread_id(), (ompt_wait_id_t)(uintptr_t) &lock);
   omp_init_lock(&lock);
   print_fuzzy_address(1);
   omp_set_lock(&lock);
diff --git a/openmp/runtime/test/ompt/synchronization/masked.c b/openmp/runtime/test/ompt/synchronization/masked.c
index 3eb45d9592d802..8d825f3530dcb2 100644
--- a/openmp/runtime/test/ompt/synchronization/masked.c
+++ b/openmp/runtime/test/ompt/synchronization/masked.c
@@ -18,7 +18,7 @@ int main() {
     print_current_address(2);
   }
 
-  printf("%" PRIu64 ": x=%d\n", ompt_get_thread_data()->value, x);
+  printf("%" PRIu64 ": x=%d\n", get_current_thread_id(), x);
 
   return 0;
 }
diff --git a/openmp/runtime/test/ompt/synchronization/master.c b/openmp/runtime/test/ompt/synchronization/master.c
index 34ecc522b85ffd..c0701718085538 100644
--- a/openmp/runtime/test/ompt/synchronization/master.c
+++ b/openmp/runtime/test/ompt/synchronization/master.c
@@ -4,6 +4,7 @@
 // XFAIL: gcc
 
 #define USE_PRIVATE_TOOL 1
+#define get_current_thread_id() ompt_get_thread_data()->value
 #include "callback.h"
 #include <omp.h>
 
diff --git a/openmp/runtime/test/ompt/synchronization/nest_lock.c b/openmp/runtime/test/ompt/synchronization/nest_lock.c
index 39681ae40069d8..81a49fe804c4eb 100644
--- a/openmp/runtime/test/ompt/synchronization/nest_lock.c
+++ b/openmp/runtime/test/ompt/synchronization/nest_lock.c
@@ -11,7 +11,7 @@ int main()
 
   omp_nest_lock_t nest_lock;
   printf("%" PRIu64 ": &nest_lock: %" PRIu64 "\n",
-         ompt_get_thread_data()->value, (ompt_wait_id_t)(uintptr_t)&nest_lock);
+         get_current_thread_id(), (ompt_wait_id_t)(uintptr_t)&nest_lock);
   omp_init_nest_lock(&nest_lock);
   print_fuzzy_address(1);
   omp_set_nest_lock(&nest_lock);
diff --git a/openmp/runtime/test/ompt/tasks/dependences.c b/openmp/runtime/test/ompt/tasks/dependences.c
index 16732e3fe1f0b1..bd91491900264f 100644
--- a/openmp/runtime/test/ompt/tasks/dependences.c
+++ b/openmp/runtime/test/ompt/tasks/dependences.c
@@ -15,7 +15,7 @@ int main() {
 #pragma omp master
     {
       print_ids(0);
-      printf("%" PRIu64 ": address of x: %p\n", ompt_get_thread_data()->value,
+      printf("%" PRIu64 ": address of x: %p\n", get_current_thread_id(),
              &x);
 #pragma omp task depend(out : x) shared(condition)
       {
diff --git a/openmp/runtime/test/ompt/tasks/dependences_mutexinoutset.c b/openmp/runtime/test/ompt/tasks/dependences_mutexinoutset.c
index 50385b69d44b11..9ba58d5aebc125 100644
--- a/openmp/runtime/test/ompt/tasks/dependences_mutexinoutset.c
+++ b/openmp/runtime/test/ompt/tasks/dependences_mutexinoutset.c
@@ -22,7 +22,7 @@ int main() {
 #pragma omp master
     {
       print_ids(0);
-      printf("%" PRIu64 ": address of x: %p\n", ompt_get_thread_data()->value,
+      printf("%" PRIu64 ": address of x: %p\n", get_current_thread_id(),
              &x);
 #pragma omp task depend(out : x)
       {
diff --git a/openmp/runtime/test/ompt/tasks/explicit_task_thread_num.c b/openmp/runtime/test/ompt/tasks/explicit_task_thread_num.c
index 6f3c106476c001..850ce6c6f5dcfa 100644
--- a/openmp/runtime/test/ompt/tasks/explicit_task_thread_num.c
+++ b/openmp/runtime/test/ompt/tasks/explicit_task_thread_num.c
@@ -21,7 +21,7 @@ void print_task_info_at(int ancestor_level, int id)
     printf("%" PRIu64 ": ancestor_level=%d id=%d task_type=%s=%d "
                       "parallel_id=%" PRIu64 " task_id=%" PRIu64
                       " thread_num=%d\n",
-        ompt_get_thread_data()->value, ancestor_level, id, buffer,
+        get_current_thread_id(), ancestor_level, id, buffer,
         task_type, parallel_data->value, task_data->value, thread_num);
   }
 };
diff --git a/openmp/runtime/test/ompt/tasks/task_if0-depend.c b/openmp/runtime/test/ompt/tasks/task_if0-depend.c
index f7fb6ef1069da5..012d00a90e3c79 100644
--- a/openmp/runtime/test/ompt/tasks/task_if0-depend.c
+++ b/openmp/runtime/test/ompt/tasks/task_if0-depend.c
@@ -11,7 +11,7 @@ int main() {
 #pragma omp master
     {
       print_ids(0);
-      printf("%" PRIu64 ": address of x: %p\n", ompt_get_thread_data()->value,
+      printf("%" PRIu64 ": address of x: %p\n", get_current_thread_id(),
              &x);
 #pragma omp task depend(out : x)
       { x++; }
diff --git a/openmp/runtime/test/ompt/tasks/task_memory.c b/openmp/runtime/test/ompt/tasks/task_memory.c
index 0be157437d57fb..387da2ba09b106 100644
--- a/openmp/runtime/test/ompt/tasks/task_memory.c
+++ b/openmp/runtime/test/ompt/tasks/task_memory.c
@@ -2,6 +2,7 @@
 // REQUIRES: ompt
 // UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
 #define USE_PRIVATE_TOOL 1
+#define get_current_thread_id() ompt_get_thread_data()->value
 #include "callback.h"
 #include <omp.h>
 
diff --git a/openmp/runtime/test/ompt/tasks/task_types.c b/openmp/runtime/test/ompt/tasks/task_types.c
index 15226354d1f034..01bc5cfa8b143d 100644
--- a/openmp/runtime/test/ompt/tasks/task_types.c
+++ b/openmp/runtime/test/ompt/tasks/task_types.c
@@ -41,19 +41,19 @@ int main() {
 #pragma omp task untied
     {
       // Output of thread_id is needed to know on which thread task is executed
-      printf("%" PRIu64 ": explicit_untied\n", ompt_get_thread_data()->value);
+      printf("%" PRIu64 ": explicit_untied\n", get_current_thread_id());
       print_ids(0);
       print_frame(1);
       x++;
 #pragma omp taskyield
       printf("%" PRIu64 ": explicit_untied(2)\n",
-             ompt_get_thread_data()->value);
+             get_current_thread_id());
       print_ids(0);
       print_frame(1);
       x++;
 #pragma omp taskwait
       printf("%" PRIu64 ": explicit_untied(3)\n",
-             ompt_get_thread_data()->value);
+             get_current_thread_id());
       print_ids(0);
       print_frame(1);
       x++;
diff --git a/openmp/runtime/test/ompt/tasks/task_types_serialized.c b/openmp/runtime/test/ompt/tasks/task_types_serialized.c
index 3fe163e1c9689b..9c0496fdab3d52 100644
--- a/openmp/runtime/test/ompt/tasks/task_types_serialized.c
+++ b/openmp/runtime/test/ompt/tasks/task_types_serialized.c
@@ -13,7 +13,7 @@ void print_task_type(int id)
     char buffer[2048];
     ompt_get_task_info(0, &task_type, NULL, NULL, NULL, NULL);
     format_task_type(task_type, buffer);
-    printf("%" PRIu64 ": id=%d task_type=%s=%d\n", ompt_get_thread_data()->value, id, buffer, task_type);
+    printf("%" PRIu64 ": id=%d task_type=%s=%d\n", get_current_thread_id(), id, buffer, task_type);
   }
 };
 
diff --git a/openmp/runtime/test/ompt/tasks/taskwait-depend.c b/openmp/runtime/test/ompt/tasks/taskwait-depend.c
index 74dad2a8c6db2f..73260c2419583f 100644
--- a/openmp/runtime/test/ompt/tasks/taskwait-depend.c
+++ b/openmp/runtime/test/ompt/tasks/taskwait-depend.c
@@ -20,7 +20,7 @@ int main() {
 #pragma omp master
     {
       print_ids(0);
-      printf("%" PRIu64 ": address of x: %p\n", ompt_get_thread_data()->value,
+      printf("%" PRIu64 ": address of x: %p\n", get_current_thread_id(),
              &x);
 #pragma omp task depend(out : x)
       { x++; }
diff --git a/openmp/runtime/test/ompt/tasks/taskyield.c b/openmp/runtime/test/ompt/tasks/taskyield.c
index 2dd0fa1ae49a93..baa081f989b7db 100644
--- a/openmp/runtime/test/ompt/tasks/taskyield.c
+++ b/openmp/runtime/test/ompt/tasks/taskyield.c
@@ -24,9 +24,9 @@ int main()
         {
           x++;
         }
-        printf("%" PRIu64 ": before yield\n", ompt_get_thread_data()->value);
+        printf("%" PRIu64 ": before yield\n", get_current_thread_id());
         #pragma omp taskyield
-        printf("%" PRIu64 ": after yield\n", ompt_get_thread_data()->value);
+        printf("%" PRIu64 ": after yield\n", get_current_thread_id());
         OMPT_SIGNAL(condition);
     }
   }
diff --git a/openmp/runtime/test/ompt/worksharing/sections.c b/openmp/runtime/test/ompt/worksharing/sections.c
index bafb74312ff45d..466644e67cb8f1 100644
--- a/openmp/runtime/test/ompt/worksharing/sections.c
+++ b/openmp/runtime/test/ompt/worksharing/sections.c
@@ -13,11 +13,11 @@ int main()
   {
     #pragma omp section
     {
-      printf("%lu: section 1\n", ompt_get_thread_data()->value);
+      printf("%lu: section 1\n", get_current_thread_id());
     }
     #pragma omp section
     {
-      printf("%lu: section 2\n", ompt_get_thread_data()->value);
+      printf("%lu: section 2\n", get_current_thread_id());
     }
   }
 
diff --git a/openmp/runtime/test/ompt/worksharing/sections_dispatch.c b/openmp/runtime/test/ompt/worksharing/sections_dispatch.c
index bcf0bd0cde6739..d027825fa91150 100644
--- a/openmp/runtime/test/ompt/worksharing/sections_dispatch.c
+++ b/openmp/runtime/test/ompt/worksharing/sections_dispatch.c
@@ -13,11 +13,11 @@ int main()
   {
     #pragma omp section
     {
-      printf("%lu: section 1\n", ompt_get_thread_data()->value);
+      printf("%lu: section 1\n", get_current_thread_id());
     }
     #pragma omp section
     {
-      printf("%lu: section 2\n", ompt_get_thread_data()->value);
+      printf("%lu: section 2\n", get_current_thread_id());
     }
   }
 
diff --git a/openmp/runtime/test/ompt/worksharing/single.c b/openmp/runtime/test/ompt/worksharing/single.c
index 6b24f2d9398fec..1af0acca75d50c 100644
--- a/openmp/runtime/test/ompt/worksharing/single.c
+++ b/openmp/runtime/test/ompt/worksharing/single.c
@@ -13,7 +13,7 @@ int main()
   {
     #pragma omp single
     {
-      printf("%" PRIu64 ": in single\n", ompt_get_thread_data()->value);
+      printf("%" PRIu64 ": in single\n", get_current_thread_id());
       x++;
     }
   }



More information about the Openmp-commits mailing list