[Openmp-commits] [openmp] [OpenMP] Use new OMPT state and sync kinds for barrier events (PR #95602)

Hansang Bae via Openmp-commits openmp-commits at lists.llvm.org
Fri Jun 14 14:08:53 PDT 2024


https://github.com/hansangbae created https://github.com/llvm/llvm-project/pull/95602

This change makes the runtime use the new OMPT state and sync-region kinds introduced in OpenMP 5.1 in place of the deprecated implicit-barrier state and sync-region kinds. Events from implicit barriers now use distinct enumerators for worksharing, parallel, and teams barriers.

>From beb2a1d759b673054b7dfa6a9bcb5990aa13e34c Mon Sep 17 00:00:00 2001
From: Hansang Bae <hansang.bae at intel.com>
Date: Fri, 12 Apr 2024 12:00:31 -0500
Subject: [PATCH] [OpenMP] Use new OMPT state and sync kinds for barrier events

This change makes the runtime use new OMPT state and sync kinds
introduced in OpenMP 5.1 in place of the deprecated implicit
state and sync kinds. Events from implicit barriers use different
enumerators for workshare, parallel, and teams.
---
 openmp/runtime/src/include/omp-tools.h.var    |  2 +
 openmp/runtime/src/kmp_barrier.cpp            | 47 ++++++++---
 openmp/runtime/src/kmp_runtime.cpp            | 17 ++--
 openmp/runtime/src/kmp_wait_release.h         | 16 ++--
 openmp/runtime/src/ompt-specific.cpp          | 15 ++--
 .../barrier/implicit_task_data.c              | 82 ++++++++++++-------
 6 files changed, 117 insertions(+), 62 deletions(-)

diff --git a/openmp/runtime/src/include/omp-tools.h.var b/openmp/runtime/src/include/omp-tools.h.var
index 1d1a0f7771e95..fa55a4760f159 100644
--- a/openmp/runtime/src/include/omp-tools.h.var
+++ b/openmp/runtime/src/include/omp-tools.h.var
@@ -78,6 +78,8 @@
                                             /* implicit barrier at the end of worksharing */    \
     macro (ompt_state_wait_barrier_implicit, 0x013)  /* implicit barrier */                      \
     macro (ompt_state_wait_barrier_explicit, 0x014)  /* explicit barrier */                      \
+    macro (ompt_state_wait_barrier_implementation, 0x015) /* implementation barrier */           \
+    macro (ompt_state_wait_barrier_teams, 0x016)          /* teams barrier */                    \
                                                                                                 \
     /* task wait states (32..63) */                                                             \
     macro (ompt_state_wait_taskwait, 0x020)  /* waiting at a taskwait */                         \
diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp
index b381694c0953e..658cee594e48d 100644
--- a/openmp/runtime/src/kmp_barrier.cpp
+++ b/openmp/runtime/src/kmp_barrier.cpp
@@ -1805,7 +1805,25 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
     // It is OK to report the barrier state after the barrier begin callback.
     // According to the OMPT specification, a compliant implementation may
     // even delay reporting this state until the barrier begins to wait.
-    this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
+    auto *ompt_thr_info = &this_thr->th.ompt_thread_info;
+    switch (barrier_kind) {
+    case ompt_sync_region_barrier_explicit:
+      ompt_thr_info->state = ompt_state_wait_barrier_explicit;
+      break;
+    case ompt_sync_region_barrier_implicit_workshare:
+      ompt_thr_info->state = ompt_state_wait_barrier_implicit_workshare;
+      break;
+    case ompt_sync_region_barrier_implicit_parallel:
+      ompt_thr_info->state = ompt_state_wait_barrier_implicit_parallel;
+      break;
+    case ompt_sync_region_barrier_teams:
+      ompt_thr_info->state = ompt_state_wait_barrier_teams;
+      break;
+    case ompt_sync_region_barrier_implementation:
+      [[fallthrough]];
+    default:
+      ompt_thr_info->state = ompt_state_wait_barrier_implementation;
+    }
   }
 #endif
 
@@ -2213,20 +2231,24 @@ void __kmp_join_barrier(int gtid) {
       codeptr = team->t.ompt_team_info.master_return_address;
     my_task_data = OMPT_CUR_TASK_DATA(this_thr);
     my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
+    ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
+    ompt_state_t ompt_state = ompt_state_wait_barrier_implicit_parallel;
+    if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league) {
+      sync_kind = ompt_sync_region_barrier_teams;
+      ompt_state = ompt_state_wait_barrier_teams;
+    }
     if (ompt_enabled.ompt_callback_sync_region) {
       ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
-          ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data,
-          my_task_data, codeptr);
+          sync_kind, ompt_scope_begin, my_parallel_data, my_task_data, codeptr);
     }
     if (ompt_enabled.ompt_callback_sync_region_wait) {
       ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
-          ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data,
-          my_task_data, codeptr);
+          sync_kind, ompt_scope_begin, my_parallel_data, my_task_data, codeptr);
     }
     if (!KMP_MASTER_TID(ds_tid))
       this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
 #endif
-    this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit;
+    this_thr->th.ompt_thread_info.state = ompt_state;
   }
 #endif
 
@@ -2488,8 +2510,10 @@ void __kmp_fork_barrier(int gtid, int tid) {
   }
 
 #if OMPT_SUPPORT
+  ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
   if (ompt_enabled.enabled &&
-      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
+      (ompt_state == ompt_state_wait_barrier_teams ||
+       ompt_state == ompt_state_wait_barrier_implicit_parallel)) {
     int ds_tid = this_thr->th.th_info.ds.ds_tid;
     ompt_data_t *task_data = (team)
                                  ? OMPT_CUR_TASK_DATA(this_thr)
@@ -2501,15 +2525,16 @@ void __kmp_fork_barrier(int gtid, int tid) {
         (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
          ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
       codeptr = team ? team->t.ompt_team_info.master_return_address : NULL;
+    ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
+    if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
+      sync_kind = ompt_sync_region_barrier_teams;
     if (ompt_enabled.ompt_callback_sync_region_wait) {
       ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
-          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
-          codeptr);
+          sync_kind, ompt_scope_end, NULL, task_data, codeptr);
     }
     if (ompt_enabled.ompt_callback_sync_region) {
       ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
-          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
-          codeptr);
+          sync_kind, ompt_scope_end, NULL, task_data, codeptr);
     }
 #endif
     if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 74b44b5d4d9cc..0bf1dab2ebf14 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -7666,7 +7666,7 @@ int __kmp_invoke_task_func(int gtid) {
   );
 #if OMPT_SUPPORT
   *exit_frame_p = NULL;
-  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
+  this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_team;
 #endif
 
 #if KMP_STATS_ENABLED
@@ -7764,7 +7764,7 @@ int __kmp_invoke_teams_master(int gtid) {
 #endif
   __kmp_teams_master(gtid);
 #if OMPT_SUPPORT
-  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
+  this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_league;
 #endif
   __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
   return 1;
@@ -8015,8 +8015,10 @@ void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
 
   __kmp_join_barrier(gtid); /* wait for everyone */
 #if OMPT_SUPPORT
+  ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
   if (ompt_enabled.enabled &&
-      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
+      (ompt_state == ompt_state_wait_barrier_teams ||
+       ompt_state == ompt_state_wait_barrier_implicit_parallel)) {
     int ds_tid = this_thr->th.th_info.ds.ds_tid;
     ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
     this_thr->th.ompt_thread_info.state = ompt_state_overhead;
@@ -8027,15 +8029,16 @@ void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
          ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
       codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
 
+    ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
+    if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
+      sync_kind = ompt_sync_region_barrier_teams;
     if (ompt_enabled.ompt_callback_sync_region_wait) {
       ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
-          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
-          codeptr);
+          sync_kind, ompt_scope_end, NULL, task_data, codeptr);
     }
     if (ompt_enabled.ompt_callback_sync_region) {
       ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
-          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
-          codeptr);
+          sync_kind, ompt_scope_end, NULL, task_data, codeptr);
     }
 #endif
     if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
diff --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h
index 12d5d0677a90a..97db68943da70 100644
--- a/openmp/runtime/src/kmp_wait_release.h
+++ b/openmp/runtime/src/kmp_wait_release.h
@@ -323,19 +323,21 @@ static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                      ompt_state_t ompt_state,
                                      ompt_data_t *tId) {
   int ds_tid = this_thr->th.th_info.ds.ds_tid;
-  if (ompt_state == ompt_state_wait_barrier_implicit) {
+  if (ompt_state == ompt_state_wait_barrier_implicit_parallel ||
+      ompt_state == ompt_state_wait_barrier_teams) {
     this_thr->th.ompt_thread_info.state = ompt_state_overhead;
 #if OMPT_OPTIONAL
     void *codeptr = NULL;
+    ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
+    if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
+      sync_kind = ompt_sync_region_barrier_teams;
     if (ompt_enabled.ompt_callback_sync_region_wait) {
       ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
-          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
-          codeptr);
+          sync_kind, ompt_scope_end, NULL, tId, codeptr);
     }
     if (ompt_enabled.ompt_callback_sync_region) {
       ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
-          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
-          codeptr);
+          sync_kind, ompt_scope_end, NULL, tId, codeptr);
     }
 #endif
     if (!KMP_MASTER_TID(ds_tid)) {
@@ -455,7 +457,9 @@ final_spin=FALSE)
   ompt_data_t *tId;
   if (ompt_enabled.enabled) {
     ompt_entry_state = this_thr->th.ompt_thread_info.state;
-    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
+    if (!final_spin ||
+        (ompt_entry_state != ompt_state_wait_barrier_implicit_parallel &&
+         ompt_entry_state != ompt_state_wait_barrier_teams) ||
         KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
       ompt_lw_taskteam_t *team = NULL;
       if (this_thr->th.th_team)
diff --git a/openmp/runtime/src/ompt-specific.cpp b/openmp/runtime/src/ompt-specific.cpp
index 16acbe052d12e..0737c0cdfb160 100644
--- a/openmp/runtime/src/ompt-specific.cpp
+++ b/openmp/runtime/src/ompt-specific.cpp
@@ -503,22 +503,23 @@ static uint64_t __ompt_get_unique_id_internal() {
 
 ompt_sync_region_t __ompt_get_barrier_kind(enum barrier_type bt,
                                            kmp_info_t *thr) {
-  if (bt == bs_forkjoin_barrier)
-    return ompt_sync_region_barrier_implicit;
+  if (bt == bs_forkjoin_barrier) {
+    if (thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
+      return ompt_sync_region_barrier_teams;
+    else
+      return ompt_sync_region_barrier_implicit_parallel;
+  }
 
-  if (bt != bs_plain_barrier)
+  if (bt != bs_plain_barrier || !thr->th.th_ident)
     return ompt_sync_region_barrier_implementation;
 
-  if (!thr->th.th_ident)
-    return ompt_sync_region_barrier;
-
   kmp_int32 flags = thr->th.th_ident->flags;
 
   if ((flags & KMP_IDENT_BARRIER_EXPL) != 0)
     return ompt_sync_region_barrier_explicit;
 
   if ((flags & KMP_IDENT_BARRIER_IMPL) != 0)
-    return ompt_sync_region_barrier_implicit;
+    return ompt_sync_region_barrier_implicit_workshare;
 
   return ompt_sync_region_barrier_implementation;
 }
diff --git a/openmp/runtime/test/ompt/synchronization/barrier/implicit_task_data.c b/openmp/runtime/test/ompt/synchronization/barrier/implicit_task_data.c
index 7ac3e9099c8ee..907c149b58834 100644
--- a/openmp/runtime/test/ompt/synchronization/barrier/implicit_task_data.c
+++ b/openmp/runtime/test/ompt/synchronization/barrier/implicit_task_data.c
@@ -76,20 +76,32 @@ on_ompt_callback_sync_region(
   ompt_data_t *task_data,
   const void *codeptr_ra)
 {
-  switch(endpoint)
-  {
-    case ompt_scope_begin:
-      task_data->value = ompt_get_unique_id();
-      if (kind == ompt_sync_region_barrier_implicit)
-        printf("%" PRIu64 ": ompt_event_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
-      break;
-    case ompt_scope_end:
-      if (kind == ompt_sync_region_barrier_implicit)
-        printf("%" PRIu64 ": ompt_event_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
-      break;
-    case ompt_scope_beginend:
-      printf("ompt_scope_beginend should never be passed to %s\n", __func__);
-      exit(-1);
+  const char *event_name = NULL;
+  if (endpoint == ompt_scope_begin) {
+    event_name = "ompt_event_barrier_begin";
+    task_data->value = ompt_get_unique_id();
+  } else if (endpoint == ompt_scope_end) {
+    event_name = "ompt_event_barrier_end";
+  } else {
+    printf("ompt_scope_beginend should never be passed to %s\n", __func__);
+    exit(-1);
+  }
+
+  switch (kind) {
+  case ompt_sync_region_barrier:
+  case ompt_sync_region_barrier_implicit:
+  case ompt_sync_region_barrier_explicit:
+  case ompt_sync_region_barrier_implicit_workshare:
+  case ompt_sync_region_barrier_implicit_parallel:
+  case ompt_sync_region_barrier_teams:
+  case ompt_sync_region_barrier_implementation:
+    printf("%" PRIu64 ": %s: parallel_id=%" PRIu64 ", task_id=%" PRIu64
+           ", codeptr_ra=%p\n", ompt_get_thread_data()->value, event_name,
+           parallel_data ? parallel_data->value : 0, task_data->value,
+           codeptr_ra);
+    break;
+  default:
+    ; // do nothing
   }
 }
 
@@ -101,23 +113,31 @@ on_ompt_callback_sync_region_wait(
   ompt_data_t *task_data,
   const void *codeptr_ra)
 {
-  switch(endpoint)
-  {
-    case ompt_scope_begin:
-      if (kind == ompt_sync_region_barrier_implicit)
-        printf("%" PRIu64
-               ": ompt_event_wait_barrier_begin: parallel_id=%" PRIu64
-               ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
-               ompt_get_thread_data()->value, parallel_data->value,
-               task_data->value, codeptr_ra);
-      break;
-    case ompt_scope_end:
-      if (kind == ompt_sync_region_barrier_implicit)
-        printf("%" PRIu64 ": ompt_event_wait_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
-      break;
-    case ompt_scope_beginend:
-      printf("ompt_scope_beginend should never be passed to %s\n", __func__);
-      exit(-1);
+  const char *event_name = NULL;
+  if (endpoint == ompt_scope_begin) {
+    event_name = "ompt_event_wait_barrier_begin";
+  } else if (endpoint == ompt_scope_end) {
+    event_name = "ompt_event_wait_barrier_end";
+  } else {
+    printf("ompt_scope_beginend should never be passed to %s\n", __func__);
+    exit(-1);
+  }
+
+  switch (kind) {
+  case ompt_sync_region_barrier:
+  case ompt_sync_region_barrier_implicit:
+  case ompt_sync_region_barrier_explicit:
+  case ompt_sync_region_barrier_implicit_workshare:
+  case ompt_sync_region_barrier_implicit_parallel:
+  case ompt_sync_region_barrier_teams:
+  case ompt_sync_region_barrier_implementation:
+    printf("%" PRIu64 ": %s: parallel_id=%" PRIu64 ", task_id=%" PRIu64
+           ", codeptr_ra=%p\n", ompt_get_thread_data()->value, event_name,
+           parallel_data ? parallel_data->value : 0, task_data->value,
+           codeptr_ra);
+    break;
+  default:
+    ; // do nothing
   }
 }
 



More information about the Openmp-commits mailing list