[Openmp-commits] [openmp] [OpenMP] Emit workshare events correctly for distribute-and-parallel loops (PR #133323)

Hansang Bae via Openmp-commits openmp-commits at lists.llvm.org
Fri Oct 24 09:11:28 PDT 2025


https://github.com/hansangbae updated https://github.com/llvm/llvm-project/pull/133323

>From 8464e03fd1fc95b48373f9fa7b299824455c6497 Mon Sep 17 00:00:00 2001
From: Hansang Bae <hansang.bae at intel.com>
Date: Tue, 18 Mar 2025 16:26:23 -0500
Subject: [PATCH] [OpenMP] Emit workshare events correctly for
 distribute-and-parallel loops

The runtime entry `__kmpc_dist_for_static_init` is supposed to be
invoked when there is a statically scheduled teams distribute
parallel loop. This change makes the entry emit correct OMPT workshare
events for the loop; previously it handled only the `distribute` part,
and did so incorrectly. A new entry `__kmpc_dist_for_static_fini` is
also added to emit the matching end events for OMPT.
Clang does not currently emit a call to `__kmpc_dist_for_static_init`,
but having these entries ready does no harm.
---
 openmp/runtime/src/dllexports       |  1 +
 openmp/runtime/src/kmp.h            |  1 +
 openmp/runtime/src/kmp_csupport.cpp | 35 +++++++++++++++++++++++++++++
 openmp/runtime/src/kmp_sched.cpp    |  9 +++++++-
 4 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index 0667d53c35a18..fb55803de5fe5 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -351,6 +351,7 @@ kmpc_set_defaults                           224
         __kmpc_dist_for_static_init_4u      248
         __kmpc_dist_for_static_init_8       249
         __kmpc_dist_for_static_init_8u      250
+        __kmpc_dist_for_static_fini
         __kmpc_dist_dispatch_init_4         251
         __kmpc_dist_dispatch_init_4u        252
         __kmpc_dist_dispatch_init_8         253
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 856f14e5f057f..f2d1b9092c853 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -4238,6 +4238,7 @@ KMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid,
                                      kmp_int chunk);
 
 KMP_EXPORT void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
+KMP_EXPORT void __kmpc_dist_for_static_fini(ident_t *loc, kmp_int32 gtid);
 
 KMP_EXPORT void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
                                    size_t cpy_size, void *cpy_data,
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index fdbf9ff45e354..1c62be87387dc 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -2032,6 +2032,41 @@ void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
     __kmp_pop_workshare(global_tid, ct_pdo, loc);
 }
 
+/*!
+@ingroup WORK_SHARING
+@param loc Source location
+@param global_tid Global thread id
+
+Mark the end of a statically scheduled distribute and parallel loop
+*/
+void __kmpc_dist_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
+  KMP_POP_PARTITIONED_TIMER();
+  KE_TRACE(10, ("__kmpc_dist_for_static_fini called T#%d\n", global_tid));
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.ompt_callback_work) {
+    // Workshare type is distribute and parallel loop.
+    // Emit ws-loop-end event for all threads.
+    // Emit distribute-end event for the primary threads.
+    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+    int tid = __kmp_tid_from_gtid(global_tid);
+
+    ompt_callbacks.ompt_callback(ompt_callback_work)(
+        ompt_work_loop_static, ompt_scope_end, &(team_info->parallel_data),
+        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
+
+    if (tid == 0)
+      ompt_callbacks.ompt_callback(ompt_callback_work)(
+          ompt_work_distribute, ompt_scope_end, &(team_info->parallel_data),
+          &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
+  }
+#endif
+
+  if (__kmp_env_consistency_check)
+    __kmp_pop_workshare(global_tid, ct_pdo, loc);
+}
+
 // User routines which take C-style arguments (call by value)
 // different from the Fortran equivalent routines
 
diff --git a/openmp/runtime/src/kmp_sched.cpp b/openmp/runtime/src/kmp_sched.cpp
index 2b1bb6f595f9a..d7ddcb6b15bd9 100644
--- a/openmp/runtime/src/kmp_sched.cpp
+++ b/openmp/runtime/src/kmp_sched.cpp
@@ -732,11 +732,18 @@ end:;
   KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
+    // Workshare type is distribute and parallel loop.
+    // Emit ws-loop-begin event for all threads.
+    // Emit distribute-begin event for the primary threads.
     ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
     ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
     if (ompt_enabled.ompt_callback_work) {
+      if (tid == 0)
+        ompt_callbacks.ompt_callback(ompt_callback_work)(
+            ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
+            &(task_info->task_data), 0, codeptr);
       ompt_callbacks.ompt_callback(ompt_callback_work)(
-          ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
+          ompt_work_loop_static, ompt_scope_begin, &(team_info->parallel_data),
           &(task_info->task_data), 0, codeptr);
     }
     if (ompt_enabled.ompt_callback_dispatch) {



More information about the Openmp-commits mailing list