[Openmp-commits] [openmp] 77ad061 - [OpenMP] Update OpenMP runtime to adopt taskgraph clause from 6.0 Specs (#130751)

via Openmp-commits openmp-commits at lists.llvm.org
Fri Mar 14 00:02:26 PDT 2025


Author: Josep Pinot
Date: 2025-03-14T08:02:23+01:00
New Revision: 77ad061923418ba0f4c8fd4a0710a5ace825bf8e

URL: https://github.com/llvm/llvm-project/commit/77ad061923418ba0f4c8fd4a0710a5ace825bf8e
DIFF: https://github.com/llvm/llvm-project/commit/77ad061923418ba0f4c8fd4a0710a5ace825bf8e.diff

LOG: [OpenMP] Update OpenMP runtime to adopt taskgraph clause from 6.0 Specs (#130751)

Updating OpenMP runtime taskgraph support(record/replay mechanism):

- Adds a `graph_reset` bit in `kmp_taskgraph_flags_t` to discard
existing TDG records.
- Switches from a strict index-based TDG ID/IDX to a more flexible
integer-based, which can be any integer (e.g. hashed).
- Adds helper functions like `__kmp_find_tdg`, `__kmp_alloc_tdg`, and
`__kmp_free_tdg` to manage TDGs by their IDs.

These changes pave the way for the integration of OpenMP taskgraph (spec
6.0). Taskgraphs are still recorded in an array with a lookup efficiency
reduced to O(n), where n ≤ `__kmp_max_tdgs`. This can be optimized by
moving the TDGs to a hashtable, making lookups more efficient. The
provided helper routines facilitate easier future optimizations.

Added: 
    openmp/runtime/test/tasking/omp_record_replay_random_id.cpp
    openmp/runtime/test/tasking/omp_record_replay_reset.cpp

Modified: 
    openmp/runtime/src/kmp.h
    openmp/runtime/src/kmp_global.cpp
    openmp/runtime/src/kmp_tasking.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 9b8c6102dbee2..856f14e5f057f 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2606,7 +2606,9 @@ typedef struct {
 typedef struct kmp_taskgraph_flags { /*This needs to be exactly 32 bits */
   unsigned nowait : 1;
   unsigned re_record : 1;
-  unsigned reserved : 30;
+  unsigned graph_reset : 1; /* 1==discard taskgraph record, 0==use taskgraph
+                               record */
+  unsigned reserved : 29;
 } kmp_taskgraph_flags_t;
 
 /// Represents a TDG node
@@ -2650,7 +2652,7 @@ typedef struct kmp_tdg_info {
 extern int __kmp_tdg_dot;
 extern kmp_int32 __kmp_max_tdgs;
 extern kmp_tdg_info_t **__kmp_global_tdgs;
-extern kmp_int32 __kmp_curr_tdg_idx;
+extern kmp_tdg_info_t *__kmp_curr_tdg;
 extern kmp_int32 __kmp_successors_size;
 extern std::atomic<kmp_int32> __kmp_tdg_task_id;
 extern kmp_int32 __kmp_num_tdg;

diff  --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 52e0fdbdfb1da..7b6bfff7c54ea 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -554,8 +554,7 @@ int *__kmp_nesting_nth_level;
 int __kmp_tdg_dot = 0;
 kmp_int32 __kmp_max_tdgs = 100;
 kmp_tdg_info_t **__kmp_global_tdgs = NULL;
-kmp_int32 __kmp_curr_tdg_idx =
-    0; // Id of the current TDG being recorded or executed
+kmp_tdg_info_t *__kmp_curr_tdg = NULL; // Current TDG being recorded or executed
 kmp_int32 __kmp_num_tdg = 0;
 kmp_int32 __kmp_successors_size = 10; // Initial succesor size list for
                                       // recording

diff  --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 563aa29f6265e..90004bfc8afe0 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1651,11 +1651,11 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
   }
 
 #if OMPX_TASKGRAPH
-  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
+  kmp_tdg_info_t *tdg = __kmp_curr_tdg;
   if (tdg && __kmp_tdg_is_recording(tdg->tdg_status) &&
       (task_entry != (kmp_routine_entry_t)__kmp_taskloop_task)) {
     taskdata->is_taskgraph = 1;
-    taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
+    taskdata->tdg = tdg;
     taskdata->td_task_id = KMP_GEN_TASK_ID();
     taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
   }
@@ -2577,14 +2577,11 @@ without help of the runtime library.
 */
 void *__kmpc_task_reduction_init(int gtid, int num, void *data) {
 #if OMPX_TASKGRAPH
-  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
+  kmp_tdg_info_t *tdg = __kmp_curr_tdg;
   if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
-    kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
-    this_tdg->rec_taskred_data =
-        __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
-    this_tdg->rec_num_taskred = num;
-    KMP_MEMCPY(this_tdg->rec_taskred_data, data,
-               sizeof(kmp_task_red_input_t) * num);
+    tdg->rec_taskred_data = __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
+    tdg->rec_num_taskred = num;
+    KMP_MEMCPY(tdg->rec_taskred_data, data, sizeof(kmp_task_red_input_t) * num);
   }
 #endif
   return __kmp_task_reduction_init(gtid, num, (kmp_task_red_input_t *)data);
@@ -2604,14 +2601,11 @@ has two parameters, pointer to object to be initialized and pointer to omp_orig
 */
 void *__kmpc_taskred_init(int gtid, int num, void *data) {
 #if OMPX_TASKGRAPH
-  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
+  kmp_tdg_info_t *tdg = __kmp_curr_tdg;
   if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
-    kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
-    this_tdg->rec_taskred_data =
-        __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
-    this_tdg->rec_num_taskred = num;
-    KMP_MEMCPY(this_tdg->rec_taskred_data, data,
-               sizeof(kmp_task_red_input_t) * num);
+    tdg->rec_taskred_data = __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
+    tdg->rec_num_taskred = num;
+    KMP_MEMCPY(tdg->rec_taskred_data, data, sizeof(kmp_task_red_input_t) * num);
   }
 #endif
   return __kmp_task_reduction_init(gtid, num, (kmp_taskred_input_t *)data);
@@ -2662,8 +2656,7 @@ void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
 
 #if OMPX_TASKGRAPH
   if ((thread->th.th_current_task->is_taskgraph) &&
-      (!__kmp_tdg_is_recording(
-          __kmp_global_tdgs[__kmp_curr_tdg_idx]->tdg_status))) {
+      (!__kmp_tdg_is_recording(__kmp_curr_tdg->tdg_status))) {
     tg = thread->th.th_current_task->td_taskgroup;
     KMP_ASSERT(tg != NULL);
     KMP_ASSERT(tg->reduce_data != NULL);
@@ -5452,7 +5445,6 @@ bool __kmpc_omp_has_task_team(kmp_int32 gtid) {
 
 #if OMPX_TASKGRAPH
 // __kmp_find_tdg: identify a TDG through its ID
-// gtid:   Global Thread ID
 // tdg_id: ID of the TDG
 // returns: If a TDG corresponding to this ID is found and not
 // its initial state, return the pointer to it, otherwise nullptr
@@ -5465,12 +5457,71 @@ static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id) {
     __kmp_global_tdgs = (kmp_tdg_info_t **)__kmp_allocate(
         sizeof(kmp_tdg_info_t *) * __kmp_max_tdgs);
 
-  if ((__kmp_global_tdgs[tdg_id]) &&
-      (__kmp_global_tdgs[tdg_id]->tdg_status != KMP_TDG_NONE))
-    res = __kmp_global_tdgs[tdg_id];
+  for (kmp_int32 tdg_idx = 0; tdg_idx < __kmp_max_tdgs; tdg_idx++) {
+    if (__kmp_global_tdgs[tdg_idx] &&
+        __kmp_global_tdgs[tdg_idx]->tdg_id == tdg_id) {
+      if (__kmp_global_tdgs[tdg_idx]->tdg_status != KMP_TDG_NONE)
+        res = __kmp_global_tdgs[tdg_idx];
+      break;
+    }
+  }
   return res;
 }
 
+// __kmp_alloc_tdg: Allocates a TDG if it doesn't already exist.
+// tdg_id: ID of the TDG.
+// returns: A pointer to the TDG if it already exists. Otherwise,
+// allocates a new TDG if the maximum limit has not been reached.
+// Returns nullptr if no TDG can be allocated.
+static kmp_tdg_info_t *__kmp_alloc_tdg(kmp_int32 tdg_id) {
+  kmp_tdg_info_t *res = nullptr;
+  if ((res = __kmp_find_tdg(tdg_id)))
+    return res;
+
+  if (__kmp_num_tdg > __kmp_max_tdgs)
+    return res;
+
+  for (kmp_int32 tdg_idx = 0; tdg_idx < __kmp_max_tdgs; tdg_idx++) {
+    if (!__kmp_global_tdgs[tdg_idx]) {
+      kmp_tdg_info_t *tdg =
+          (kmp_tdg_info_t *)__kmp_allocate(sizeof(kmp_tdg_info_t));
+      __kmp_global_tdgs[tdg_idx] = tdg;
+      __kmp_curr_tdg = tdg;
+      res = __kmp_global_tdgs[tdg_idx];
+      break;
+    }
+  }
+  return res;
+}
+
+// __kmp_free_tdg: Frees a TDG if it exists.
+// tdg_id:  ID of the TDG to be freed.
+// returns: true if a TDG with the given ID was found and successfully freed,
+// false if no such TDG exists.
+static bool __kmp_free_tdg(kmp_int32 tdg_id) {
+  kmp_tdg_info_t *tdg = nullptr;
+  if (__kmp_global_tdgs == NULL)
+    return false;
+
+  for (kmp_int32 tdg_idx = 0; tdg_idx < __kmp_max_tdgs; tdg_idx++) {
+    if (__kmp_global_tdgs[tdg_idx] &&
+        __kmp_global_tdgs[tdg_idx]->tdg_id == tdg_id) {
+      tdg = __kmp_global_tdgs[tdg_idx];
+      for (kmp_int map_idx = 0; map_idx < tdg->map_size; map_idx++) {
+        __kmp_free(tdg->record_map[map_idx].successors);
+      }
+      __kmp_free(tdg->record_map);
+      if (tdg->root_tasks)
+        __kmp_free(tdg->root_tasks);
+
+      __kmp_free(tdg);
+      __kmp_global_tdgs[tdg_idx] = NULL;
+      return true;
+    }
+  }
+  return false;
+}
+
 // __kmp_print_tdg_dot: prints the TDG to a dot file
 // tdg:    ID of the TDG
 // gtid:   Global Thread ID
@@ -5505,7 +5556,7 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
   KA_TRACE(10, ("__kmp_print_tdg_dot(exit): T#%d tdg_id=%d \n", gtid, tdg_id));
 }
 
-// __kmp_start_record: launch the execution of a previous
+// __kmp_exec_tdg: launch the execution of a previous
 // recorded TDG
 // gtid:   Global Thread ID
 // tdg:    ID of the TDG
@@ -5565,9 +5616,7 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
 static inline void __kmp_start_record(kmp_int32 gtid,
                                       kmp_taskgraph_flags_t *flags,
                                       kmp_int32 tdg_id) {
-  kmp_tdg_info_t *tdg =
-      (kmp_tdg_info_t *)__kmp_allocate(sizeof(kmp_tdg_info_t));
-  __kmp_global_tdgs[__kmp_curr_tdg_idx] = tdg;
+  kmp_tdg_info_t *tdg = __kmp_alloc_tdg(tdg_id);
   // Initializing the TDG structure
   tdg->tdg_id = tdg_id;
   tdg->map_size = INIT_MAPSIZE;
@@ -5592,7 +5641,7 @@ static inline void __kmp_start_record(kmp_int32 gtid,
     KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter, 0);
   }
 
-  __kmp_global_tdgs[__kmp_curr_tdg_idx]->record_map = this_record_map;
+  tdg->record_map = this_record_map;
 }
 
 // __kmpc_start_record_task: Wrapper around __kmp_start_record to mark
@@ -5600,34 +5649,34 @@ static inline void __kmp_start_record(kmp_int32 gtid,
 // loc_ref:     Location of TDG, not used yet
 // gtid:        Global Thread ID of the encountering thread
 // input_flags: Flags associated with the TDG
-// tdg_id:      ID of the TDG to record, for now, incremental integer
+// tdg_id:      ID of the TDG to record
 // returns:     1 if we record, otherwise, 0
 kmp_int32 __kmpc_start_record_task(ident_t *loc_ref, kmp_int32 gtid,
                                    kmp_int32 input_flags, kmp_int32 tdg_id) {
-
   kmp_int32 res;
   kmp_taskgraph_flags_t *flags = (kmp_taskgraph_flags_t *)&input_flags;
-  KA_TRACE(10,
-           ("__kmpc_start_record_task(enter): T#%d loc=%p flags=%d tdg_id=%d\n",
-            gtid, loc_ref, input_flags, tdg_id));
+  KA_TRACE(10, ("__kmpc_start_record_task(enter): T#%d loc=%p flags=%d "
+                "tdg_id=%d\n",
+                gtid, loc_ref, input_flags, tdg_id));
 
   if (__kmp_max_tdgs == 0) {
-    KA_TRACE(
-        10,
-        ("__kmpc_start_record_task(abandon): T#%d loc=%p flags=%d tdg_id = %d, "
-         "__kmp_max_tdgs = 0\n",
-         gtid, loc_ref, input_flags, tdg_id));
+    KA_TRACE(10, ("__kmpc_start_record_task(abandon): T#%d loc=%p flags=%d "
+                  "tdg_id = %d, __kmp_max_tdgs = 0\n",
+                  gtid, loc_ref, input_flags, tdg_id));
     return 1;
   }
 
   __kmpc_taskgroup(loc_ref, gtid);
+  if (flags->graph_reset) {
+    __kmp_free_tdg(tdg_id);
+    __kmp_num_tdg--;
+  }
   if (kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id)) {
     // TODO: use re_record flag
     __kmp_exec_tdg(gtid, tdg);
     res = 0;
   } else {
-    __kmp_curr_tdg_idx = tdg_id;
-    KMP_DEBUG_ASSERT(__kmp_curr_tdg_idx < __kmp_max_tdgs);
+    KMP_DEBUG_ASSERT(__kmp_num_tdg < __kmp_max_tdgs);
     __kmp_start_record(gtid, flags, tdg_id);
     __kmp_num_tdg++;
     res = 1;
@@ -5690,10 +5739,11 @@ void __kmpc_end_record_task(ident_t *loc_ref, kmp_int32 gtid,
                             kmp_int32 input_flags, kmp_int32 tdg_id) {
   kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id);
 
+  KMP_DEBUG_ASSERT(tdg != NULL);
   KA_TRACE(10, ("__kmpc_end_record_task(enter): T#%d loc=%p finishes recording"
                 " tdg=%d with flags=%d\n",
                 gtid, loc_ref, tdg_id, input_flags));
-  if (__kmp_max_tdgs) {
+  if (__kmp_max_tdgs && tdg) {
     // TODO: use input_flags->nowait
     __kmpc_end_taskgroup(loc_ref, gtid);
     if (__kmp_tdg_is_recording(tdg->tdg_status))

diff  --git a/openmp/runtime/test/tasking/omp_record_replay_random_id.cpp b/openmp/runtime/test/tasking/omp_record_replay_random_id.cpp
new file mode 100644
index 0000000000000..58e90da4d782a
--- /dev/null
+++ b/openmp/runtime/test/tasking/omp_record_replay_random_id.cpp
@@ -0,0 +1,47 @@
+// REQUIRES: ompx_taskgraph
+// RUN: %libomp-cxx-compile-and-run
+#include <iostream>
+#include <cassert>
+#define NT 10
+
+// Compiler-generated code (emulation)
+typedef struct ident {
+  void *dummy;
+} ident_t;
+
+#ifdef __cplusplus
+extern "C" {
+int __kmpc_global_thread_num(ident_t *);
+int __kmpc_start_record_task(ident_t *, int, int, int);
+void __kmpc_end_record_task(ident_t *, int, int, int);
+}
+#endif
+
+static void func(int *num_exec) { (*num_exec)++; }
+
+int main() {
+  int num_exec = 0;
+  int num_tasks = 0;
+  int hash_id = 135343854;
+#pragma omp parallel
+#pragma omp single
+  for (int iter = 0; iter < NT; ++iter) {
+    int gtid = __kmpc_global_thread_num(nullptr);
+    int res = __kmpc_start_record_task(nullptr, gtid, /* kmp_tdg_flags */ 0,
+                                       /* tdg_id */ hash_id);
+    if (res) {
+      num_tasks++;
+#pragma omp task
+      func(&num_exec);
+    }
+    __kmpc_end_record_task(nullptr, gtid, /* kmp_tdg_flags */ 0,
+                           /* tdg_id */ hash_id);
+  }
+
+  assert(num_tasks == 1);
+  assert(num_exec == NT);
+
+  std::cout << "Passed" << std::endl;
+  return 0;
+}
+// CHECK: Passed

diff  --git a/openmp/runtime/test/tasking/omp_record_replay_reset.cpp b/openmp/runtime/test/tasking/omp_record_replay_reset.cpp
new file mode 100644
index 0000000000000..123a9fa5a72f0
--- /dev/null
+++ b/openmp/runtime/test/tasking/omp_record_replay_reset.cpp
@@ -0,0 +1,47 @@
+// REQUIRES: ompx_taskgraph
+// RUN: %libomp-cxx-compile-and-run
+#include <iostream>
+#include <cassert>
+#define NT 10
+
+// Compiler-generated code (emulation)
+typedef struct ident {
+  void *dummy;
+} ident_t;
+
+#ifdef __cplusplus
+extern "C" {
+int __kmpc_global_thread_num(ident_t *);
+int __kmpc_start_record_task(ident_t *, int, int, int);
+void __kmpc_end_record_task(ident_t *, int, int, int);
+}
+#endif
+
+static void func(int *num_exec) { (*num_exec)++; }
+
+int main() {
+  int num_exec = 0;
+  int num_tasks = 0;
+  int flags = 1 << 2;
+#pragma omp parallel
+#pragma omp single
+  for (int iter = 0; iter < NT; ++iter) {
+    int gtid = __kmpc_global_thread_num(nullptr);
+    int res = __kmpc_start_record_task(nullptr, gtid, /* kmp_tdg_flags */ flags,
+                                       /* tdg_id */ 0);
+    if (res) {
+      num_tasks++;
+#pragma omp task
+      func(&num_exec);
+    }
+    __kmpc_end_record_task(nullptr, gtid, /* kmp_tdg_flags */ 0,
+                           /* tdg_id */ 0);
+  }
+
+  assert(num_tasks == NT);
+  assert(num_exec == NT);
+
+  std::cout << "Passed" << std::endl;
+  return 0;
+}
+// CHECK: Passed


        


More information about the Openmp-commits mailing list