[Openmp-commits] [openmp] 77ad061 - [OpenMP] Update OpenMP runtime to adopt taskgraph clause from 6.0 Specs (#130751)
via Openmp-commits
openmp-commits at lists.llvm.org
Fri Mar 14 00:02:26 PDT 2025
Author: Josep Pinot
Date: 2025-03-14T08:02:23+01:00
New Revision: 77ad061923418ba0f4c8fd4a0710a5ace825bf8e
URL: https://github.com/llvm/llvm-project/commit/77ad061923418ba0f4c8fd4a0710a5ace825bf8e
DIFF: https://github.com/llvm/llvm-project/commit/77ad061923418ba0f4c8fd4a0710a5ace825bf8e.diff
LOG: [OpenMP] Update OpenMP runtime to adopt taskgraph clause from 6.0 Specs (#130751)
This patch updates the OpenMP runtime's taskgraph support (the record/replay mechanism):
- Adds a `graph_reset` bit in `kmp_taskgraph_flags_t` to discard
existing TDG records.
- Switches from a strict index-based TDG ID/IDX to a more flexible
integer-based ID, which can be any integer (e.g. a hash value).
- Adds helper functions like `__kmp_find_tdg`, `__kmp_alloc_tdg`, and
`__kmp_free_tdg` to manage TDGs by their IDs.
These changes pave the way for the integration of the OpenMP taskgraph
construct (spec 6.0). Taskgraphs are still stored in an array, but because a
TDG ID is no longer used directly as the array index, a lookup now requires a
linear scan: its cost is O(n), where n ≤ `__kmp_max_tdgs`. This can be
optimized later by moving the TDGs to a hash table. The new helper routines
confine TDG lookup, allocation, and deallocation to one place, which should
make such future optimizations easier.
Added:
openmp/runtime/test/tasking/omp_record_replay_random_id.cpp
openmp/runtime/test/tasking/omp_record_replay_reset.cpp
Modified:
openmp/runtime/src/kmp.h
openmp/runtime/src/kmp_global.cpp
openmp/runtime/src/kmp_tasking.cpp
Removed:
################################################################################
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 9b8c6102dbee2..856f14e5f057f 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2606,7 +2606,9 @@ typedef struct {
typedef struct kmp_taskgraph_flags { /*This needs to be exactly 32 bits */
unsigned nowait : 1;
unsigned re_record : 1;
- unsigned reserved : 30;
+ unsigned graph_reset : 1; /* 1==discard taskgraph record, 0==use taskgraph
+ record */
+ unsigned reserved : 29;
} kmp_taskgraph_flags_t;
/// Represents a TDG node
@@ -2650,7 +2652,7 @@ typedef struct kmp_tdg_info {
extern int __kmp_tdg_dot;
extern kmp_int32 __kmp_max_tdgs;
extern kmp_tdg_info_t **__kmp_global_tdgs;
-extern kmp_int32 __kmp_curr_tdg_idx;
+extern kmp_tdg_info_t *__kmp_curr_tdg;
extern kmp_int32 __kmp_successors_size;
extern std::atomic<kmp_int32> __kmp_tdg_task_id;
extern kmp_int32 __kmp_num_tdg;
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 52e0fdbdfb1da..7b6bfff7c54ea 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -554,8 +554,7 @@ int *__kmp_nesting_nth_level;
int __kmp_tdg_dot = 0;
kmp_int32 __kmp_max_tdgs = 100;
kmp_tdg_info_t **__kmp_global_tdgs = NULL;
-kmp_int32 __kmp_curr_tdg_idx =
- 0; // Id of the current TDG being recorded or executed
+kmp_tdg_info_t *__kmp_curr_tdg = NULL; // Current TDG being recorded or executed
kmp_int32 __kmp_num_tdg = 0;
kmp_int32 __kmp_successors_size = 10; // Initial succesor size list for
// recording
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 563aa29f6265e..90004bfc8afe0 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1651,11 +1651,11 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
}
#if OMPX_TASKGRAPH
- kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
+ kmp_tdg_info_t *tdg = __kmp_curr_tdg;
if (tdg && __kmp_tdg_is_recording(tdg->tdg_status) &&
(task_entry != (kmp_routine_entry_t)__kmp_taskloop_task)) {
taskdata->is_taskgraph = 1;
- taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
+ taskdata->tdg = tdg;
taskdata->td_task_id = KMP_GEN_TASK_ID();
taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
}
@@ -2577,14 +2577,11 @@ without help of the runtime library.
*/
void *__kmpc_task_reduction_init(int gtid, int num, void *data) {
#if OMPX_TASKGRAPH
- kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
+ kmp_tdg_info_t *tdg = __kmp_curr_tdg;
if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
- kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
- this_tdg->rec_taskred_data =
- __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
- this_tdg->rec_num_taskred = num;
- KMP_MEMCPY(this_tdg->rec_taskred_data, data,
- sizeof(kmp_task_red_input_t) * num);
+ tdg->rec_taskred_data = __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
+ tdg->rec_num_taskred = num;
+ KMP_MEMCPY(tdg->rec_taskred_data, data, sizeof(kmp_task_red_input_t) * num);
}
#endif
return __kmp_task_reduction_init(gtid, num, (kmp_task_red_input_t *)data);
@@ -2604,14 +2601,11 @@ has two parameters, pointer to object to be initialized and pointer to omp_orig
*/
void *__kmpc_taskred_init(int gtid, int num, void *data) {
#if OMPX_TASKGRAPH
- kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
+ kmp_tdg_info_t *tdg = __kmp_curr_tdg;
if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
- kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
- this_tdg->rec_taskred_data =
- __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
- this_tdg->rec_num_taskred = num;
- KMP_MEMCPY(this_tdg->rec_taskred_data, data,
- sizeof(kmp_task_red_input_t) * num);
+ tdg->rec_taskred_data = __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
+ tdg->rec_num_taskred = num;
+ KMP_MEMCPY(tdg->rec_taskred_data, data, sizeof(kmp_task_red_input_t) * num);
}
#endif
return __kmp_task_reduction_init(gtid, num, (kmp_taskred_input_t *)data);
@@ -2662,8 +2656,7 @@ void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
#if OMPX_TASKGRAPH
if ((thread->th.th_current_task->is_taskgraph) &&
- (!__kmp_tdg_is_recording(
- __kmp_global_tdgs[__kmp_curr_tdg_idx]->tdg_status))) {
+ (!__kmp_tdg_is_recording(__kmp_curr_tdg->tdg_status))) {
tg = thread->th.th_current_task->td_taskgroup;
KMP_ASSERT(tg != NULL);
KMP_ASSERT(tg->reduce_data != NULL);
@@ -5452,7 +5445,6 @@ bool __kmpc_omp_has_task_team(kmp_int32 gtid) {
#if OMPX_TASKGRAPH
// __kmp_find_tdg: identify a TDG through its ID
-// gtid: Global Thread ID
// tdg_id: ID of the TDG
// returns: If a TDG corresponding to this ID is found and not
// its initial state, return the pointer to it, otherwise nullptr
@@ -5465,12 +5457,71 @@ static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id) {
__kmp_global_tdgs = (kmp_tdg_info_t **)__kmp_allocate(
sizeof(kmp_tdg_info_t *) * __kmp_max_tdgs);
- if ((__kmp_global_tdgs[tdg_id]) &&
- (__kmp_global_tdgs[tdg_id]->tdg_status != KMP_TDG_NONE))
- res = __kmp_global_tdgs[tdg_id];
+ for (kmp_int32 tdg_idx = 0; tdg_idx < __kmp_max_tdgs; tdg_idx++) {
+ if (__kmp_global_tdgs[tdg_idx] &&
+ __kmp_global_tdgs[tdg_idx]->tdg_id == tdg_id) {
+ if (__kmp_global_tdgs[tdg_idx]->tdg_status != KMP_TDG_NONE)
+ res = __kmp_global_tdgs[tdg_idx];
+ break;
+ }
+ }
return res;
}
+// __kmp_alloc_tdg: Allocates a TDG if it doesn't already exist.
+// tdg_id: ID of the TDG.
+// returns: A pointer to the TDG if it already exists. Otherwise,
+// allocates a new TDG if the maximum limit has not been reached.
+// Returns nullptr if no TDG can be allocated.
+static kmp_tdg_info_t *__kmp_alloc_tdg(kmp_int32 tdg_id) {
+ kmp_tdg_info_t *res = nullptr;
+ if ((res = __kmp_find_tdg(tdg_id)))
+ return res;
+
+ if (__kmp_num_tdg > __kmp_max_tdgs)
+ return res;
+
+ for (kmp_int32 tdg_idx = 0; tdg_idx < __kmp_max_tdgs; tdg_idx++) {
+ if (!__kmp_global_tdgs[tdg_idx]) {
+ kmp_tdg_info_t *tdg =
+ (kmp_tdg_info_t *)__kmp_allocate(sizeof(kmp_tdg_info_t));
+ __kmp_global_tdgs[tdg_idx] = tdg;
+ __kmp_curr_tdg = tdg;
+ res = __kmp_global_tdgs[tdg_idx];
+ break;
+ }
+ }
+ return res;
+}
+
+// __kmp_free_tdg: Frees a TDG if it exists.
+// tdg_id: ID of the TDG to be freed.
+// returns: true if a TDG with the given ID was found and successfully freed,
+// false if no such TDG exists.
+static bool __kmp_free_tdg(kmp_int32 tdg_id) {
+ kmp_tdg_info_t *tdg = nullptr;
+ if (__kmp_global_tdgs == NULL)
+ return false;
+
+ for (kmp_int32 tdg_idx = 0; tdg_idx < __kmp_max_tdgs; tdg_idx++) {
+ if (__kmp_global_tdgs[tdg_idx] &&
+ __kmp_global_tdgs[tdg_idx]->tdg_id == tdg_id) {
+ tdg = __kmp_global_tdgs[tdg_idx];
+ for (kmp_int map_idx = 0; map_idx < tdg->map_size; map_idx++) {
+ __kmp_free(tdg->record_map[map_idx].successors);
+ }
+ __kmp_free(tdg->record_map);
+ if (tdg->root_tasks)
+ __kmp_free(tdg->root_tasks);
+
+ __kmp_free(tdg);
+ __kmp_global_tdgs[tdg_idx] = NULL;
+ return true;
+ }
+ }
+ return false;
+}
+
// __kmp_print_tdg_dot: prints the TDG to a dot file
// tdg: ID of the TDG
// gtid: Global Thread ID
@@ -5505,7 +5556,7 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
KA_TRACE(10, ("__kmp_print_tdg_dot(exit): T#%d tdg_id=%d \n", gtid, tdg_id));
}
-// __kmp_start_record: launch the execution of a previous
+// __kmp_exec_tdg: launch the execution of a previous
// recorded TDG
// gtid: Global Thread ID
// tdg: ID of the TDG
@@ -5565,9 +5616,7 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
static inline void __kmp_start_record(kmp_int32 gtid,
kmp_taskgraph_flags_t *flags,
kmp_int32 tdg_id) {
- kmp_tdg_info_t *tdg =
- (kmp_tdg_info_t *)__kmp_allocate(sizeof(kmp_tdg_info_t));
- __kmp_global_tdgs[__kmp_curr_tdg_idx] = tdg;
+ kmp_tdg_info_t *tdg = __kmp_alloc_tdg(tdg_id);
// Initializing the TDG structure
tdg->tdg_id = tdg_id;
tdg->map_size = INIT_MAPSIZE;
@@ -5592,7 +5641,7 @@ static inline void __kmp_start_record(kmp_int32 gtid,
KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter, 0);
}
- __kmp_global_tdgs[__kmp_curr_tdg_idx]->record_map = this_record_map;
+ tdg->record_map = this_record_map;
}
// __kmpc_start_record_task: Wrapper around __kmp_start_record to mark
@@ -5600,34 +5649,34 @@ static inline void __kmp_start_record(kmp_int32 gtid,
// loc_ref: Location of TDG, not used yet
// gtid: Global Thread ID of the encountering thread
// input_flags: Flags associated with the TDG
-// tdg_id: ID of the TDG to record, for now, incremental integer
+// tdg_id: ID of the TDG to record
// returns: 1 if we record, otherwise, 0
kmp_int32 __kmpc_start_record_task(ident_t *loc_ref, kmp_int32 gtid,
kmp_int32 input_flags, kmp_int32 tdg_id) {
-
kmp_int32 res;
kmp_taskgraph_flags_t *flags = (kmp_taskgraph_flags_t *)&input_flags;
- KA_TRACE(10,
- ("__kmpc_start_record_task(enter): T#%d loc=%p flags=%d tdg_id=%d\n",
- gtid, loc_ref, input_flags, tdg_id));
+ KA_TRACE(10, ("__kmpc_start_record_task(enter): T#%d loc=%p flags=%d "
+ "tdg_id=%d\n",
+ gtid, loc_ref, input_flags, tdg_id));
if (__kmp_max_tdgs == 0) {
- KA_TRACE(
- 10,
- ("__kmpc_start_record_task(abandon): T#%d loc=%p flags=%d tdg_id = %d, "
- "__kmp_max_tdgs = 0\n",
- gtid, loc_ref, input_flags, tdg_id));
+ KA_TRACE(10, ("__kmpc_start_record_task(abandon): T#%d loc=%p flags=%d "
+ "tdg_id = %d, __kmp_max_tdgs = 0\n",
+ gtid, loc_ref, input_flags, tdg_id));
return 1;
}
__kmpc_taskgroup(loc_ref, gtid);
+ if (flags->graph_reset) {
+ __kmp_free_tdg(tdg_id);
+ __kmp_num_tdg--;
+ }
if (kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id)) {
// TODO: use re_record flag
__kmp_exec_tdg(gtid, tdg);
res = 0;
} else {
- __kmp_curr_tdg_idx = tdg_id;
- KMP_DEBUG_ASSERT(__kmp_curr_tdg_idx < __kmp_max_tdgs);
+ KMP_DEBUG_ASSERT(__kmp_num_tdg < __kmp_max_tdgs);
__kmp_start_record(gtid, flags, tdg_id);
__kmp_num_tdg++;
res = 1;
@@ -5690,10 +5739,11 @@ void __kmpc_end_record_task(ident_t *loc_ref, kmp_int32 gtid,
kmp_int32 input_flags, kmp_int32 tdg_id) {
kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id);
+ KMP_DEBUG_ASSERT(tdg != NULL);
KA_TRACE(10, ("__kmpc_end_record_task(enter): T#%d loc=%p finishes recording"
" tdg=%d with flags=%d\n",
gtid, loc_ref, tdg_id, input_flags));
- if (__kmp_max_tdgs) {
+ if (__kmp_max_tdgs && tdg) {
// TODO: use input_flags->nowait
__kmpc_end_taskgroup(loc_ref, gtid);
if (__kmp_tdg_is_recording(tdg->tdg_status))
diff --git a/openmp/runtime/test/tasking/omp_record_replay_random_id.cpp b/openmp/runtime/test/tasking/omp_record_replay_random_id.cpp
new file mode 100644
index 0000000000000..58e90da4d782a
--- /dev/null
+++ b/openmp/runtime/test/tasking/omp_record_replay_random_id.cpp
@@ -0,0 +1,47 @@
+// REQUIRES: ompx_taskgraph
+// RUN: %libomp-cxx-compile-and-run
+#include <iostream>
+#include <cassert>
+#define NT 10
+
+// Compiler-generated code (emulation)
+typedef struct ident {
+ void *dummy;
+} ident_t;
+
+#ifdef __cplusplus
+extern "C" {
+int __kmpc_global_thread_num(ident_t *);
+int __kmpc_start_record_task(ident_t *, int, int, int);
+void __kmpc_end_record_task(ident_t *, int, int, int);
+}
+#endif
+
+static void func(int *num_exec) { (*num_exec)++; }
+
+int main() {
+ int num_exec = 0;
+ int num_tasks = 0;
+ int hash_id = 135343854;
+#pragma omp parallel
+#pragma omp single
+ for (int iter = 0; iter < NT; ++iter) {
+ int gtid = __kmpc_global_thread_num(nullptr);
+ int res = __kmpc_start_record_task(nullptr, gtid, /* kmp_tdg_flags */ 0,
+ /* tdg_id */ hash_id);
+ if (res) {
+ num_tasks++;
+#pragma omp task
+ func(&num_exec);
+ }
+ __kmpc_end_record_task(nullptr, gtid, /* kmp_tdg_flags */ 0,
+ /* tdg_id */ hash_id);
+ }
+
+ assert(num_tasks == 1);
+ assert(num_exec == NT);
+
+ std::cout << "Passed" << std::endl;
+ return 0;
+}
+// CHECK: Passed
diff --git a/openmp/runtime/test/tasking/omp_record_replay_reset.cpp b/openmp/runtime/test/tasking/omp_record_replay_reset.cpp
new file mode 100644
index 0000000000000..123a9fa5a72f0
--- /dev/null
+++ b/openmp/runtime/test/tasking/omp_record_replay_reset.cpp
@@ -0,0 +1,47 @@
+// REQUIRES: ompx_taskgraph
+// RUN: %libomp-cxx-compile-and-run
+#include <iostream>
+#include <cassert>
+#define NT 10
+
+// Compiler-generated code (emulation)
+typedef struct ident {
+ void *dummy;
+} ident_t;
+
+#ifdef __cplusplus
+extern "C" {
+int __kmpc_global_thread_num(ident_t *);
+int __kmpc_start_record_task(ident_t *, int, int, int);
+void __kmpc_end_record_task(ident_t *, int, int, int);
+}
+#endif
+
+static void func(int *num_exec) { (*num_exec)++; }
+
+int main() {
+ int num_exec = 0;
+ int num_tasks = 0;
+ int flags = 1 << 2;
+#pragma omp parallel
+#pragma omp single
+ for (int iter = 0; iter < NT; ++iter) {
+ int gtid = __kmpc_global_thread_num(nullptr);
+ int res = __kmpc_start_record_task(nullptr, gtid, /* kmp_tdg_flags */ flags,
+ /* tdg_id */ 0);
+ if (res) {
+ num_tasks++;
+#pragma omp task
+ func(&num_exec);
+ }
+ __kmpc_end_record_task(nullptr, gtid, /* kmp_tdg_flags */ 0,
+ /* tdg_id */ 0);
+ }
+
+ assert(num_tasks == NT);
+ assert(num_exec == NT);
+
+ std::cout << "Passed" << std::endl;
+ return 0;
+}
+// CHECK: Passed
More information about the Openmp-commits
mailing list