[Openmp-commits] [openmp] Draft: [OpenMP] Fix td_tdg_task_id underflow with taskloop and taskgraph (PR #150877)
Josep Pinot via Openmp-commits
openmp-commits at lists.llvm.org
Thu Sep 18 01:50:43 PDT 2025
https://github.com/jpinot updated https://github.com/llvm/llvm-project/pull/150877
>From 4b4938042950dd7898253509bf511b1bf18a19d0 Mon Sep 17 00:00:00 2001
From: jpinot <josep.pinot at bsc.es>
Date: Thu, 17 Jul 2025 12:22:01 +0200
Subject: [PATCH 1/4] [OpenMP] Fix td_tdg_task_id underflow with taskloop and
taskgraph
This patch addresses an issue where the td_tdg_task_id could underflow,
leading to a negative task ID, when a taskloop region was encountered
before a taskgraph clause.
This change allows surious holes in the record_map.
---
openmp/runtime/src/kmp.h | 1 +
openmp/runtime/src/kmp_tasking.cpp | 30 ++++++++++++++++++++----------
2 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 83afc0e83f231..1f909cd3d3916 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2667,6 +2667,7 @@ typedef struct kmp_tdg_info {
kmp_tdg_status_t tdg_status =
KMP_TDG_NONE; // Status of the TDG (recording, ready...)
std::atomic<kmp_int32> num_tasks; // Number of TDG nodes
+ std::atomic<kmp_int32> tdg_task_id_next; // Task id of next node
kmp_bootstrap_lock_t
graph_lock; // Protect graph attributes when updated via taskloop_recur
// Taskloop reduction related
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 37836fb457537..fb54baf7e4e07 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1437,7 +1437,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
taskdata->is_taskgraph = 1;
taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
taskdata->td_task_id = KMP_GEN_TASK_ID();
- taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
+ taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&tdg->tdg_task_id_next);
}
#endif
KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
@@ -4465,7 +4465,8 @@ kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src
#if OMPX_TASKGRAPH
if (taskdata->is_taskgraph && !taskloop_recur &&
__kmp_tdg_is_recording(taskdata_src->tdg->tdg_status))
- taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
+ taskdata->td_tdg_task_id =
+ KMP_ATOMIC_INC(&taskdata_src->tdg->tdg_task_id_next);
#endif
taskdata->td_task_id = KMP_GEN_TASK_ID();
if (task->shareds != NULL) { // need setup shareds pointer
@@ -4979,10 +4980,6 @@ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
#endif
__kmpc_taskgroup(loc, gtid);
}
-
-#if OMPX_TASKGRAPH
- KMP_ATOMIC_DEC(&__kmp_tdg_task_id);
-#endif
// =========================================================================
// calculate loop parameters
kmp_taskloop_bounds_t task_bounds(task, lb, ub);
@@ -5263,6 +5260,7 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
kmp_safe_raii_file_t tdg_file(file_name, "w");
kmp_int32 num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
+ kmp_int32 map_size = tdg->map_size;
fprintf(tdg_file,
"digraph TDG {\n"
" compound=true\n"
@@ -5273,7 +5271,11 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
fprintf(tdg_file, " %d[style=bold]\n", i);
}
fprintf(tdg_file, " }\n");
- for (kmp_int32 i = 0; i < num_tasks; i++) {
+ kmp_int32 tasks = 0;
+ for (kmp_int32 i = 0; tasks < num_tasks && i < map_size; i++) {
+ if (tdg->record_map[i].task == nullptr)
+ continue;
+ tasks++;
kmp_int32 nsuccessors = tdg->record_map[i].nsuccessors;
kmp_int32 *successors = tdg->record_map[i].successors;
if (nsuccessors > 0) {
@@ -5297,6 +5299,7 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
kmp_int32 *this_root_tasks = tdg->root_tasks;
kmp_int32 this_num_roots = tdg->num_roots;
kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
+ kmp_int32 tasks = 0;
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskdata_t *parent_task = thread->th.th_current_task;
@@ -5305,7 +5308,10 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
__kmpc_taskred_init(gtid, tdg->rec_num_taskred, tdg->rec_taskred_data);
}
- for (kmp_int32 j = 0; j < this_num_tasks; j++) {
+ for (kmp_int32 j = 0; j < tdg->map_size && tasks < this_num_tasks; j++) {
+ if (this_record_map[j].task == nullptr)
+ continue;
+ tasks++;
kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(this_record_map[j].task);
td->td_parent = parent_task;
@@ -5429,8 +5435,13 @@ void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
kmp_int32 this_map_size = tdg->map_size;
kmp_int32 this_num_roots = 0;
kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_int32 tasks = 0;
- for (kmp_int32 i = 0; i < this_num_tasks; i++) {
+ for (kmp_int32 i = 0; tasks < this_num_tasks && i < this_map_size; i++) {
+ if (this_record_map[i].task == nullptr) {
+ continue;
+ }
+ tasks++;
if (this_record_map[i].npredecessors == 0) {
this_root_tasks[this_num_roots++] = i;
}
@@ -5453,7 +5464,6 @@ void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter,
this_record_map[i].npredecessors);
}
- KMP_ATOMIC_ST_RLX(&__kmp_tdg_task_id, 0);
if (__kmp_tdg_dot)
__kmp_print_tdg_dot(tdg, gtid);
>From c8fdb6411e098205e6f2c76a54778754dd560ba2 Mon Sep 17 00:00:00 2001
From: jpinot <josep.pinot at bsc.es>
Date: Wed, 3 Sep 2025 10:10:41 +0200
Subject: [PATCH 2/4] [openmp] Remove taskgraph successors alloction before is
needed
Delayed allocation of successors in kmp_node until the array is needed,
removing the small allocation when a taskgraph node is created or
resized.
---
openmp/runtime/src/kmp_taskdeps.cpp | 16 +++++++++-------
openmp/runtime/src/kmp_tasking.cpp | 13 +++++--------
2 files changed, 14 insertions(+), 15 deletions(-)
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index abbca752f0587..8215c4b318bb5 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -244,13 +244,17 @@ static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
if (!exists) {
if (source_info->nsuccessors >= source_info->successors_size) {
kmp_uint old_size = source_info->successors_size;
- source_info->successors_size = 2 * source_info->successors_size;
+ source_info->successors_size = old_size == 0
+ ? __kmp_successors_size
+ : 2 * source_info->successors_size;
kmp_int32 *old_succ_ids = source_info->successors;
kmp_int32 *new_succ_ids = (kmp_int32 *)__kmp_allocate(
source_info->successors_size * sizeof(kmp_int32));
- KMP_MEMCPY(new_succ_ids, old_succ_ids, old_size * sizeof(kmp_int32));
+ if (old_succ_ids) {
+ KMP_MEMCPY(new_succ_ids, old_succ_ids, old_size * sizeof(kmp_int32));
+ __kmp_free(old_succ_ids);
+ }
source_info->successors = new_succ_ids;
- __kmp_free(old_succ_ids);
}
source_info->successors[source_info->nsuccessors] =
@@ -715,13 +719,11 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
__kmp_free(old_record);
for (kmp_int i = old_size; i < new_size; i++) {
- kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
- __kmp_successors_size * sizeof(kmp_int32));
new_record[i].task = nullptr;
- new_record[i].successors = successorsList;
+ new_record[i].successors = nullptr;
new_record[i].nsuccessors = 0;
new_record[i].npredecessors = 0;
- new_record[i].successors_size = __kmp_successors_size;
+ new_record[i].successors_size = 0;
KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
}
// update the size at the end, so that we avoid other
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index fb54baf7e4e07..c40dcf9b85b4a 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1817,13 +1817,11 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
__kmp_free(old_record);
for (kmp_int i = old_size; i < new_size; i++) {
- kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
- __kmp_successors_size * sizeof(kmp_int32));
new_record[i].task = nullptr;
- new_record[i].successors = successorsList;
+ new_record[i].successors = nullptr;
new_record[i].nsuccessors = 0;
new_record[i].npredecessors = 0;
- new_record[i].successors_size = __kmp_successors_size;
+ new_record[i].successors_size = 0;
KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
}
// update the size at the end, so that we avoid other
@@ -5368,13 +5366,12 @@ static inline void __kmp_start_record(kmp_int32 gtid,
kmp_node_info_t *this_record_map =
(kmp_node_info_t *)__kmp_allocate(INIT_MAPSIZE * sizeof(kmp_node_info_t));
for (kmp_int32 i = 0; i < INIT_MAPSIZE; i++) {
- kmp_int32 *successorsList =
- (kmp_int32 *)__kmp_allocate(__kmp_successors_size * sizeof(kmp_int32));
this_record_map[i].task = nullptr;
- this_record_map[i].successors = successorsList;
+ this_record_map[i].parent_task = nullptr;
+ this_record_map[i].successors = nullptr;
this_record_map[i].nsuccessors = 0;
this_record_map[i].npredecessors = 0;
- this_record_map[i].successors_size = __kmp_successors_size;
+ this_record_map[i].successors_size = 0;
KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter, 0);
}
>From 8936d9b1549150a0ac979222ff8ddd2e3f2b27be Mon Sep 17 00:00:00 2001
From: jpinot <josep.pinot at bsc.es>
Date: Wed, 3 Sep 2025 10:12:19 +0200
Subject: [PATCH 3/4] [openmp] Fix perent_task unitialization when allocating
kmp_tdg_info
---
openmp/runtime/src/kmp_taskdeps.cpp | 1 +
openmp/runtime/src/kmp_tasking.cpp | 1 +
2 files changed, 2 insertions(+)
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 8215c4b318bb5..8610143a32f0b 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -720,6 +720,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
for (kmp_int i = old_size; i < new_size; i++) {
new_record[i].task = nullptr;
+ new_record[i].parent_task = nullptr;
new_record[i].successors = nullptr;
new_record[i].nsuccessors = 0;
new_record[i].npredecessors = 0;
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index c40dcf9b85b4a..2e77cf632dc94 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1818,6 +1818,7 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
for (kmp_int i = old_size; i < new_size; i++) {
new_record[i].task = nullptr;
+ new_record[i].parent_task = nullptr;
new_record[i].successors = nullptr;
new_record[i].nsuccessors = 0;
new_record[i].npredecessors = 0;
>From bf1f0df127cd49f565e4708c97a9606932468545 Mon Sep 17 00:00:00 2001
From: jpinot <josep.pinot at bsc.es>
Date: Tue, 9 Sep 2025 10:09:47 +0200
Subject: [PATCH 4/4] [openmp] Fix locking when expanding recorded tdg
---
openmp/runtime/src/kmp_taskdeps.cpp | 50 ++++++++++++++---------------
openmp/runtime/src/kmp_tasking.cpp | 8 ++---
2 files changed, 27 insertions(+), 31 deletions(-)
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 8610143a32f0b..15e7585a65617 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -704,39 +704,37 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
__kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
kmp_tdg_info_t *tdg = new_taskdata->tdg;
// extend record_map if needed
+ __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
- __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
- if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
- kmp_uint old_size = tdg->map_size;
- kmp_uint new_size = old_size * 2;
- kmp_node_info_t *old_record = tdg->record_map;
- kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
- new_size * sizeof(kmp_node_info_t));
- KMP_MEMCPY(new_record, tdg->record_map,
- old_size * sizeof(kmp_node_info_t));
- tdg->record_map = new_record;
-
- __kmp_free(old_record);
-
- for (kmp_int i = old_size; i < new_size; i++) {
- new_record[i].task = nullptr;
- new_record[i].parent_task = nullptr;
- new_record[i].successors = nullptr;
- new_record[i].nsuccessors = 0;
- new_record[i].npredecessors = 0;
- new_record[i].successors_size = 0;
- KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
- }
- // update the size at the end, so that we avoid other
- // threads use old_record while map_size is already updated
- tdg->map_size = new_size;
+ kmp_uint old_size = tdg->map_size;
+ kmp_uint new_size = old_size * 2;
+ kmp_node_info_t *old_record = tdg->record_map;
+ kmp_node_info_t *new_record =
+ (kmp_node_info_t *)__kmp_allocate(new_size * sizeof(kmp_node_info_t));
+ KMP_MEMCPY(new_record, tdg->record_map,
+ old_size * sizeof(kmp_node_info_t));
+ tdg->record_map = new_record;
+
+ __kmp_free(old_record);
+
+ for (kmp_int i = old_size; i < new_size; i++) {
+ new_record[i].task = nullptr;
+ new_record[i].parent_task = nullptr;
+ new_record[i].successors = nullptr;
+ new_record[i].nsuccessors = 0;
+ new_record[i].npredecessors = 0;
+ new_record[i].successors_size = 0;
+ KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
}
- __kmp_release_bootstrap_lock(&tdg->graph_lock);
+ // update the size at the end, so that we avoid other
+ // threads use old_record while map_size is already updated
+ tdg->map_size = new_size;
}
tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
new_taskdata->td_parent;
KMP_ATOMIC_INC(&tdg->num_tasks);
+ __kmp_release_bootstrap_lock(&tdg->graph_lock);
}
#endif
#if OMPT_SUPPORT
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 2e77cf632dc94..f735e4208ba48 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1800,7 +1800,8 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
__kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
kmp_tdg_info_t *tdg = new_taskdata->tdg;
// extend the record_map if needed
- if (new_taskdata->td_tdg_task_id >= new_taskdata->tdg->map_size) {
+ if (new_taskdata->td_tdg_task_id >= tdg->map_size ||
+ tdg->record_map[new_taskdata->td_tdg_task_id].task == nullptr) {
__kmp_acquire_bootstrap_lock(&tdg->graph_lock);
// map_size could have been updated by another thread if recursive
// taskloop
@@ -1829,14 +1830,11 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
// threads use old_record while map_size is already updated
tdg->map_size = new_size;
}
- __kmp_release_bootstrap_lock(&tdg->graph_lock);
- }
- // record a task
- if (tdg->record_map[new_taskdata->td_tdg_task_id].task == nullptr) {
tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
new_taskdata->td_parent;
KMP_ATOMIC_INC(&tdg->num_tasks);
+ __kmp_release_bootstrap_lock(&tdg->graph_lock);
}
}
#endif
More information about the Openmp-commits
mailing list