[Openmp-commits] [openmp] [OpenMP] Fix td_tdg_task_id underflow when taskloop and taskgraph (PR #150877)
Josep Pinot via Openmp-commits
openmp-commits at lists.llvm.org
Wed Oct 1 07:02:21 PDT 2025
https://github.com/jpinot updated https://github.com/llvm/llvm-project/pull/150877
>From 4b4938042950dd7898253509bf511b1bf18a19d0 Mon Sep 17 00:00:00 2001
From: jpinot <josep.pinot at bsc.es>
Date: Thu, 17 Jul 2025 12:22:01 +0200
Subject: [PATCH 1/7] [OpenMP] Fix td_tdg_task_id underflow with taskloop and
taskgraph
This patch addresses an issue where the td_tdg_task_id could underflow,
leading to a negative task ID, when a taskloop region was encountered
before a taskgraph clause.
This change allows spurious holes in the record_map; loops that walk the
map now skip entries whose task is nullptr.
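
A minimal standalone sketch of the per-TDG counter idea (tdg_info and
record_task are illustrative names, not the runtime's; only the
tdg_task_id_next field mirrors what this patch adds):

#include <atomic>
#include <cassert>

// Sketch: each TDG owns its id counter, so tasks created outside a
// recording region (e.g. by an earlier taskloop) can no longer drag a
// shared global counter below zero.
struct tdg_info {
  std::atomic<int> tdg_task_id_next{0}; // next node id, local to this TDG
};

static int record_task(tdg_info &tdg) {
  // fetch_add returns the previous value, like KMP_ATOMIC_INC does.
  return tdg.tdg_task_id_next.fetch_add(1);
}

int main() {
  tdg_info tdg;
  assert(record_task(tdg) == 0);
  assert(record_task(tdg) == 1);
  return 0;
}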
---
openmp/runtime/src/kmp.h | 1 +
openmp/runtime/src/kmp_tasking.cpp | 30 ++++++++++++++++++++----------
2 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 83afc0e83f231..1f909cd3d3916 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2667,6 +2667,7 @@ typedef struct kmp_tdg_info {
kmp_tdg_status_t tdg_status =
KMP_TDG_NONE; // Status of the TDG (recording, ready...)
std::atomic<kmp_int32> num_tasks; // Number of TDG nodes
+ std::atomic<kmp_int32> tdg_task_id_next; // Task id of next node
kmp_bootstrap_lock_t
graph_lock; // Protect graph attributes when updated via taskloop_recur
// Taskloop reduction related
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 37836fb457537..fb54baf7e4e07 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1437,7 +1437,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
taskdata->is_taskgraph = 1;
taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
taskdata->td_task_id = KMP_GEN_TASK_ID();
- taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
+ taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&tdg->tdg_task_id_next);
}
#endif
KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
@@ -4465,7 +4465,8 @@ kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src
#if OMPX_TASKGRAPH
if (taskdata->is_taskgraph && !taskloop_recur &&
__kmp_tdg_is_recording(taskdata_src->tdg->tdg_status))
- taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
+ taskdata->td_tdg_task_id =
+ KMP_ATOMIC_INC(&taskdata_src->tdg->tdg_task_id_next);
#endif
taskdata->td_task_id = KMP_GEN_TASK_ID();
if (task->shareds != NULL) { // need setup shareds pointer
@@ -4979,10 +4980,6 @@ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
#endif
__kmpc_taskgroup(loc, gtid);
}
-
-#if OMPX_TASKGRAPH
- KMP_ATOMIC_DEC(&__kmp_tdg_task_id);
-#endif
// =========================================================================
// calculate loop parameters
kmp_taskloop_bounds_t task_bounds(task, lb, ub);
@@ -5263,6 +5260,7 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
kmp_safe_raii_file_t tdg_file(file_name, "w");
kmp_int32 num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
+ kmp_int32 map_size = tdg->map_size;
fprintf(tdg_file,
"digraph TDG {\n"
" compound=true\n"
@@ -5273,7 +5271,11 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
fprintf(tdg_file, " %d[style=bold]\n", i);
}
fprintf(tdg_file, " }\n");
- for (kmp_int32 i = 0; i < num_tasks; i++) {
+ kmp_int32 tasks = 0;
+ for (kmp_int32 i = 0; tasks < num_tasks && i < map_size; i++) {
+ if (tdg->record_map[i].task == nullptr)
+ continue;
+ tasks++;
kmp_int32 nsuccessors = tdg->record_map[i].nsuccessors;
kmp_int32 *successors = tdg->record_map[i].successors;
if (nsuccessors > 0) {
@@ -5297,6 +5299,7 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
kmp_int32 *this_root_tasks = tdg->root_tasks;
kmp_int32 this_num_roots = tdg->num_roots;
kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
+ kmp_int32 tasks = 0;
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskdata_t *parent_task = thread->th.th_current_task;
@@ -5305,7 +5308,10 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
__kmpc_taskred_init(gtid, tdg->rec_num_taskred, tdg->rec_taskred_data);
}
- for (kmp_int32 j = 0; j < this_num_tasks; j++) {
+ for (kmp_int32 j = 0; j < tdg->map_size && tasks < this_num_tasks; j++) {
+ if (this_record_map[j].task == nullptr)
+ continue;
+ tasks++;
kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(this_record_map[j].task);
td->td_parent = parent_task;
@@ -5429,8 +5435,13 @@ void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
kmp_int32 this_map_size = tdg->map_size;
kmp_int32 this_num_roots = 0;
kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_int32 tasks = 0;
- for (kmp_int32 i = 0; i < this_num_tasks; i++) {
+ for (kmp_int32 i = 0; tasks < this_num_tasks && i < this_map_size; i++) {
+ if (this_record_map[i].task == nullptr) {
+ continue;
+ }
+ tasks++;
if (this_record_map[i].npredecessors == 0) {
this_root_tasks[this_num_roots++] = i;
}
@@ -5453,7 +5464,6 @@ void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter,
this_record_map[i].npredecessors);
}
- KMP_ATOMIC_ST_RLX(&__kmp_tdg_task_id, 0);
if (__kmp_tdg_dot)
__kmp_print_tdg_dot(tdg, gtid);
>From c8fdb6411e098205e6f2c76a54778754dd560ba2 Mon Sep 17 00:00:00 2001
From: jpinot <josep.pinot at bsc.es>
Date: Wed, 3 Sep 2025 10:10:41 +0200
Subject: [PATCH 2/7] [openmp] Remove taskgraph successors allocation before it
 is needed
Delay allocation of the successors array in kmp_node_info_t until it is
actually needed, removing the small allocation made whenever a taskgraph
node is created or resized.
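
A minimal sketch of the lazy-growth pattern this moves to, using
malloc/free in place of __kmp_allocate/__kmp_free; node, add_successor
and initial_size are illustrative stand-ins (initial_size plays the role
of __kmp_successors_size):

#include <cstdlib>
#include <cstring>

struct node {
  int *successors = nullptr; // not allocated until the first successor arrives
  int nsuccessors = 0;
  int successors_size = 0;
};

static void add_successor(node &n, int succ_id, int initial_size) {
  if (n.nsuccessors >= n.successors_size) {
    int old_size = n.successors_size;
    n.successors_size = old_size == 0 ? initial_size : 2 * old_size;
    int *grown = (int *)std::malloc(n.successors_size * sizeof(int));
    if (n.successors) { // copy and free only if an array already existed
      std::memcpy(grown, n.successors, old_size * sizeof(int));
      std::free(n.successors);
    }
    n.successors = grown;
  }
  n.successors[n.nsuccessors++] = succ_id;
}

int main() {
  node n;
  for (int i = 0; i < 10; i++)
    add_successor(n, i, /*initial_size=*/4); // first call allocates 4 slots
  std::free(n.successors);
  return 0;
}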
---
openmp/runtime/src/kmp_taskdeps.cpp | 16 +++++++++-------
openmp/runtime/src/kmp_tasking.cpp | 13 +++++--------
2 files changed, 14 insertions(+), 15 deletions(-)
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index abbca752f0587..8215c4b318bb5 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -244,13 +244,17 @@ static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
if (!exists) {
if (source_info->nsuccessors >= source_info->successors_size) {
kmp_uint old_size = source_info->successors_size;
- source_info->successors_size = 2 * source_info->successors_size;
+ source_info->successors_size = old_size == 0
+ ? __kmp_successors_size
+ : 2 * source_info->successors_size;
kmp_int32 *old_succ_ids = source_info->successors;
kmp_int32 *new_succ_ids = (kmp_int32 *)__kmp_allocate(
source_info->successors_size * sizeof(kmp_int32));
- KMP_MEMCPY(new_succ_ids, old_succ_ids, old_size * sizeof(kmp_int32));
+ if (old_succ_ids) {
+ KMP_MEMCPY(new_succ_ids, old_succ_ids, old_size * sizeof(kmp_int32));
+ __kmp_free(old_succ_ids);
+ }
source_info->successors = new_succ_ids;
- __kmp_free(old_succ_ids);
}
source_info->successors[source_info->nsuccessors] =
@@ -715,13 +719,11 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
__kmp_free(old_record);
for (kmp_int i = old_size; i < new_size; i++) {
- kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
- __kmp_successors_size * sizeof(kmp_int32));
new_record[i].task = nullptr;
- new_record[i].successors = successorsList;
+ new_record[i].successors = nullptr;
new_record[i].nsuccessors = 0;
new_record[i].npredecessors = 0;
- new_record[i].successors_size = __kmp_successors_size;
+ new_record[i].successors_size = 0;
KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
}
// update the size at the end, so that we avoid other
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index fb54baf7e4e07..c40dcf9b85b4a 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1817,13 +1817,11 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
__kmp_free(old_record);
for (kmp_int i = old_size; i < new_size; i++) {
- kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
- __kmp_successors_size * sizeof(kmp_int32));
new_record[i].task = nullptr;
- new_record[i].successors = successorsList;
+ new_record[i].successors = nullptr;
new_record[i].nsuccessors = 0;
new_record[i].npredecessors = 0;
- new_record[i].successors_size = __kmp_successors_size;
+ new_record[i].successors_size = 0;
KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
}
// update the size at the end, so that we avoid other
@@ -5368,13 +5366,12 @@ static inline void __kmp_start_record(kmp_int32 gtid,
kmp_node_info_t *this_record_map =
(kmp_node_info_t *)__kmp_allocate(INIT_MAPSIZE * sizeof(kmp_node_info_t));
for (kmp_int32 i = 0; i < INIT_MAPSIZE; i++) {
- kmp_int32 *successorsList =
- (kmp_int32 *)__kmp_allocate(__kmp_successors_size * sizeof(kmp_int32));
this_record_map[i].task = nullptr;
- this_record_map[i].successors = successorsList;
+ this_record_map[i].parent_task = nullptr;
+ this_record_map[i].successors = nullptr;
this_record_map[i].nsuccessors = 0;
this_record_map[i].npredecessors = 0;
- this_record_map[i].successors_size = __kmp_successors_size;
+ this_record_map[i].successors_size = 0;
KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter, 0);
}
>From 8936d9b1549150a0ac979222ff8ddd2e3f2b27be Mon Sep 17 00:00:00 2001
From: jpinot <josep.pinot at bsc.es>
Date: Wed, 3 Sep 2025 10:12:19 +0200
Subject: [PATCH 3/7] [openmp] Fix uninitialized parent_task when allocating
 kmp_tdg_info
---
openmp/runtime/src/kmp_taskdeps.cpp | 1 +
openmp/runtime/src/kmp_tasking.cpp | 1 +
2 files changed, 2 insertions(+)
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 8215c4b318bb5..8610143a32f0b 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -720,6 +720,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
for (kmp_int i = old_size; i < new_size; i++) {
new_record[i].task = nullptr;
+ new_record[i].parent_task = nullptr;
new_record[i].successors = nullptr;
new_record[i].nsuccessors = 0;
new_record[i].npredecessors = 0;
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index c40dcf9b85b4a..2e77cf632dc94 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1818,6 +1818,7 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
for (kmp_int i = old_size; i < new_size; i++) {
new_record[i].task = nullptr;
+ new_record[i].parent_task = nullptr;
new_record[i].successors = nullptr;
new_record[i].nsuccessors = 0;
new_record[i].npredecessors = 0;
>From bf1f0df127cd49f565e4708c97a9606932468545 Mon Sep 17 00:00:00 2001
From: jpinot <josep.pinot at bsc.es>
Date: Tue, 9 Sep 2025 10:09:47 +0200
Subject: [PATCH 4/7] [openmp] Fix locking when expanding recorded tdg
---
openmp/runtime/src/kmp_taskdeps.cpp | 50 ++++++++++++++---------------
openmp/runtime/src/kmp_tasking.cpp | 8 ++---
2 files changed, 27 insertions(+), 31 deletions(-)
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 8610143a32f0b..15e7585a65617 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -704,39 +704,37 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
__kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
kmp_tdg_info_t *tdg = new_taskdata->tdg;
// extend record_map if needed
+ __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
- __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
- if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
- kmp_uint old_size = tdg->map_size;
- kmp_uint new_size = old_size * 2;
- kmp_node_info_t *old_record = tdg->record_map;
- kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
- new_size * sizeof(kmp_node_info_t));
- KMP_MEMCPY(new_record, tdg->record_map,
- old_size * sizeof(kmp_node_info_t));
- tdg->record_map = new_record;
-
- __kmp_free(old_record);
-
- for (kmp_int i = old_size; i < new_size; i++) {
- new_record[i].task = nullptr;
- new_record[i].parent_task = nullptr;
- new_record[i].successors = nullptr;
- new_record[i].nsuccessors = 0;
- new_record[i].npredecessors = 0;
- new_record[i].successors_size = 0;
- KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
- }
- // update the size at the end, so that we avoid other
- // threads use old_record while map_size is already updated
- tdg->map_size = new_size;
+ kmp_uint old_size = tdg->map_size;
+ kmp_uint new_size = old_size * 2;
+ kmp_node_info_t *old_record = tdg->record_map;
+ kmp_node_info_t *new_record =
+ (kmp_node_info_t *)__kmp_allocate(new_size * sizeof(kmp_node_info_t));
+ KMP_MEMCPY(new_record, tdg->record_map,
+ old_size * sizeof(kmp_node_info_t));
+ tdg->record_map = new_record;
+
+ __kmp_free(old_record);
+
+ for (kmp_int i = old_size; i < new_size; i++) {
+ new_record[i].task = nullptr;
+ new_record[i].parent_task = nullptr;
+ new_record[i].successors = nullptr;
+ new_record[i].nsuccessors = 0;
+ new_record[i].npredecessors = 0;
+ new_record[i].successors_size = 0;
+ KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
}
- __kmp_release_bootstrap_lock(&tdg->graph_lock);
+ // update the size at the end, so that we avoid other
+ // threads use old_record while map_size is already updated
+ tdg->map_size = new_size;
}
tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
new_taskdata->td_parent;
KMP_ATOMIC_INC(&tdg->num_tasks);
+ __kmp_release_bootstrap_lock(&tdg->graph_lock);
}
#endif
#if OMPT_SUPPORT
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 2e77cf632dc94..f735e4208ba48 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1800,7 +1800,8 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
__kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
kmp_tdg_info_t *tdg = new_taskdata->tdg;
// extend the record_map if needed
- if (new_taskdata->td_tdg_task_id >= new_taskdata->tdg->map_size) {
+ if (new_taskdata->td_tdg_task_id >= tdg->map_size ||
+ tdg->record_map[new_taskdata->td_tdg_task_id].task == nullptr) {
__kmp_acquire_bootstrap_lock(&tdg->graph_lock);
// map_size could have been updated by another thread if recursive
// taskloop
@@ -1829,14 +1830,11 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
// threads use old_record while map_size is already updated
tdg->map_size = new_size;
}
- __kmp_release_bootstrap_lock(&tdg->graph_lock);
- }
- // record a task
- if (tdg->record_map[new_taskdata->td_tdg_task_id].task == nullptr) {
tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
new_taskdata->td_parent;
KMP_ATOMIC_INC(&tdg->num_tasks);
+ __kmp_release_bootstrap_lock(&tdg->graph_lock);
}
}
#endif
>From 163b1ebb32ba4af3fa6393c04bbd21a537ed4f1a Mon Sep 17 00:00:00 2001
From: jpinot <jsp.pinot at gmail.com>
Date: Fri, 19 Sep 2025 13:50:42 +0200
Subject: [PATCH 5/7] [openmp] Add tdg node pointer in kmp_taskdata_t
Add a pointer to the node representing the task in the TDG; this avoids
locking access to record_map every time the node needs to be accessed.
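
A hedged sketch of the access-pattern change (node_info, taskdata and
node_of are illustrative names, not runtime types; only td_tdg_task_id
and td_tdg_node_info mirror the real fields): the node pointer is stored
once when the task is recorded instead of being re-derived from
record_map by id on every later use.

struct node_info { int npredecessors = 0; };

struct taskdata {
  int td_tdg_task_id = -1;
  node_info *td_tdg_node_info = nullptr; // cached when the task is recorded
};

// Before: every access re-indexes the shared map (and must respect its
// locking).
node_info *node_of(node_info *record_map, const taskdata &td) {
  return &record_map[td.td_tdg_task_id];
}

// After: the cached pointer is used directly.
node_info *node_of(const taskdata &td) { return td.td_tdg_node_info; }

int main() {
  node_info nodes[4];
  taskdata td{2, &nodes[2]};
  return node_of(nodes, td) == node_of(td) ? 0 : 1; // both paths agree
}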
---
openmp/runtime/src/kmp.h | 1 +
openmp/runtime/src/kmp_taskdeps.cpp | 8 ++++----
openmp/runtime/src/kmp_taskdeps.h | 2 +-
openmp/runtime/src/kmp_tasking.cpp | 3 +++
4 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 1f909cd3d3916..d8412e30f6e7d 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2805,6 +2805,7 @@ struct kmp_taskdata { /* aligned during dynamic allocation */
#if OMPX_TASKGRAPH
bool is_taskgraph = 0; // whether the task is within a TDG
kmp_tdg_info_t *tdg; // used to associate task with a TDG
+ kmp_node_info_t *td_tdg_node_info; // node representing the task in the TDG
kmp_int32 td_tdg_task_id; // local task id in its TDG
#endif
kmp_target_data_t td_target_data;
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 15e7585a65617..f1c2fbf32a854 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -232,8 +232,7 @@ static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
}
if (task_sink->is_taskgraph &&
__kmp_tdg_is_recording(task_sink->tdg->tdg_status)) {
- kmp_node_info_t *source_info =
- &task_sink->tdg->record_map[task_source->td_tdg_task_id];
+ kmp_node_info_t *source_info = task_source->td_tdg_node_info;
bool exists = false;
for (int i = 0; i < source_info->nsuccessors; i++) {
if (source_info->successors[i] == task_sink->td_tdg_task_id) {
@@ -261,8 +260,7 @@ static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
task_sink->td_tdg_task_id;
source_info->nsuccessors++;
- kmp_node_info_t *sink_info =
- &(task_sink->tdg->record_map[task_sink->td_tdg_task_id]);
+ kmp_node_info_t *sink_info = task_sink->td_tdg_node_info;
sink_info->npredecessors++;
}
}
@@ -733,6 +731,8 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
new_taskdata->td_parent;
+ new_taskdata->td_tdg_node_info =
+ &tdg->record_map[new_taskdata->td_tdg_task_id];
KMP_ATOMIC_INC(&tdg->num_tasks);
__kmp_release_bootstrap_lock(&tdg->graph_lock);
}
diff --git a/openmp/runtime/src/kmp_taskdeps.h b/openmp/runtime/src/kmp_taskdeps.h
index f6bfb39218a21..d8e000f75cecb 100644
--- a/openmp/runtime/src/kmp_taskdeps.h
+++ b/openmp/runtime/src/kmp_taskdeps.h
@@ -98,7 +98,7 @@ static inline void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
#if OMPX_TASKGRAPH
if (task->is_taskgraph && !(__kmp_tdg_is_recording(task->tdg->tdg_status))) {
- kmp_node_info_t *TaskInfo = &(task->tdg->record_map[task->td_tdg_task_id]);
+ kmp_node_info_t *TaskInfo = task->td_tdg_node_info;
for (int i = 0; i < TaskInfo->nsuccessors; i++) {
kmp_int32 successorNumber = TaskInfo->successors[i];
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index f735e4208ba48..38f2093b3ce2c 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1394,6 +1394,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
taskdata->td_flags.onced = 0;
taskdata->is_taskgraph = 0;
taskdata->tdg = nullptr;
+ taskdata->td_tdg_node_info = nullptr;
#endif
KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
// start at one because counts current task and children
@@ -1833,6 +1834,8 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
new_taskdata->td_parent;
+ new_taskdata->td_tdg_node_info =
+ &tdg->record_map[new_taskdata->td_tdg_task_id];
KMP_ATOMIC_INC(&tdg->num_tasks);
__kmp_release_bootstrap_lock(&tdg->graph_lock);
}
>From d18675ed1f6fe28248bbb0ad31cd3cef5b0fa1cb Mon Sep 17 00:00:00 2001
From: jpinot <jsp.pinot at gmail.com>
Date: Fri, 19 Sep 2025 20:28:09 +0200
Subject: [PATCH 6/7] [OpenMP] Add kmp_node_vector for TDG nodes
Replace the fixed-size record_map array of TDG nodes with kmp_node_vector,
a custom dynamic vector of kmp_node_info_t entries. This change aims to
mitigate data races during vector resizing by using a block-based
allocation strategy: existing blocks are never moved when the vector
grows, so pointers into them remain valid.
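
A minimal sketch of the block-based growth idea (block_vector, grow_to and
get are illustrative stand-ins for kmp_node_vector_t,
kmp_node_vector_resize and kmp_node_vector_get, not the runtime's code):
blocks that already exist are never moved, so a node pointer obtained
before a resize stays valid afterwards.

#include <atomic>
#include <mutex>
#include <vector>

template <typename T> class block_vector {
public:
  explicit block_vector(int block_size) : block_size_(block_size) {
    grow_to(block_size);
  }
  ~block_vector() {
    for (T *b : blocks_)
      delete[] b;
  }

  // Returns nullptr if id is beyond the current capacity; the caller resizes.
  T *get(int id) {
    if (id >= block_size_ * num_blocks_.load(std::memory_order_acquire))
      return nullptr;
    std::lock_guard<std::mutex> g(lock_); // guards the block-pointer array
    return &blocks_[id / block_size_][id % block_size_];
  }

  void grow_to(int size) {
    std::lock_guard<std::mutex> g(lock_);
    while (block_size_ * num_blocks_.load(std::memory_order_relaxed) < size) {
      blocks_.push_back(new T[block_size_]()); // existing blocks never move
      num_blocks_.fetch_add(1, std::memory_order_release);
    }
  }

private:
  std::vector<T *> blocks_;
  int block_size_;
  std::atomic<int> num_blocks_{0};
  std::mutex lock_;
};

int main() {
  block_vector<int> v(/*block_size=*/8);
  int *p = v.get(3); // valid pointer into block 0
  v.grow_to(64);     // adds blocks; does not move existing ones
  return p == v.get(3) ? 0 : 1;
}

That pointer stability appears to be the property the final patch in the
series relies on when it removes graph_lock from the record path.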
---
openmp/runtime/src/kmp.h | 12 +-
openmp/runtime/src/kmp_taskdeps.cpp | 39 ++---
openmp/runtime/src/kmp_taskdeps.h | 12 +-
openmp/runtime/src/kmp_tasking.cpp | 215 ++++++++++++++++++----------
4 files changed, 169 insertions(+), 109 deletions(-)
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index d8412e30f6e7d..72dbbba58ad2d 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2649,6 +2649,14 @@ typedef struct kmp_node_info {
kmp_taskdata_t *parent_task; // Parent implicit task
} kmp_node_info_t;
+// Representation of recorded nodes
+typedef struct kmp_node_vector {
+ kmp_node_info_t **blocks;
+ kmp_int32 block_size;
+ std::atomic<kmp_int32> num_of_blocks;
+ kmp_bootstrap_lock_t lock;
+} kmp_node_vector_t;
+
/// Represent a TDG's current status
typedef enum kmp_tdg_status {
KMP_TDG_NONE = 0,
@@ -2660,10 +2668,10 @@ typedef enum kmp_tdg_status {
typedef struct kmp_tdg_info {
kmp_int32 tdg_id; // Unique idenfifier of the TDG
kmp_taskgraph_flags_t tdg_flags; // Flags related to a TDG
- kmp_int32 map_size; // Number of allocated TDG nodes
+ /* kmp_int32 map_size; // Number of allocated TDG nodes */
kmp_int32 num_roots; // Number of roots tasks int the TDG
kmp_int32 *root_tasks; // Array of tasks identifiers that are roots
- kmp_node_info_t *record_map; // Array of TDG nodes
+ kmp_node_vector_t *record_map; // Array of TDG nodes
kmp_tdg_status_t tdg_status =
KMP_TDG_NONE; // Status of the TDG (recording, ready...)
std::atomic<kmp_int32> num_tasks; // Number of TDG nodes
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index f1c2fbf32a854..49b9e1c1adca3 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -702,37 +702,16 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
__kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
kmp_tdg_info_t *tdg = new_taskdata->tdg;
// extend record_map if needed
- __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
- if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
- kmp_uint old_size = tdg->map_size;
- kmp_uint new_size = old_size * 2;
- kmp_node_info_t *old_record = tdg->record_map;
- kmp_node_info_t *new_record =
- (kmp_node_info_t *)__kmp_allocate(new_size * sizeof(kmp_node_info_t));
- KMP_MEMCPY(new_record, tdg->record_map,
- old_size * sizeof(kmp_node_info_t));
- tdg->record_map = new_record;
-
- __kmp_free(old_record);
-
- for (kmp_int i = old_size; i < new_size; i++) {
- new_record[i].task = nullptr;
- new_record[i].parent_task = nullptr;
- new_record[i].successors = nullptr;
- new_record[i].nsuccessors = 0;
- new_record[i].npredecessors = 0;
- new_record[i].successors_size = 0;
- KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
- }
- // update the size at the end, so that we avoid other
- // threads use old_record while map_size is already updated
- tdg->map_size = new_size;
+ kmp_node_info_t *node =
+ kmp_node_vector_get(tdg->record_map, new_taskdata->td_tdg_task_id);
+ if (node == nullptr) {
+ kmp_node_vector_resize(tdg->record_map, new_taskdata->td_tdg_task_id * 2);
+ node = kmp_node_vector_get(tdg->record_map, new_taskdata->td_tdg_task_id);
}
- tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
- tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
- new_taskdata->td_parent;
- new_taskdata->td_tdg_node_info =
- &tdg->record_map[new_taskdata->td_tdg_task_id];
+ __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
+ node->task = new_task;
+ node->parent_task = new_taskdata->td_parent;
+ new_taskdata->td_tdg_node_info = node;
KMP_ATOMIC_INC(&tdg->num_tasks);
__kmp_release_bootstrap_lock(&tdg->graph_lock);
}
diff --git a/openmp/runtime/src/kmp_taskdeps.h b/openmp/runtime/src/kmp_taskdeps.h
index d8e000f75cecb..b67317818a4cf 100644
--- a/openmp/runtime/src/kmp_taskdeps.h
+++ b/openmp/runtime/src/kmp_taskdeps.h
@@ -93,6 +93,13 @@ static inline void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) {
}
extern void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start);
+#if OMPX_TASKGRAPH
+extern kmp_node_vector_t *kmp_alloc_tdg_vector(kmp_int32 block_size);
+extern kmp_node_info_t *kmp_node_vector_get(kmp_node_vector_t *vector,
+ kmp_int32 id);
+extern void kmp_node_vector_resize(kmp_node_vector_t *vector, kmp_int32 size);
+extern void kmp_node_vector_free(kmp_node_vector_t *vector);
+#endif
static inline void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
@@ -102,7 +109,10 @@ static inline void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
for (int i = 0; i < TaskInfo->nsuccessors; i++) {
kmp_int32 successorNumber = TaskInfo->successors[i];
- kmp_node_info_t *successor = &(task->tdg->record_map[successorNumber]);
+ kmp_node_info_t *successor =
+ kmp_node_vector_get(task->tdg->record_map, successorNumber);
+ /* kmp_node_info_t *successor = &(task->tdg->record_map[successorNumber]);
+ */
kmp_int32 npredecessors = KMP_ATOMIC_DEC(&successor->npredecessors_counter) - 1;
if (successor->task != nullptr && npredecessors == 0) {
__kmp_omp_task(gtid, successor->task, false);
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 38f2093b3ce2c..8aafe939a244b 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1800,42 +1800,18 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
if (new_taskdata->is_taskgraph &&
__kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
kmp_tdg_info_t *tdg = new_taskdata->tdg;
- // extend the record_map if needed
- if (new_taskdata->td_tdg_task_id >= tdg->map_size ||
- tdg->record_map[new_taskdata->td_tdg_task_id].task == nullptr) {
+
+ kmp_node_info_t *node =
+ kmp_node_vector_get(tdg->record_map, new_taskdata->td_tdg_task_id);
+ if (node == nullptr) {
+ kmp_node_vector_resize(tdg->record_map, new_taskdata->td_tdg_task_id * 2);
+ node = kmp_node_vector_get(tdg->record_map, new_taskdata->td_tdg_task_id);
+ }
+ if (node->task == nullptr) {
__kmp_acquire_bootstrap_lock(&tdg->graph_lock);
- // map_size could have been updated by another thread if recursive
- // taskloop
- if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
- kmp_uint old_size = tdg->map_size;
- kmp_uint new_size = old_size * 2;
- kmp_node_info_t *old_record = tdg->record_map;
- kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
- new_size * sizeof(kmp_node_info_t));
-
- KMP_MEMCPY(new_record, old_record, old_size * sizeof(kmp_node_info_t));
- tdg->record_map = new_record;
-
- __kmp_free(old_record);
-
- for (kmp_int i = old_size; i < new_size; i++) {
- new_record[i].task = nullptr;
- new_record[i].parent_task = nullptr;
- new_record[i].successors = nullptr;
- new_record[i].nsuccessors = 0;
- new_record[i].npredecessors = 0;
- new_record[i].successors_size = 0;
- KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
- }
- // update the size at the end, so that we avoid other
- // threads use old_record while map_size is already updated
- tdg->map_size = new_size;
- }
- tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
- tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
- new_taskdata->td_parent;
- new_taskdata->td_tdg_node_info =
- &tdg->record_map[new_taskdata->td_tdg_task_id];
+ node->task = new_task;
+ node->parent_task = new_taskdata->td_parent;
+ new_taskdata->td_tdg_node_info = node;
KMP_ATOMIC_INC(&tdg->num_tasks);
__kmp_release_bootstrap_lock(&tdg->graph_lock);
}
@@ -4334,6 +4310,95 @@ void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
}
}
+#if OMPX_TASKGRAPH
+kmp_node_vector_t *kmp_alloc_tdg_vector(kmp_int32 block_size) {
+ constexpr kmp_int32 block_nums = 1;
+ kmp_node_vector_t *vector =
+ (kmp_node_vector_t *)__kmp_allocate(sizeof(kmp_node_vector_t));
+ kmp_node_info_t **blocks = (kmp_node_info_t **)__kmp_allocate(
+ block_nums * sizeof(kmp_node_info_t *));
+ for (kmp_int32 i = 0; i < block_nums; i++) {
+ kmp_node_info_t *this_record_map =
+ (kmp_node_info_t *)__kmp_allocate(block_size * sizeof(kmp_node_info_t));
+ for (kmp_int32 j = 0; j < block_size; j++) {
+ this_record_map[j].task = nullptr;
+ this_record_map[j].parent_task = nullptr;
+ this_record_map[j].successors = nullptr;
+ this_record_map[j].nsuccessors = 0;
+ this_record_map[j].npredecessors = 0;
+ this_record_map[j].successors_size = 0;
+ KMP_ATOMIC_ST_RLX(&this_record_map[j].npredecessors_counter, 0);
+ }
+ blocks[i] = this_record_map;
+ }
+ vector->blocks = blocks;
+ vector->block_size = block_size;
+ vector->num_of_blocks = block_nums;
+
+ return vector;
+}
+
+void kmp_node_vector_resize(kmp_node_vector_t *vector, kmp_int32 size) {
+ const kmp_int32 num_of_blocks = KMP_ATOMIC_LD_RLX(&vector->num_of_blocks);
+ kmp_int32 current_size = vector->block_size * num_of_blocks;
+ if (current_size >= size)
+ return;
+
+ const kmp_int32 new_block_nums = 1 + ((size - 1) / vector->block_size);
+ kmp_node_info_t **old_blocks = vector->blocks;
+ kmp_node_info_t **new_blocks = (kmp_node_info_t **)__kmp_allocate(
+ new_block_nums * sizeof(kmp_node_info_t *));
+ KMP_MEMCPY(new_blocks, old_blocks, num_of_blocks * sizeof(kmp_node_info_t *));
+ for (kmp_int i = num_of_blocks; i < new_block_nums; i++) {
+ kmp_node_info_t *this_record_map = (kmp_node_info_t *)__kmp_allocate(
+ vector->block_size * sizeof(kmp_node_info_t));
+ for (kmp_int32 j = 0; j < vector->block_size; j++) {
+ this_record_map[j].task = nullptr;
+ this_record_map[j].parent_task = nullptr;
+ this_record_map[j].successors = nullptr;
+ this_record_map[j].nsuccessors = 0;
+ this_record_map[j].npredecessors = 0;
+ this_record_map[j].successors_size = 0;
+ KMP_ATOMIC_ST_RLX(&this_record_map[j].npredecessors_counter, 0);
+ }
+ new_blocks[i] = this_record_map;
+ }
+ __kmp_acquire_bootstrap_lock(&vector->lock);
+ current_size = vector->block_size * KMP_ATOMIC_LD_RLX(&vector->num_of_blocks);
+ if (current_size >= size) {
+ for (kmp_int i = num_of_blocks; i < new_block_nums; i++)
+ __kmp_free(new_blocks[i]);
+ __kmp_free(new_blocks);
+ } else {
+ vector->blocks = new_blocks;
+ KMP_ATOMIC_ST_REL(&vector->num_of_blocks, new_block_nums);
+ __kmp_free(old_blocks);
+ }
+ __kmp_release_bootstrap_lock(&vector->lock);
+}
+
+kmp_node_info_t *kmp_node_vector_get(kmp_node_vector_t *vector, kmp_int32 id) {
+ const kmp_int32 num_of_blocks = KMP_ATOMIC_LD_RLX(&vector->num_of_blocks);
+ const kmp_int32 vector_size = vector->block_size * num_of_blocks;
+ if (id >= vector_size)
+ return nullptr;
+ const kmp_int32 block_idx = id / vector->block_size;
+ const kmp_int32 node_idx = id % vector->block_size;
+ __kmp_acquire_bootstrap_lock(&vector->lock);
+ kmp_node_info_t *ret = &(vector->blocks[block_idx][node_idx]);
+ __kmp_release_bootstrap_lock(&vector->lock);
+ return ret;
+}
+
+void kmp_node_vector_free(kmp_node_vector_t *vector) {
+ __kmp_acquire_bootstrap_lock(&vector->lock);
+ for (int i = 0; i < vector->num_of_blocks; i++)
+ __kmp_free(vector->blocks[i]);
+ __kmp_free(vector->blocks);
+ __kmp_release_bootstrap_lock(&vector->lock);
+}
+#endif
+
/*!
@ingroup TASKING
@param ptask Task which execution is completed
@@ -5260,7 +5325,9 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
kmp_safe_raii_file_t tdg_file(file_name, "w");
kmp_int32 num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
- kmp_int32 map_size = tdg->map_size;
+ /* kmp_int32 map_size = tdg->map_size; */
+ kmp_int32 map_size =
+ tdg->record_map->block_size * tdg->record_map->num_of_blocks;
fprintf(tdg_file,
"digraph TDG {\n"
" compound=true\n"
@@ -5273,11 +5340,12 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
fprintf(tdg_file, " }\n");
kmp_int32 tasks = 0;
for (kmp_int32 i = 0; tasks < num_tasks && i < map_size; i++) {
- if (tdg->record_map[i].task == nullptr)
+ kmp_node_info_t *node = kmp_node_vector_get(tdg->record_map, i);
+ if (node->task == nullptr)
continue;
tasks++;
- kmp_int32 nsuccessors = tdg->record_map[i].nsuccessors;
- kmp_int32 *successors = tdg->record_map[i].successors;
+ kmp_int32 nsuccessors = node->nsuccessors;
+ kmp_int32 *successors = node->successors;
if (nsuccessors > 0) {
for (kmp_int32 j = 0; j < nsuccessors; j++)
fprintf(tdg_file, " %d -> %d \n", i, successors[j]);
@@ -5295,10 +5363,12 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_READY);
KA_TRACE(10, ("__kmp_exec_tdg(enter): T#%d tdg_id=%d num_roots=%d\n", gtid,
tdg->tdg_id, tdg->num_roots));
- kmp_node_info_t *this_record_map = tdg->record_map;
+ /* kmp_node_info_t *this_record_map = tdg->record_map; */
kmp_int32 *this_root_tasks = tdg->root_tasks;
kmp_int32 this_num_roots = tdg->num_roots;
kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
+ kmp_int32 map_size =
+ tdg->record_map->block_size * tdg->record_map->num_of_blocks;
kmp_int32 tasks = 0;
kmp_info_t *thread = __kmp_threads[gtid];
@@ -5308,21 +5378,20 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
__kmpc_taskred_init(gtid, tdg->rec_num_taskred, tdg->rec_taskred_data);
}
- for (kmp_int32 j = 0; j < tdg->map_size && tasks < this_num_tasks; j++) {
- if (this_record_map[j].task == nullptr)
+ for (kmp_int32 j = 0; j < map_size && tasks < this_num_tasks; j++) {
+ kmp_node_info_t *node = kmp_node_vector_get(tdg->record_map, j);
+ if (node->task == nullptr)
continue;
tasks++;
- kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(this_record_map[j].task);
+ kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(node->task);
td->td_parent = parent_task;
- this_record_map[j].parent_task = parent_task;
+ node->parent_task = parent_task;
- kmp_taskgroup_t *parent_taskgroup =
- this_record_map[j].parent_task->td_taskgroup;
+ kmp_taskgroup_t *parent_taskgroup = node->parent_task->td_taskgroup;
- KMP_ATOMIC_ST_RLX(&this_record_map[j].npredecessors_counter,
- this_record_map[j].npredecessors);
- KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_incomplete_child_tasks);
+ KMP_ATOMIC_ST_RLX(&node->npredecessors_counter, node->npredecessors);
+ KMP_ATOMIC_INC(&node->parent_task->td_incomplete_child_tasks);
if (parent_taskgroup) {
KMP_ATOMIC_INC(&parent_taskgroup->count);
@@ -5332,12 +5401,14 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
// If the parent doesnt have a taskgroup, remove it from the task
td->td_taskgroup = nullptr;
}
- if (this_record_map[j].parent_task->td_flags.tasktype == TASK_EXPLICIT)
- KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_allocated_child_tasks);
+ if (node->parent_task->td_flags.tasktype == TASK_EXPLICIT)
+ KMP_ATOMIC_INC(&node->parent_task->td_allocated_child_tasks);
}
for (kmp_int32 j = 0; j < this_num_roots; ++j) {
- __kmp_omp_task(gtid, this_record_map[this_root_tasks[j]].task, true);
+ kmp_node_info_t *node =
+ kmp_node_vector_get(tdg->record_map, this_root_tasks[j]);
+ __kmp_omp_task(gtid, node->task, true);
}
KA_TRACE(10, ("__kmp_exec_tdg(exit): T#%d tdg_id=%d num_roots=%d\n", gtid,
tdg->tdg_id, tdg->num_roots));
@@ -5356,28 +5427,14 @@ static inline void __kmp_start_record(kmp_int32 gtid,
__kmp_global_tdgs[__kmp_curr_tdg_idx] = tdg;
// Initializing the TDG structure
tdg->tdg_id = tdg_id;
- tdg->map_size = INIT_MAPSIZE;
+ /* tdg->map_size = INIT_MAPSIZE; */
tdg->num_roots = -1;
tdg->root_tasks = nullptr;
tdg->tdg_status = KMP_TDG_RECORDING;
tdg->rec_num_taskred = 0;
tdg->rec_taskred_data = nullptr;
KMP_ATOMIC_ST_RLX(&tdg->num_tasks, 0);
-
- // Initializing the list of nodes in this TDG
- kmp_node_info_t *this_record_map =
- (kmp_node_info_t *)__kmp_allocate(INIT_MAPSIZE * sizeof(kmp_node_info_t));
- for (kmp_int32 i = 0; i < INIT_MAPSIZE; i++) {
- this_record_map[i].task = nullptr;
- this_record_map[i].parent_task = nullptr;
- this_record_map[i].successors = nullptr;
- this_record_map[i].nsuccessors = 0;
- this_record_map[i].npredecessors = 0;
- this_record_map[i].successors_size = 0;
- KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter, 0);
- }
-
- __kmp_global_tdgs[__kmp_curr_tdg_idx]->record_map = this_record_map;
+ tdg->record_map = kmp_alloc_tdg_vector(INIT_MAPSIZE);
}
// __kmpc_start_record_task: Wrapper around __kmp_start_record to mark
@@ -5427,27 +5484,28 @@ kmp_int32 __kmpc_start_record_task(ident_t *loc_ref, kmp_int32 gtid,
// tdg: Pointer to the TDG
void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
// Store roots
- kmp_node_info_t *this_record_map = tdg->record_map;
kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
kmp_int32 *this_root_tasks =
(kmp_int32 *)__kmp_allocate(this_num_tasks * sizeof(kmp_int32));
- kmp_int32 this_map_size = tdg->map_size;
+ kmp_int32 this_map_size =
+ tdg->record_map->block_size * tdg->record_map->num_of_blocks;
kmp_int32 this_num_roots = 0;
kmp_info_t *thread = __kmp_threads[gtid];
kmp_int32 tasks = 0;
for (kmp_int32 i = 0; tasks < this_num_tasks && i < this_map_size; i++) {
- if (this_record_map[i].task == nullptr) {
+ kmp_node_info_t *node = kmp_node_vector_get(tdg->record_map, i);
+ if (node->task == nullptr) {
continue;
}
tasks++;
- if (this_record_map[i].npredecessors == 0) {
+ if (node->npredecessors == 0) {
this_root_tasks[this_num_roots++] = i;
}
}
// Update with roots info and mapsize
- tdg->map_size = this_map_size;
+ /* tdg->map_size = this_map_size; */
tdg->num_roots = this_num_roots;
tdg->root_tasks = this_root_tasks;
KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_RECORDING);
@@ -5459,9 +5517,14 @@ void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
}
// Reset predecessor counter
- for (kmp_int32 i = 0; i < this_num_tasks; i++) {
- KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter,
- this_record_map[i].npredecessors);
+ tasks = 0;
+ for (kmp_int32 i = 0; tasks < this_num_tasks && i < this_map_size; i++) {
+ kmp_node_info_t *node = kmp_node_vector_get(tdg->record_map, i);
+ if (node->task == nullptr) {
+ continue;
+ }
+ tasks++;
+ KMP_ATOMIC_ST_RLX(&node->npredecessors_counter, node->npredecessors);
}
if (__kmp_tdg_dot)
>From 32edeb63e80cd10f9a31cca7492f0a7483a6719f Mon Sep 17 00:00:00 2001
From: jpinot <jsp.pinot at gmail.com>
Date: Wed, 1 Oct 2025 16:00:41 +0200
Subject: [PATCH 7/7] [openmp] Delete graph_lock
---
openmp/runtime/src/kmp.h | 2 --
openmp/runtime/src/kmp_taskdeps.cpp | 2 --
openmp/runtime/src/kmp_tasking.cpp | 2 --
3 files changed, 6 deletions(-)
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 72dbbba58ad2d..6b28b504392a3 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2676,8 +2676,6 @@ typedef struct kmp_tdg_info {
KMP_TDG_NONE; // Status of the TDG (recording, ready...)
std::atomic<kmp_int32> num_tasks; // Number of TDG nodes
std::atomic<kmp_int32> tdg_task_id_next; // Task id of next node
- kmp_bootstrap_lock_t
- graph_lock; // Protect graph attributes when updated via taskloop_recur
// Taskloop reduction related
void *rec_taskred_data; // Data to pass to __kmpc_task_reduction_init or
// __kmpc_taskred_init
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 49b9e1c1adca3..2d1256431752a 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -708,12 +708,10 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
kmp_node_vector_resize(tdg->record_map, new_taskdata->td_tdg_task_id * 2);
node = kmp_node_vector_get(tdg->record_map, new_taskdata->td_tdg_task_id);
}
- __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
node->task = new_task;
node->parent_task = new_taskdata->td_parent;
new_taskdata->td_tdg_node_info = node;
KMP_ATOMIC_INC(&tdg->num_tasks);
- __kmp_release_bootstrap_lock(&tdg->graph_lock);
}
#endif
#if OMPT_SUPPORT
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 8aafe939a244b..307e69ba992b4 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1808,12 +1808,10 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
node = kmp_node_vector_get(tdg->record_map, new_taskdata->td_tdg_task_id);
}
if (node->task == nullptr) {
- __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
node->task = new_task;
node->parent_task = new_taskdata->td_parent;
new_taskdata->td_tdg_node_info = node;
KMP_ATOMIC_INC(&tdg->num_tasks);
- __kmp_release_bootstrap_lock(&tdg->graph_lock);
}
}
#endif