[Openmp-commits] [openmp] [OpenMP] Fix td_tdg_task_id underflow when taskloop and taskgraph (PR #150877)

Josep Pinot via Openmp-commits openmp-commits at lists.llvm.org
Wed Oct 1 07:02:21 PDT 2025


https://github.com/jpinot updated https://github.com/llvm/llvm-project/pull/150877

>From 4b4938042950dd7898253509bf511b1bf18a19d0 Mon Sep 17 00:00:00 2001
From: jpinot <josep.pinot at bsc.es>
Date: Thu, 17 Jul 2025 12:22:01 +0200
Subject: [PATCH 1/7] [OpenMP] Fix td_tdg_task_id underflow with taskloop and
 taskgraph

This patch addresses an issue where the td_tdg_task_id could underflow,
leading to a negative task ID, when a taskloop region was encountered
before a taskgraph clause.

This change allows spurious holes in the record_map.
---
 openmp/runtime/src/kmp.h           |  1 +
 openmp/runtime/src/kmp_tasking.cpp | 30 ++++++++++++++++++++----------
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 83afc0e83f231..1f909cd3d3916 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2667,6 +2667,7 @@ typedef struct kmp_tdg_info {
   kmp_tdg_status_t tdg_status =
       KMP_TDG_NONE; // Status of the TDG (recording, ready...)
   std::atomic<kmp_int32> num_tasks; // Number of TDG nodes
+  std::atomic<kmp_int32> tdg_task_id_next; // Task id of next node
   kmp_bootstrap_lock_t
       graph_lock; // Protect graph attributes when updated via taskloop_recur
   // Taskloop reduction related
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 37836fb457537..fb54baf7e4e07 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1437,7 +1437,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
     taskdata->is_taskgraph = 1;
     taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
     taskdata->td_task_id = KMP_GEN_TASK_ID();
-    taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
+    taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&tdg->tdg_task_id_next);
   }
 #endif
   KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
@@ -4465,7 +4465,8 @@ kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src
 #if OMPX_TASKGRAPH
   if (taskdata->is_taskgraph && !taskloop_recur &&
       __kmp_tdg_is_recording(taskdata_src->tdg->tdg_status))
-    taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
+    taskdata->td_tdg_task_id =
+        KMP_ATOMIC_INC(&taskdata_src->tdg->tdg_task_id_next);
 #endif
   taskdata->td_task_id = KMP_GEN_TASK_ID();
   if (task->shareds != NULL) { // need setup shareds pointer
@@ -4979,10 +4980,6 @@ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
 #endif
     __kmpc_taskgroup(loc, gtid);
   }
-
-#if OMPX_TASKGRAPH
-  KMP_ATOMIC_DEC(&__kmp_tdg_task_id);
-#endif
   // =========================================================================
   // calculate loop parameters
   kmp_taskloop_bounds_t task_bounds(task, lb, ub);
@@ -5263,6 +5260,7 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
   kmp_safe_raii_file_t tdg_file(file_name, "w");
 
   kmp_int32 num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
+  kmp_int32 map_size = tdg->map_size;
   fprintf(tdg_file,
           "digraph TDG {\n"
           "   compound=true\n"
@@ -5273,7 +5271,11 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
     fprintf(tdg_file, "      %d[style=bold]\n", i);
   }
   fprintf(tdg_file, "   }\n");
-  for (kmp_int32 i = 0; i < num_tasks; i++) {
+  kmp_int32 tasks = 0;
+  for (kmp_int32 i = 0; tasks < num_tasks && i < map_size; i++) {
+    if (tdg->record_map[i].task == nullptr)
+      continue;
+    tasks++;
     kmp_int32 nsuccessors = tdg->record_map[i].nsuccessors;
     kmp_int32 *successors = tdg->record_map[i].successors;
     if (nsuccessors > 0) {
@@ -5297,6 +5299,7 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
   kmp_int32 *this_root_tasks = tdg->root_tasks;
   kmp_int32 this_num_roots = tdg->num_roots;
   kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
+  kmp_int32 tasks = 0;
 
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_taskdata_t *parent_task = thread->th.th_current_task;
@@ -5305,7 +5308,10 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
     __kmpc_taskred_init(gtid, tdg->rec_num_taskred, tdg->rec_taskred_data);
   }
 
-  for (kmp_int32 j = 0; j < this_num_tasks; j++) {
+  for (kmp_int32 j = 0; j < tdg->map_size && tasks < this_num_tasks; j++) {
+    if (this_record_map[j].task == nullptr)
+      continue;
+    tasks++;
     kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(this_record_map[j].task);
 
     td->td_parent = parent_task;
@@ -5429,8 +5435,13 @@ void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
   kmp_int32 this_map_size = tdg->map_size;
   kmp_int32 this_num_roots = 0;
   kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_int32 tasks = 0;
 
-  for (kmp_int32 i = 0; i < this_num_tasks; i++) {
+  for (kmp_int32 i = 0; tasks < this_num_tasks && i < this_map_size; i++) {
+    if (this_record_map[i].task == nullptr) {
+      continue;
+    }
+    tasks++;
     if (this_record_map[i].npredecessors == 0) {
       this_root_tasks[this_num_roots++] = i;
     }
@@ -5453,7 +5464,6 @@ void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
     KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter,
                       this_record_map[i].npredecessors);
   }
-  KMP_ATOMIC_ST_RLX(&__kmp_tdg_task_id, 0);
 
   if (__kmp_tdg_dot)
     __kmp_print_tdg_dot(tdg, gtid);

>From c8fdb6411e098205e6f2c76a54778754dd560ba2 Mon Sep 17 00:00:00 2001
From: jpinot <josep.pinot at bsc.es>
Date: Wed, 3 Sep 2025 10:10:41 +0200
Subject: [PATCH 2/7] [openmp] Remove taskgraph successors allocation before
 it is needed

Delayed allocation of successors in kmp_node until the array is needed,
removing the small allocation when a taskgraph node is created or
resized.
---
 openmp/runtime/src/kmp_taskdeps.cpp | 16 +++++++++-------
 openmp/runtime/src/kmp_tasking.cpp  | 13 +++++--------
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index abbca752f0587..8215c4b318bb5 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -244,13 +244,17 @@ static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
     if (!exists) {
       if (source_info->nsuccessors >= source_info->successors_size) {
         kmp_uint old_size = source_info->successors_size;
-        source_info->successors_size = 2 * source_info->successors_size;
+        source_info->successors_size = old_size == 0
+                                           ? __kmp_successors_size
+                                           : 2 * source_info->successors_size;
         kmp_int32 *old_succ_ids = source_info->successors;
         kmp_int32 *new_succ_ids = (kmp_int32 *)__kmp_allocate(
             source_info->successors_size * sizeof(kmp_int32));
-        KMP_MEMCPY(new_succ_ids, old_succ_ids, old_size * sizeof(kmp_int32));
+        if (old_succ_ids) {
+          KMP_MEMCPY(new_succ_ids, old_succ_ids, old_size * sizeof(kmp_int32));
+          __kmp_free(old_succ_ids);
+        }
         source_info->successors = new_succ_ids;
-        __kmp_free(old_succ_ids);
       }
 
       source_info->successors[source_info->nsuccessors] =
@@ -715,13 +719,11 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
         __kmp_free(old_record);
 
         for (kmp_int i = old_size; i < new_size; i++) {
-          kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
-              __kmp_successors_size * sizeof(kmp_int32));
           new_record[i].task = nullptr;
-          new_record[i].successors = successorsList;
+          new_record[i].successors = nullptr;
           new_record[i].nsuccessors = 0;
           new_record[i].npredecessors = 0;
-          new_record[i].successors_size = __kmp_successors_size;
+          new_record[i].successors_size = 0;
           KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
         }
         // update the size at the end, so that we avoid other
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index fb54baf7e4e07..c40dcf9b85b4a 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1817,13 +1817,11 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
         __kmp_free(old_record);
 
         for (kmp_int i = old_size; i < new_size; i++) {
-          kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
-              __kmp_successors_size * sizeof(kmp_int32));
           new_record[i].task = nullptr;
-          new_record[i].successors = successorsList;
+          new_record[i].successors = nullptr;
           new_record[i].nsuccessors = 0;
           new_record[i].npredecessors = 0;
-          new_record[i].successors_size = __kmp_successors_size;
+          new_record[i].successors_size = 0;
           KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
         }
         // update the size at the end, so that we avoid other
@@ -5368,13 +5366,12 @@ static inline void __kmp_start_record(kmp_int32 gtid,
   kmp_node_info_t *this_record_map =
       (kmp_node_info_t *)__kmp_allocate(INIT_MAPSIZE * sizeof(kmp_node_info_t));
   for (kmp_int32 i = 0; i < INIT_MAPSIZE; i++) {
-    kmp_int32 *successorsList =
-        (kmp_int32 *)__kmp_allocate(__kmp_successors_size * sizeof(kmp_int32));
     this_record_map[i].task = nullptr;
-    this_record_map[i].successors = successorsList;
+    this_record_map[i].parent_task = nullptr;
+    this_record_map[i].successors = nullptr;
     this_record_map[i].nsuccessors = 0;
     this_record_map[i].npredecessors = 0;
-    this_record_map[i].successors_size = __kmp_successors_size;
+    this_record_map[i].successors_size = 0;
     KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter, 0);
   }
 

>From 8936d9b1549150a0ac979222ff8ddd2e3f2b27be Mon Sep 17 00:00:00 2001
From: jpinot <josep.pinot at bsc.es>
Date: Wed, 3 Sep 2025 10:12:19 +0200
Subject: [PATCH 3/7] [openmp] Fix uninitialized parent_task when allocating
 kmp_tdg_info

---
 openmp/runtime/src/kmp_taskdeps.cpp | 1 +
 openmp/runtime/src/kmp_tasking.cpp  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 8215c4b318bb5..8610143a32f0b 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -720,6 +720,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
 
         for (kmp_int i = old_size; i < new_size; i++) {
           new_record[i].task = nullptr;
+          new_record[i].parent_task = nullptr;
           new_record[i].successors = nullptr;
           new_record[i].nsuccessors = 0;
           new_record[i].npredecessors = 0;
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index c40dcf9b85b4a..2e77cf632dc94 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1818,6 +1818,7 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
 
         for (kmp_int i = old_size; i < new_size; i++) {
           new_record[i].task = nullptr;
+          new_record[i].parent_task = nullptr;
           new_record[i].successors = nullptr;
           new_record[i].nsuccessors = 0;
           new_record[i].npredecessors = 0;

>From bf1f0df127cd49f565e4708c97a9606932468545 Mon Sep 17 00:00:00 2001
From: jpinot <josep.pinot at bsc.es>
Date: Tue, 9 Sep 2025 10:09:47 +0200
Subject: [PATCH 4/7] [openmp] Fix locking when expanding recorded tdg

---
 openmp/runtime/src/kmp_taskdeps.cpp | 50 ++++++++++++++---------------
 openmp/runtime/src/kmp_tasking.cpp  |  8 ++---
 2 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 8610143a32f0b..15e7585a65617 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -704,39 +704,37 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
       __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
     kmp_tdg_info_t *tdg = new_taskdata->tdg;
     // extend record_map if needed
+    __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
     if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
-      __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
-      if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
-        kmp_uint old_size = tdg->map_size;
-        kmp_uint new_size = old_size * 2;
-        kmp_node_info_t *old_record = tdg->record_map;
-        kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
-            new_size * sizeof(kmp_node_info_t));
-        KMP_MEMCPY(new_record, tdg->record_map,
-                   old_size * sizeof(kmp_node_info_t));
-        tdg->record_map = new_record;
-
-        __kmp_free(old_record);
-
-        for (kmp_int i = old_size; i < new_size; i++) {
-          new_record[i].task = nullptr;
-          new_record[i].parent_task = nullptr;
-          new_record[i].successors = nullptr;
-          new_record[i].nsuccessors = 0;
-          new_record[i].npredecessors = 0;
-          new_record[i].successors_size = 0;
-          KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
-        }
-        // update the size at the end, so that we avoid other
-        // threads use old_record while map_size is already updated
-        tdg->map_size = new_size;
+      kmp_uint old_size = tdg->map_size;
+      kmp_uint new_size = old_size * 2;
+      kmp_node_info_t *old_record = tdg->record_map;
+      kmp_node_info_t *new_record =
+          (kmp_node_info_t *)__kmp_allocate(new_size * sizeof(kmp_node_info_t));
+      KMP_MEMCPY(new_record, tdg->record_map,
+                 old_size * sizeof(kmp_node_info_t));
+      tdg->record_map = new_record;
+
+      __kmp_free(old_record);
+
+      for (kmp_int i = old_size; i < new_size; i++) {
+        new_record[i].task = nullptr;
+        new_record[i].parent_task = nullptr;
+        new_record[i].successors = nullptr;
+        new_record[i].nsuccessors = 0;
+        new_record[i].npredecessors = 0;
+        new_record[i].successors_size = 0;
+        KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
       }
-      __kmp_release_bootstrap_lock(&tdg->graph_lock);
+      // update the size at the end, so that we avoid other
+      // threads use old_record while map_size is already updated
+      tdg->map_size = new_size;
     }
     tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
     tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
         new_taskdata->td_parent;
     KMP_ATOMIC_INC(&tdg->num_tasks);
+    __kmp_release_bootstrap_lock(&tdg->graph_lock);
   }
 #endif
 #if OMPT_SUPPORT
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 2e77cf632dc94..f735e4208ba48 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1800,7 +1800,8 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
       __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
     kmp_tdg_info_t *tdg = new_taskdata->tdg;
     // extend the record_map if needed
-    if (new_taskdata->td_tdg_task_id >= new_taskdata->tdg->map_size) {
+    if (new_taskdata->td_tdg_task_id >= tdg->map_size ||
+        tdg->record_map[new_taskdata->td_tdg_task_id].task == nullptr) {
       __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
       // map_size could have been updated by another thread if recursive
       // taskloop
@@ -1829,14 +1830,11 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
         // threads use old_record while map_size is already updated
         tdg->map_size = new_size;
       }
-      __kmp_release_bootstrap_lock(&tdg->graph_lock);
-    }
-    // record a task
-    if (tdg->record_map[new_taskdata->td_tdg_task_id].task == nullptr) {
       tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
       tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
           new_taskdata->td_parent;
       KMP_ATOMIC_INC(&tdg->num_tasks);
+      __kmp_release_bootstrap_lock(&tdg->graph_lock);
     }
   }
 #endif

>From 163b1ebb32ba4af3fa6393c04bbd21a537ed4f1a Mon Sep 17 00:00:00 2001
From: jpinot <jsp.pinot at gmail.com>
Date: Fri, 19 Sep 2025 13:50:42 +0200
Subject: [PATCH 5/7] [openmp] Add tdg node pointer in kmp_taskdata_t

Add a pointer to the node representing the task in the TDG; this avoids
locking access to record_map every time a node needs to be accessed.
---
 openmp/runtime/src/kmp.h            | 1 +
 openmp/runtime/src/kmp_taskdeps.cpp | 8 ++++----
 openmp/runtime/src/kmp_taskdeps.h   | 2 +-
 openmp/runtime/src/kmp_tasking.cpp  | 3 +++
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 1f909cd3d3916..d8412e30f6e7d 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2805,6 +2805,7 @@ struct kmp_taskdata { /* aligned during dynamic allocation       */
 #if OMPX_TASKGRAPH
   bool is_taskgraph = 0; // whether the task is within a TDG
   kmp_tdg_info_t *tdg; // used to associate task with a TDG
+  kmp_node_info_t *td_tdg_node_info; // node representing the task's in the TDG
   kmp_int32 td_tdg_task_id; // local task id in its TDG
 #endif
   kmp_target_data_t td_target_data;
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 15e7585a65617..f1c2fbf32a854 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -232,8 +232,7 @@ static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
   }
   if (task_sink->is_taskgraph &&
       __kmp_tdg_is_recording(task_sink->tdg->tdg_status)) {
-    kmp_node_info_t *source_info =
-        &task_sink->tdg->record_map[task_source->td_tdg_task_id];
+    kmp_node_info_t *source_info = task_source->td_tdg_node_info;
     bool exists = false;
     for (int i = 0; i < source_info->nsuccessors; i++) {
       if (source_info->successors[i] == task_sink->td_tdg_task_id) {
@@ -261,8 +260,7 @@ static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
           task_sink->td_tdg_task_id;
       source_info->nsuccessors++;
 
-      kmp_node_info_t *sink_info =
-          &(task_sink->tdg->record_map[task_sink->td_tdg_task_id]);
+      kmp_node_info_t *sink_info = task_sink->td_tdg_node_info;
       sink_info->npredecessors++;
     }
   }
@@ -733,6 +731,8 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
     tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
     tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
         new_taskdata->td_parent;
+    new_taskdata->td_tdg_node_info =
+        &tdg->record_map[new_taskdata->td_tdg_task_id];
     KMP_ATOMIC_INC(&tdg->num_tasks);
     __kmp_release_bootstrap_lock(&tdg->graph_lock);
   }
diff --git a/openmp/runtime/src/kmp_taskdeps.h b/openmp/runtime/src/kmp_taskdeps.h
index f6bfb39218a21..d8e000f75cecb 100644
--- a/openmp/runtime/src/kmp_taskdeps.h
+++ b/openmp/runtime/src/kmp_taskdeps.h
@@ -98,7 +98,7 @@ static inline void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
 
 #if OMPX_TASKGRAPH
   if (task->is_taskgraph && !(__kmp_tdg_is_recording(task->tdg->tdg_status))) {
-    kmp_node_info_t *TaskInfo = &(task->tdg->record_map[task->td_tdg_task_id]);
+    kmp_node_info_t *TaskInfo = task->td_tdg_node_info;
 
     for (int i = 0; i < TaskInfo->nsuccessors; i++) {
       kmp_int32 successorNumber = TaskInfo->successors[i];
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index f735e4208ba48..38f2093b3ce2c 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1394,6 +1394,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
   taskdata->td_flags.onced = 0;
   taskdata->is_taskgraph = 0;
   taskdata->tdg = nullptr;
+  taskdata->td_tdg_node_info = nullptr;
 #endif
   KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
   // start at one because counts current task and children
@@ -1833,6 +1834,8 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
       tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
       tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
           new_taskdata->td_parent;
+      new_taskdata->td_tdg_node_info =
+          &tdg->record_map[new_taskdata->td_tdg_task_id];
       KMP_ATOMIC_INC(&tdg->num_tasks);
       __kmp_release_bootstrap_lock(&tdg->graph_lock);
     }

>From d18675ed1f6fe28248bbb0ad31cd3cef5b0fa1cb Mon Sep 17 00:00:00 2001
From: jpinot <jsp.pinot at gmail.com>
Date: Fri, 19 Sep 2025 20:28:09 +0200
Subject: [PATCH 6/7] [OpenMP] Add kmp_node_vector for TDG successors

Replaced the fixed-size array for TDG successors with kmp_node_vector,
a custom dynamic vector of kmp_node_info entries for the TDG nodes. This
change aims to mitigate data races during vector resizing by using a
block-based allocation strategy.
---
 openmp/runtime/src/kmp.h            |  12 +-
 openmp/runtime/src/kmp_taskdeps.cpp |  39 ++---
 openmp/runtime/src/kmp_taskdeps.h   |  12 +-
 openmp/runtime/src/kmp_tasking.cpp  | 215 ++++++++++++++++++----------
 4 files changed, 169 insertions(+), 109 deletions(-)

diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index d8412e30f6e7d..72dbbba58ad2d 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2649,6 +2649,14 @@ typedef struct kmp_node_info {
   kmp_taskdata_t *parent_task; // Parent implicit task
 } kmp_node_info_t;
 
+// Representation of recorded nodes
+typedef struct kmp_node_vector {
+  kmp_node_info_t **blocks;
+  kmp_int32 block_size;
+  std::atomic<kmp_int32> num_of_blocks;
+  kmp_bootstrap_lock_t lock;
+} kmp_node_vector_t;
+
 /// Represent a TDG's current status
 typedef enum kmp_tdg_status {
   KMP_TDG_NONE = 0,
@@ -2660,10 +2668,10 @@ typedef enum kmp_tdg_status {
 typedef struct kmp_tdg_info {
   kmp_int32 tdg_id; // Unique idenfifier of the TDG
   kmp_taskgraph_flags_t tdg_flags; // Flags related to a TDG
-  kmp_int32 map_size; // Number of allocated TDG nodes
+  /* kmp_int32 map_size; // Number of allocated TDG nodes */
   kmp_int32 num_roots; // Number of roots tasks int the TDG
   kmp_int32 *root_tasks; // Array of tasks identifiers that are roots
-  kmp_node_info_t *record_map; // Array of TDG nodes
+  kmp_node_vector_t *record_map; // Array of TDG nodes
   kmp_tdg_status_t tdg_status =
       KMP_TDG_NONE; // Status of the TDG (recording, ready...)
   std::atomic<kmp_int32> num_tasks; // Number of TDG nodes
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index f1c2fbf32a854..49b9e1c1adca3 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -702,37 +702,16 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
       __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
     kmp_tdg_info_t *tdg = new_taskdata->tdg;
     // extend record_map if needed
-    __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
-    if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
-      kmp_uint old_size = tdg->map_size;
-      kmp_uint new_size = old_size * 2;
-      kmp_node_info_t *old_record = tdg->record_map;
-      kmp_node_info_t *new_record =
-          (kmp_node_info_t *)__kmp_allocate(new_size * sizeof(kmp_node_info_t));
-      KMP_MEMCPY(new_record, tdg->record_map,
-                 old_size * sizeof(kmp_node_info_t));
-      tdg->record_map = new_record;
-
-      __kmp_free(old_record);
-
-      for (kmp_int i = old_size; i < new_size; i++) {
-        new_record[i].task = nullptr;
-        new_record[i].parent_task = nullptr;
-        new_record[i].successors = nullptr;
-        new_record[i].nsuccessors = 0;
-        new_record[i].npredecessors = 0;
-        new_record[i].successors_size = 0;
-        KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
-      }
-      // update the size at the end, so that we avoid other
-      // threads use old_record while map_size is already updated
-      tdg->map_size = new_size;
+    kmp_node_info_t *node =
+        kmp_node_vector_get(tdg->record_map, new_taskdata->td_tdg_task_id);
+    if (node == nullptr) {
+      kmp_node_vector_resize(tdg->record_map, new_taskdata->td_tdg_task_id * 2);
+      node = kmp_node_vector_get(tdg->record_map, new_taskdata->td_tdg_task_id);
     }
-    tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
-    tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
-        new_taskdata->td_parent;
-    new_taskdata->td_tdg_node_info =
-        &tdg->record_map[new_taskdata->td_tdg_task_id];
+    __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
+    node->task = new_task;
+    node->parent_task = new_taskdata->td_parent;
+    new_taskdata->td_tdg_node_info = node;
     KMP_ATOMIC_INC(&tdg->num_tasks);
     __kmp_release_bootstrap_lock(&tdg->graph_lock);
   }
diff --git a/openmp/runtime/src/kmp_taskdeps.h b/openmp/runtime/src/kmp_taskdeps.h
index d8e000f75cecb..b67317818a4cf 100644
--- a/openmp/runtime/src/kmp_taskdeps.h
+++ b/openmp/runtime/src/kmp_taskdeps.h
@@ -93,6 +93,13 @@ static inline void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) {
 }
 
 extern void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start);
+#if OMPX_TASKGRAPH
+extern kmp_node_vector_t *kmp_alloc_tdg_vector(kmp_int32 block_size);
+extern kmp_node_info_t *kmp_node_vector_get(kmp_node_vector_t *vector,
+                                            kmp_int32 id);
+extern void kmp_node_vector_resize(kmp_node_vector_t *vector, kmp_int32 size);
+extern void kmp_node_vector_free(kmp_node_vector_t *vector);
+#endif
 
 static inline void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
 
@@ -102,7 +109,10 @@ static inline void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
 
     for (int i = 0; i < TaskInfo->nsuccessors; i++) {
       kmp_int32 successorNumber = TaskInfo->successors[i];
-      kmp_node_info_t *successor = &(task->tdg->record_map[successorNumber]);
+      kmp_node_info_t *successor =
+          kmp_node_vector_get(task->tdg->record_map, successorNumber);
+      /* kmp_node_info_t *successor = &(task->tdg->record_map[successorNumber]);
+       */
       kmp_int32 npredecessors = KMP_ATOMIC_DEC(&successor->npredecessors_counter) - 1;
       if (successor->task != nullptr && npredecessors == 0) {
         __kmp_omp_task(gtid, successor->task, false);
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 38f2093b3ce2c..8aafe939a244b 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1800,42 +1800,18 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
   if (new_taskdata->is_taskgraph &&
       __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
     kmp_tdg_info_t *tdg = new_taskdata->tdg;
-    // extend the record_map if needed
-    if (new_taskdata->td_tdg_task_id >= tdg->map_size ||
-        tdg->record_map[new_taskdata->td_tdg_task_id].task == nullptr) {
+
+    kmp_node_info_t *node =
+        kmp_node_vector_get(tdg->record_map, new_taskdata->td_tdg_task_id);
+    if (node == nullptr) {
+      kmp_node_vector_resize(tdg->record_map, new_taskdata->td_tdg_task_id * 2);
+      node = kmp_node_vector_get(tdg->record_map, new_taskdata->td_tdg_task_id);
+    }
+    if (node->task == nullptr) {
       __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
-      // map_size could have been updated by another thread if recursive
-      // taskloop
-      if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
-        kmp_uint old_size = tdg->map_size;
-        kmp_uint new_size = old_size * 2;
-        kmp_node_info_t *old_record = tdg->record_map;
-        kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
-            new_size * sizeof(kmp_node_info_t));
-
-        KMP_MEMCPY(new_record, old_record, old_size * sizeof(kmp_node_info_t));
-        tdg->record_map = new_record;
-
-        __kmp_free(old_record);
-
-        for (kmp_int i = old_size; i < new_size; i++) {
-          new_record[i].task = nullptr;
-          new_record[i].parent_task = nullptr;
-          new_record[i].successors = nullptr;
-          new_record[i].nsuccessors = 0;
-          new_record[i].npredecessors = 0;
-          new_record[i].successors_size = 0;
-          KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
-        }
-        // update the size at the end, so that we avoid other
-        // threads use old_record while map_size is already updated
-        tdg->map_size = new_size;
-      }
-      tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
-      tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
-          new_taskdata->td_parent;
-      new_taskdata->td_tdg_node_info =
-          &tdg->record_map[new_taskdata->td_tdg_task_id];
+      node->task = new_task;
+      node->parent_task = new_taskdata->td_parent;
+      new_taskdata->td_tdg_node_info = node;
       KMP_ATOMIC_INC(&tdg->num_tasks);
       __kmp_release_bootstrap_lock(&tdg->graph_lock);
     }
@@ -4334,6 +4310,95 @@ void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
   }
 }
 
+#if OMPX_TASKGRAPH
+kmp_node_vector_t *kmp_alloc_tdg_vector(kmp_int32 block_size) {
+  constexpr kmp_int32 block_nums = 1;
+  kmp_node_vector_t *vector =
+      (kmp_node_vector_t *)__kmp_allocate(sizeof(kmp_node_vector_t));
+  kmp_node_info_t **blocks = (kmp_node_info_t **)__kmp_allocate(
+      block_nums * sizeof(kmp_node_info_t *));
+  for (kmp_int32 i = 0; i < block_nums; i++) {
+    kmp_node_info_t *this_record_map =
+        (kmp_node_info_t *)__kmp_allocate(block_size * sizeof(kmp_node_info_t));
+    for (kmp_int32 j = 0; j < block_size; j++) {
+      this_record_map[j].task = nullptr;
+      this_record_map[j].parent_task = nullptr;
+      this_record_map[j].successors = nullptr;
+      this_record_map[j].nsuccessors = 0;
+      this_record_map[j].npredecessors = 0;
+      this_record_map[j].successors_size = 0;
+      KMP_ATOMIC_ST_RLX(&this_record_map[j].npredecessors_counter, 0);
+    }
+    blocks[i] = this_record_map;
+  }
+  vector->blocks = blocks;
+  vector->block_size = block_size;
+  vector->num_of_blocks = block_nums;
+
+  return vector;
+}
+
+void kmp_node_vector_resize(kmp_node_vector_t *vector, kmp_int32 size) {
+  const kmp_int32 num_of_blocks = KMP_ATOMIC_LD_RLX(&vector->num_of_blocks);
+  kmp_int32 current_size = vector->block_size * num_of_blocks;
+  if (current_size >= size)
+    return;
+
+  const kmp_int32 new_block_nums = 1 + ((size - 1) / vector->block_size);
+  kmp_node_info_t **old_blocks = vector->blocks;
+  kmp_node_info_t **new_blocks = (kmp_node_info_t **)__kmp_allocate(
+      new_block_nums * sizeof(kmp_node_info_t *));
+  KMP_MEMCPY(new_blocks, old_blocks, num_of_blocks * sizeof(kmp_node_info_t *));
+  for (kmp_int i = num_of_blocks; i < new_block_nums; i++) {
+    kmp_node_info_t *this_record_map = (kmp_node_info_t *)__kmp_allocate(
+        vector->block_size * sizeof(kmp_node_info_t));
+    for (kmp_int32 j = 0; j < vector->block_size; j++) {
+      this_record_map[j].task = nullptr;
+      this_record_map[j].parent_task = nullptr;
+      this_record_map[j].successors = nullptr;
+      this_record_map[j].nsuccessors = 0;
+      this_record_map[j].npredecessors = 0;
+      this_record_map[j].successors_size = 0;
+      KMP_ATOMIC_ST_RLX(&this_record_map[j].npredecessors_counter, 0);
+    }
+    new_blocks[i] = this_record_map;
+  }
+  __kmp_acquire_bootstrap_lock(&vector->lock);
+  current_size = vector->block_size * KMP_ATOMIC_LD_RLX(&vector->num_of_blocks);
+  if (current_size >= size) {
+    for (kmp_int i = num_of_blocks; i < new_block_nums; i++)
+      __kmp_free(new_blocks[i]);
+    __kmp_free(new_blocks);
+  } else {
+    vector->blocks = new_blocks;
+    KMP_ATOMIC_ST_REL(&vector->num_of_blocks, new_block_nums);
+    __kmp_free(old_blocks);
+  }
+  __kmp_release_bootstrap_lock(&vector->lock);
+}
+
+kmp_node_info_t *kmp_node_vector_get(kmp_node_vector_t *vector, kmp_int32 id) {
+  const kmp_int32 num_of_blocks = KMP_ATOMIC_LD_RLX(&vector->num_of_blocks);
+  const kmp_int32 vector_size = vector->block_size * num_of_blocks;
+  if (id >= vector_size)
+    return nullptr;
+  const kmp_int32 block_idx = id / vector->block_size;
+  const kmp_int32 node_idx = id % vector->block_size;
+  __kmp_acquire_bootstrap_lock(&vector->lock);
+  kmp_node_info_t *ret = &(vector->blocks[block_idx][node_idx]);
+  __kmp_release_bootstrap_lock(&vector->lock);
+  return ret;
+}
+
+void kmp_node_vector_free(kmp_node_vector_t *vector) {
+  __kmp_acquire_bootstrap_lock(&vector->lock);
+  for (int i = 0; i < vector->num_of_blocks; i++)
+    __kmp_free(vector->blocks[i]);
+  __kmp_free(vector->blocks);
+  __kmp_release_bootstrap_lock(&vector->lock);
+}
+#endif
+
 /*!
 @ingroup TASKING
 @param ptask Task which execution is completed
@@ -5260,7 +5325,9 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
   kmp_safe_raii_file_t tdg_file(file_name, "w");
 
   kmp_int32 num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
-  kmp_int32 map_size = tdg->map_size;
+  /* kmp_int32 map_size = tdg->map_size; */
+  kmp_int32 map_size =
+      tdg->record_map->block_size * tdg->record_map->num_of_blocks;
   fprintf(tdg_file,
           "digraph TDG {\n"
           "   compound=true\n"
@@ -5273,11 +5340,12 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
   fprintf(tdg_file, "   }\n");
   kmp_int32 tasks = 0;
   for (kmp_int32 i = 0; tasks < num_tasks && i < map_size; i++) {
-    if (tdg->record_map[i].task == nullptr)
+    kmp_node_info_t *node = kmp_node_vector_get(tdg->record_map, i);
+    if (node->task == nullptr)
       continue;
     tasks++;
-    kmp_int32 nsuccessors = tdg->record_map[i].nsuccessors;
-    kmp_int32 *successors = tdg->record_map[i].successors;
+    kmp_int32 nsuccessors = node->nsuccessors;
+    kmp_int32 *successors = node->successors;
     if (nsuccessors > 0) {
       for (kmp_int32 j = 0; j < nsuccessors; j++)
         fprintf(tdg_file, "   %d -> %d \n", i, successors[j]);
@@ -5295,10 +5363,12 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
   KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_READY);
   KA_TRACE(10, ("__kmp_exec_tdg(enter): T#%d tdg_id=%d num_roots=%d\n", gtid,
                 tdg->tdg_id, tdg->num_roots));
-  kmp_node_info_t *this_record_map = tdg->record_map;
+  /* kmp_node_info_t *this_record_map = tdg->record_map; */
   kmp_int32 *this_root_tasks = tdg->root_tasks;
   kmp_int32 this_num_roots = tdg->num_roots;
   kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
+  kmp_int32 map_size =
+      tdg->record_map->block_size * tdg->record_map->num_of_blocks;
   kmp_int32 tasks = 0;
 
   kmp_info_t *thread = __kmp_threads[gtid];
@@ -5308,21 +5378,20 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
     __kmpc_taskred_init(gtid, tdg->rec_num_taskred, tdg->rec_taskred_data);
   }
 
-  for (kmp_int32 j = 0; j < tdg->map_size && tasks < this_num_tasks; j++) {
-    if (this_record_map[j].task == nullptr)
+  for (kmp_int32 j = 0; j < map_size && tasks < this_num_tasks; j++) {
+    kmp_node_info_t *node = kmp_node_vector_get(tdg->record_map, j);
+    if (node->task == nullptr)
       continue;
     tasks++;
-    kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(this_record_map[j].task);
+    kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(node->task);
 
     td->td_parent = parent_task;
-    this_record_map[j].parent_task = parent_task;
+    node->parent_task = parent_task;
 
-    kmp_taskgroup_t *parent_taskgroup =
-        this_record_map[j].parent_task->td_taskgroup;
+    kmp_taskgroup_t *parent_taskgroup = node->parent_task->td_taskgroup;
 
-    KMP_ATOMIC_ST_RLX(&this_record_map[j].npredecessors_counter,
-                      this_record_map[j].npredecessors);
-    KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_incomplete_child_tasks);
+    KMP_ATOMIC_ST_RLX(&node->npredecessors_counter, node->npredecessors);
+    KMP_ATOMIC_INC(&node->parent_task->td_incomplete_child_tasks);
 
     if (parent_taskgroup) {
       KMP_ATOMIC_INC(&parent_taskgroup->count);
@@ -5332,12 +5401,14 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
       // If the parent doesnt have a taskgroup, remove it from the task
       td->td_taskgroup = nullptr;
     }
-    if (this_record_map[j].parent_task->td_flags.tasktype == TASK_EXPLICIT)
-      KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_allocated_child_tasks);
+    if (node->parent_task->td_flags.tasktype == TASK_EXPLICIT)
+      KMP_ATOMIC_INC(&node->parent_task->td_allocated_child_tasks);
   }
 
   for (kmp_int32 j = 0; j < this_num_roots; ++j) {
-    __kmp_omp_task(gtid, this_record_map[this_root_tasks[j]].task, true);
+    kmp_node_info_t *node =
+        kmp_node_vector_get(tdg->record_map, this_root_tasks[j]);
+    __kmp_omp_task(gtid, node->task, true);
   }
   KA_TRACE(10, ("__kmp_exec_tdg(exit): T#%d tdg_id=%d num_roots=%d\n", gtid,
                 tdg->tdg_id, tdg->num_roots));
@@ -5356,28 +5427,14 @@ static inline void __kmp_start_record(kmp_int32 gtid,
   __kmp_global_tdgs[__kmp_curr_tdg_idx] = tdg;
   // Initializing the TDG structure
   tdg->tdg_id = tdg_id;
-  tdg->map_size = INIT_MAPSIZE;
+  /* tdg->map_size = INIT_MAPSIZE; */
   tdg->num_roots = -1;
   tdg->root_tasks = nullptr;
   tdg->tdg_status = KMP_TDG_RECORDING;
   tdg->rec_num_taskred = 0;
   tdg->rec_taskred_data = nullptr;
   KMP_ATOMIC_ST_RLX(&tdg->num_tasks, 0);
-
-  // Initializing the list of nodes in this TDG
-  kmp_node_info_t *this_record_map =
-      (kmp_node_info_t *)__kmp_allocate(INIT_MAPSIZE * sizeof(kmp_node_info_t));
-  for (kmp_int32 i = 0; i < INIT_MAPSIZE; i++) {
-    this_record_map[i].task = nullptr;
-    this_record_map[i].parent_task = nullptr;
-    this_record_map[i].successors = nullptr;
-    this_record_map[i].nsuccessors = 0;
-    this_record_map[i].npredecessors = 0;
-    this_record_map[i].successors_size = 0;
-    KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter, 0);
-  }
-
-  __kmp_global_tdgs[__kmp_curr_tdg_idx]->record_map = this_record_map;
+  tdg->record_map = kmp_alloc_tdg_vector(INIT_MAPSIZE);
 }
 
 // __kmpc_start_record_task: Wrapper around __kmp_start_record to mark
@@ -5427,27 +5484,28 @@ kmp_int32 __kmpc_start_record_task(ident_t *loc_ref, kmp_int32 gtid,
 // tdg:    Pointer to the TDG
 void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
   // Store roots
-  kmp_node_info_t *this_record_map = tdg->record_map;
   kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
   kmp_int32 *this_root_tasks =
       (kmp_int32 *)__kmp_allocate(this_num_tasks * sizeof(kmp_int32));
-  kmp_int32 this_map_size = tdg->map_size;
+  kmp_int32 this_map_size =
+      tdg->record_map->block_size * tdg->record_map->num_of_blocks;
   kmp_int32 this_num_roots = 0;
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_int32 tasks = 0;
 
   for (kmp_int32 i = 0; tasks < this_num_tasks && i < this_map_size; i++) {
-    if (this_record_map[i].task == nullptr) {
+    kmp_node_info_t *node = kmp_node_vector_get(tdg->record_map, i);
+    if (node->task == nullptr) {
       continue;
     }
     tasks++;
-    if (this_record_map[i].npredecessors == 0) {
+    if (node->npredecessors == 0) {
       this_root_tasks[this_num_roots++] = i;
     }
   }
 
   // Update with roots info and mapsize
-  tdg->map_size = this_map_size;
+  /* tdg->map_size = this_map_size; */
   tdg->num_roots = this_num_roots;
   tdg->root_tasks = this_root_tasks;
   KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_RECORDING);
@@ -5459,9 +5517,14 @@ void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
   }
 
   // Reset predecessor counter
-  for (kmp_int32 i = 0; i < this_num_tasks; i++) {
-    KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter,
-                      this_record_map[i].npredecessors);
+  tasks = 0;
+  for (kmp_int32 i = 0; tasks < this_num_tasks && i < this_map_size; i++) {
+    kmp_node_info_t *node = kmp_node_vector_get(tdg->record_map, i);
+    if (node->task == nullptr) {
+      continue;
+    }
+    tasks++;
+    KMP_ATOMIC_ST_RLX(&node->npredecessors_counter, node->npredecessors);
   }
 
   if (__kmp_tdg_dot)

>From 32edeb63e80cd10f9a31cca7492f0a7483a6719f Mon Sep 17 00:00:00 2001
From: jpinot <jsp.pinot at gmail.com>
Date: Wed, 1 Oct 2025 16:00:41 +0200
Subject: [PATCH 7/7] [openmp] Delete graph_lock

---
 openmp/runtime/src/kmp.h            | 2 --
 openmp/runtime/src/kmp_taskdeps.cpp | 2 --
 openmp/runtime/src/kmp_tasking.cpp  | 2 --
 3 files changed, 6 deletions(-)

diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 72dbbba58ad2d..6b28b504392a3 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2676,8 +2676,6 @@ typedef struct kmp_tdg_info {
       KMP_TDG_NONE; // Status of the TDG (recording, ready...)
   std::atomic<kmp_int32> num_tasks; // Number of TDG nodes
   std::atomic<kmp_int32> tdg_task_id_next; // Task id of next node
-  kmp_bootstrap_lock_t
-      graph_lock; // Protect graph attributes when updated via taskloop_recur
   // Taskloop reduction related
   void *rec_taskred_data; // Data to pass to __kmpc_task_reduction_init or
                           // __kmpc_taskred_init
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 49b9e1c1adca3..2d1256431752a 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -708,12 +708,10 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
       kmp_node_vector_resize(tdg->record_map, new_taskdata->td_tdg_task_id * 2);
       node = kmp_node_vector_get(tdg->record_map, new_taskdata->td_tdg_task_id);
     }
-    __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
     node->task = new_task;
     node->parent_task = new_taskdata->td_parent;
     new_taskdata->td_tdg_node_info = node;
     KMP_ATOMIC_INC(&tdg->num_tasks);
-    __kmp_release_bootstrap_lock(&tdg->graph_lock);
   }
 #endif
 #if OMPT_SUPPORT
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 8aafe939a244b..307e69ba992b4 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1808,12 +1808,10 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
       node = kmp_node_vector_get(tdg->record_map, new_taskdata->td_tdg_task_id);
     }
     if (node->task == nullptr) {
-      __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
       node->task = new_task;
       node->parent_task = new_taskdata->td_parent;
       new_taskdata->td_tdg_node_info = node;
       KMP_ATOMIC_INC(&tdg->num_tasks);
-      __kmp_release_bootstrap_lock(&tdg->graph_lock);
     }
   }
 #endif



More information about the Openmp-commits mailing list