[Openmp-commits] [openmp] Draft: [OpenMP] Fix td_tdg_task_id underflow with taskloop and taskgraph (PR #150877)

Josep Pinot via Openmp-commits openmp-commits at lists.llvm.org
Thu Sep 18 01:50:43 PDT 2025


https://github.com/jpinot updated https://github.com/llvm/llvm-project/pull/150877

>From 4b4938042950dd7898253509bf511b1bf18a19d0 Mon Sep 17 00:00:00 2001
From: jpinot <josep.pinot at bsc.es>
Date: Thu, 17 Jul 2025 12:22:01 +0200
Subject: [PATCH 1/4] [OpenMP] Fix td_tdg_task_id underflow with taskloop and
 taskgraph

This patch addresses an issue where the td_tdg_task_id could underflow,
leading to a negative task ID, when a taskloop region was encountered
before a taskgraph clause.

This change allows surious holes in the record_map.
---
 openmp/runtime/src/kmp.h           |  1 +
 openmp/runtime/src/kmp_tasking.cpp | 30 ++++++++++++++++++++----------
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 83afc0e83f231..1f909cd3d3916 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2667,6 +2667,7 @@ typedef struct kmp_tdg_info {
   kmp_tdg_status_t tdg_status =
       KMP_TDG_NONE; // Status of the TDG (recording, ready...)
   std::atomic<kmp_int32> num_tasks; // Number of TDG nodes
+  std::atomic<kmp_int32> tdg_task_id_next; // Task id of next node
   kmp_bootstrap_lock_t
       graph_lock; // Protect graph attributes when updated via taskloop_recur
   // Taskloop reduction related
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 37836fb457537..fb54baf7e4e07 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1437,7 +1437,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
     taskdata->is_taskgraph = 1;
     taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
     taskdata->td_task_id = KMP_GEN_TASK_ID();
-    taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
+    taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&tdg->tdg_task_id_next);
   }
 #endif
   KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
@@ -4465,7 +4465,8 @@ kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src
 #if OMPX_TASKGRAPH
   if (taskdata->is_taskgraph && !taskloop_recur &&
       __kmp_tdg_is_recording(taskdata_src->tdg->tdg_status))
-    taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
+    taskdata->td_tdg_task_id =
+        KMP_ATOMIC_INC(&taskdata_src->tdg->tdg_task_id_next);
 #endif
   taskdata->td_task_id = KMP_GEN_TASK_ID();
   if (task->shareds != NULL) { // need setup shareds pointer
@@ -4979,10 +4980,6 @@ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
 #endif
     __kmpc_taskgroup(loc, gtid);
   }
-
-#if OMPX_TASKGRAPH
-  KMP_ATOMIC_DEC(&__kmp_tdg_task_id);
-#endif
   // =========================================================================
   // calculate loop parameters
   kmp_taskloop_bounds_t task_bounds(task, lb, ub);
@@ -5263,6 +5260,7 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
   kmp_safe_raii_file_t tdg_file(file_name, "w");
 
   kmp_int32 num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
+  kmp_int32 map_size = tdg->map_size;
   fprintf(tdg_file,
           "digraph TDG {\n"
           "   compound=true\n"
@@ -5273,7 +5271,11 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
     fprintf(tdg_file, "      %d[style=bold]\n", i);
   }
   fprintf(tdg_file, "   }\n");
-  for (kmp_int32 i = 0; i < num_tasks; i++) {
+  kmp_int32 tasks = 0;
+  for (kmp_int32 i = 0; tasks < num_tasks && i < map_size; i++) {
+    if (tdg->record_map[i].task == nullptr)
+      continue;
+    tasks++;
     kmp_int32 nsuccessors = tdg->record_map[i].nsuccessors;
     kmp_int32 *successors = tdg->record_map[i].successors;
     if (nsuccessors > 0) {
@@ -5297,6 +5299,7 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
   kmp_int32 *this_root_tasks = tdg->root_tasks;
   kmp_int32 this_num_roots = tdg->num_roots;
   kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
+  kmp_int32 tasks = 0;
 
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_taskdata_t *parent_task = thread->th.th_current_task;
@@ -5305,7 +5308,10 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
     __kmpc_taskred_init(gtid, tdg->rec_num_taskred, tdg->rec_taskred_data);
   }
 
-  for (kmp_int32 j = 0; j < this_num_tasks; j++) {
+  for (kmp_int32 j = 0; j < tdg->map_size && tasks < this_num_tasks; j++) {
+    if (this_record_map[j].task == nullptr)
+      continue;
+    tasks++;
     kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(this_record_map[j].task);
 
     td->td_parent = parent_task;
@@ -5429,8 +5435,13 @@ void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
   kmp_int32 this_map_size = tdg->map_size;
   kmp_int32 this_num_roots = 0;
   kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_int32 tasks = 0;
 
-  for (kmp_int32 i = 0; i < this_num_tasks; i++) {
+  for (kmp_int32 i = 0; tasks < this_num_tasks && i < this_map_size; i++) {
+    if (this_record_map[i].task == nullptr) {
+      continue;
+    }
+    tasks++;
     if (this_record_map[i].npredecessors == 0) {
       this_root_tasks[this_num_roots++] = i;
     }
@@ -5453,7 +5464,6 @@ void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
     KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter,
                       this_record_map[i].npredecessors);
   }
-  KMP_ATOMIC_ST_RLX(&__kmp_tdg_task_id, 0);
 
   if (__kmp_tdg_dot)
     __kmp_print_tdg_dot(tdg, gtid);

>From c8fdb6411e098205e6f2c76a54778754dd560ba2 Mon Sep 17 00:00:00 2001
From: jpinot <josep.pinot at bsc.es>
Date: Wed, 3 Sep 2025 10:10:41 +0200
Subject: [PATCH 2/4] [openmp] Remove taskgraph successors alloction before is
 needed

Delayed allocation of successors in kmp_node until the array is needed,
removing the small allocation when a taskgraph node is created or
resized.
---
 openmp/runtime/src/kmp_taskdeps.cpp | 16 +++++++++-------
 openmp/runtime/src/kmp_tasking.cpp  | 13 +++++--------
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index abbca752f0587..8215c4b318bb5 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -244,13 +244,17 @@ static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
     if (!exists) {
       if (source_info->nsuccessors >= source_info->successors_size) {
         kmp_uint old_size = source_info->successors_size;
-        source_info->successors_size = 2 * source_info->successors_size;
+        source_info->successors_size = old_size == 0
+                                           ? __kmp_successors_size
+                                           : 2 * source_info->successors_size;
         kmp_int32 *old_succ_ids = source_info->successors;
         kmp_int32 *new_succ_ids = (kmp_int32 *)__kmp_allocate(
             source_info->successors_size * sizeof(kmp_int32));
-        KMP_MEMCPY(new_succ_ids, old_succ_ids, old_size * sizeof(kmp_int32));
+        if (old_succ_ids) {
+          KMP_MEMCPY(new_succ_ids, old_succ_ids, old_size * sizeof(kmp_int32));
+          __kmp_free(old_succ_ids);
+        }
         source_info->successors = new_succ_ids;
-        __kmp_free(old_succ_ids);
       }
 
       source_info->successors[source_info->nsuccessors] =
@@ -715,13 +719,11 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
         __kmp_free(old_record);
 
         for (kmp_int i = old_size; i < new_size; i++) {
-          kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
-              __kmp_successors_size * sizeof(kmp_int32));
           new_record[i].task = nullptr;
-          new_record[i].successors = successorsList;
+          new_record[i].successors = nullptr;
           new_record[i].nsuccessors = 0;
           new_record[i].npredecessors = 0;
-          new_record[i].successors_size = __kmp_successors_size;
+          new_record[i].successors_size = 0;
           KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
         }
         // update the size at the end, so that we avoid other
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index fb54baf7e4e07..c40dcf9b85b4a 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1817,13 +1817,11 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
         __kmp_free(old_record);
 
         for (kmp_int i = old_size; i < new_size; i++) {
-          kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
-              __kmp_successors_size * sizeof(kmp_int32));
           new_record[i].task = nullptr;
-          new_record[i].successors = successorsList;
+          new_record[i].successors = nullptr;
           new_record[i].nsuccessors = 0;
           new_record[i].npredecessors = 0;
-          new_record[i].successors_size = __kmp_successors_size;
+          new_record[i].successors_size = 0;
           KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
         }
         // update the size at the end, so that we avoid other
@@ -5368,13 +5366,12 @@ static inline void __kmp_start_record(kmp_int32 gtid,
   kmp_node_info_t *this_record_map =
       (kmp_node_info_t *)__kmp_allocate(INIT_MAPSIZE * sizeof(kmp_node_info_t));
   for (kmp_int32 i = 0; i < INIT_MAPSIZE; i++) {
-    kmp_int32 *successorsList =
-        (kmp_int32 *)__kmp_allocate(__kmp_successors_size * sizeof(kmp_int32));
     this_record_map[i].task = nullptr;
-    this_record_map[i].successors = successorsList;
+    this_record_map[i].parent_task = nullptr;
+    this_record_map[i].successors = nullptr;
     this_record_map[i].nsuccessors = 0;
     this_record_map[i].npredecessors = 0;
-    this_record_map[i].successors_size = __kmp_successors_size;
+    this_record_map[i].successors_size = 0;
     KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter, 0);
   }
 

>From 8936d9b1549150a0ac979222ff8ddd2e3f2b27be Mon Sep 17 00:00:00 2001
From: jpinot <josep.pinot at bsc.es>
Date: Wed, 3 Sep 2025 10:12:19 +0200
Subject: [PATCH 3/4] [openmp] Fix perent_task unitialization when allocating
 kmp_tdg_info

---
 openmp/runtime/src/kmp_taskdeps.cpp | 1 +
 openmp/runtime/src/kmp_tasking.cpp  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 8215c4b318bb5..8610143a32f0b 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -720,6 +720,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
 
         for (kmp_int i = old_size; i < new_size; i++) {
           new_record[i].task = nullptr;
+          new_record[i].parent_task = nullptr;
           new_record[i].successors = nullptr;
           new_record[i].nsuccessors = 0;
           new_record[i].npredecessors = 0;
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index c40dcf9b85b4a..2e77cf632dc94 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1818,6 +1818,7 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
 
         for (kmp_int i = old_size; i < new_size; i++) {
           new_record[i].task = nullptr;
+          new_record[i].parent_task = nullptr;
           new_record[i].successors = nullptr;
           new_record[i].nsuccessors = 0;
           new_record[i].npredecessors = 0;

>From bf1f0df127cd49f565e4708c97a9606932468545 Mon Sep 17 00:00:00 2001
From: jpinot <josep.pinot at bsc.es>
Date: Tue, 9 Sep 2025 10:09:47 +0200
Subject: [PATCH 4/4] [openmp] Fix locking when expanding recorded tdg

---
 openmp/runtime/src/kmp_taskdeps.cpp | 50 ++++++++++++++---------------
 openmp/runtime/src/kmp_tasking.cpp  |  8 ++---
 2 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 8610143a32f0b..15e7585a65617 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -704,39 +704,37 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
       __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
     kmp_tdg_info_t *tdg = new_taskdata->tdg;
     // extend record_map if needed
+    __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
     if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
-      __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
-      if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
-        kmp_uint old_size = tdg->map_size;
-        kmp_uint new_size = old_size * 2;
-        kmp_node_info_t *old_record = tdg->record_map;
-        kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
-            new_size * sizeof(kmp_node_info_t));
-        KMP_MEMCPY(new_record, tdg->record_map,
-                   old_size * sizeof(kmp_node_info_t));
-        tdg->record_map = new_record;
-
-        __kmp_free(old_record);
-
-        for (kmp_int i = old_size; i < new_size; i++) {
-          new_record[i].task = nullptr;
-          new_record[i].parent_task = nullptr;
-          new_record[i].successors = nullptr;
-          new_record[i].nsuccessors = 0;
-          new_record[i].npredecessors = 0;
-          new_record[i].successors_size = 0;
-          KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
-        }
-        // update the size at the end, so that we avoid other
-        // threads use old_record while map_size is already updated
-        tdg->map_size = new_size;
+      kmp_uint old_size = tdg->map_size;
+      kmp_uint new_size = old_size * 2;
+      kmp_node_info_t *old_record = tdg->record_map;
+      kmp_node_info_t *new_record =
+          (kmp_node_info_t *)__kmp_allocate(new_size * sizeof(kmp_node_info_t));
+      KMP_MEMCPY(new_record, tdg->record_map,
+                 old_size * sizeof(kmp_node_info_t));
+      tdg->record_map = new_record;
+
+      __kmp_free(old_record);
+
+      for (kmp_int i = old_size; i < new_size; i++) {
+        new_record[i].task = nullptr;
+        new_record[i].parent_task = nullptr;
+        new_record[i].successors = nullptr;
+        new_record[i].nsuccessors = 0;
+        new_record[i].npredecessors = 0;
+        new_record[i].successors_size = 0;
+        KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
       }
-      __kmp_release_bootstrap_lock(&tdg->graph_lock);
+      // update the size at the end, so that we avoid other
+      // threads use old_record while map_size is already updated
+      tdg->map_size = new_size;
     }
     tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
     tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
         new_taskdata->td_parent;
     KMP_ATOMIC_INC(&tdg->num_tasks);
+    __kmp_release_bootstrap_lock(&tdg->graph_lock);
   }
 #endif
 #if OMPT_SUPPORT
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 2e77cf632dc94..f735e4208ba48 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1800,7 +1800,8 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
       __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
     kmp_tdg_info_t *tdg = new_taskdata->tdg;
     // extend the record_map if needed
-    if (new_taskdata->td_tdg_task_id >= new_taskdata->tdg->map_size) {
+    if (new_taskdata->td_tdg_task_id >= tdg->map_size ||
+        tdg->record_map[new_taskdata->td_tdg_task_id].task == nullptr) {
       __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
       // map_size could have been updated by another thread if recursive
       // taskloop
@@ -1829,14 +1830,11 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
         // threads use old_record while map_size is already updated
         tdg->map_size = new_size;
       }
-      __kmp_release_bootstrap_lock(&tdg->graph_lock);
-    }
-    // record a task
-    if (tdg->record_map[new_taskdata->td_tdg_task_id].task == nullptr) {
       tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
       tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
           new_taskdata->td_parent;
       KMP_ATOMIC_INC(&tdg->num_tasks);
+      __kmp_release_bootstrap_lock(&tdg->graph_lock);
     }
   }
 #endif



More information about the Openmp-commits mailing list