[Openmp-commits] [openmp] [openmp] Segfaults/assertion errors on certain omp statements after calling `omp_pause_resource_all(omp_pause_hard)` (PR #154204)

Haiyang He via Openmp-commits openmp-commits at lists.llvm.org
Wed Sep 17 13:48:56 PDT 2025


https://github.com/haiyanghee updated https://github.com/llvm/llvm-project/pull/154204

>From df4b0ecf1cc9ae2534e3ede70a87f1c247c94b25 Mon Sep 17 00:00:00 2001
From: Haiyang He <Haiyang_He at mentor.com>
Date: Wed, 23 Jul 2025 15:29:29 -0600
Subject: [PATCH 1/9] Fixed mutex segfault after hard reset

---
 openmp/runtime/src/kmp.h            |  7 +++++--
 openmp/runtime/src/kmp_csupport.cpp |  2 ++
 openmp/runtime/src/kmp_global.cpp   |  2 ++
 openmp/runtime/src/kmp_lock.cpp     |  6 ++++++
 openmp/runtime/src/kmp_lock.h       | 11 +++++++++++
 openmp/runtime/src/kmp_runtime.cpp  |  1 +
 openmp/runtime/src/ompt-internal.h  |  2 ++
 7 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 83afc0e83f231..5ca474698d5f8 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -150,6 +150,10 @@ class kmp_stats_list;
 #define UNLIKELY(x) (x)
 #endif
 
+#ifndef LIKELY
+#define LIKELY(x) (x)
+#endif
+
 // Affinity format function
 #include "kmp_str.h"
 
@@ -1759,8 +1763,6 @@ typedef int kmp_itt_mark_t;
 #define KMP_ITT_DEBUG 0
 #endif /* USE_ITT_BUILD */
 
-typedef kmp_int32 kmp_critical_name[8];
-
 /*!
 @ingroup PARALLEL
 The type for a microtask which gets passed to @ref __kmpc_fork_call().
@@ -3510,6 +3512,7 @@ extern int __kmp_abort_delay;
 extern int __kmp_need_register_atfork_specified;
 extern int __kmp_need_register_atfork; /* At initialization, call pthread_atfork
                                           to install fork handler */
+extern int __kmp_in_atexit; /*Denote that we are in the atexit handler*/
 extern int __kmp_gtid_mode; /* Method of getting gtid, values:
                                0 - not set, will be set at runtime
                                1 - using stack search
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 3ca32ba583fe2..075a90b9de7ee 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -1165,6 +1165,8 @@ __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
     // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
   }
   KMP_DEBUG_ASSERT(*lck != NULL);
+  // save the reverse critical section global lock reference
+  ilk->rev_ptr_critSec = crit;
 }
 
 // Fast-path acquire tas lock
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 323d13e948b42..c9a64f999163c 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -414,6 +414,8 @@ int __kmp_need_register_atfork =
     TRUE; /* At initialization, call pthread_atfork to install fork handler */
 int __kmp_need_register_atfork_specified = TRUE;
 
+int __kmp_in_atexit = FALSE; /*Denote that we are in the atexit handler*/
+
 int __kmp_env_stksize = FALSE; /* KMP_STACKSIZE specified? */
 int __kmp_env_blocktime = FALSE; /* KMP_BLOCKTIME specified? */
 int __kmp_env_checks = FALSE; /* KMP_CHECKS specified?    */
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
index fd1300352e95b..4ab5e081c312f 100644
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -3431,6 +3431,9 @@ void __kmp_cleanup_indirect_user_locks() {
                     ll));
       __kmp_free(ll->lock);
       ll->lock = NULL;
+      // reset the reverse critical section pointer to 0
+      if (ll->rev_ptr_critSec && LIKELY(!__kmp_in_atexit))
+        memset(ll->rev_ptr_critSec, 0, sizeof(kmp_critical_name));
     }
     __kmp_indirect_lock_pool[k] = NULL;
   }
@@ -3449,6 +3452,9 @@ void __kmp_cleanup_indirect_user_locks() {
                         "from table\n",
                         l));
           __kmp_free(l->lock);
+          // reset the reverse critical section pointer to 0
+          if (l->rev_ptr_critSec && LIKELY(!__kmp_in_atexit))
+            memset(l->rev_ptr_critSec, 0, sizeof(kmp_critical_name));
         }
       }
       __kmp_free(ptr->table[row]);
diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h
index 6202f3d617cc5..fc71bafd47e39 100644
--- a/openmp/runtime/src/kmp_lock.h
+++ b/openmp/runtime/src/kmp_lock.h
@@ -38,6 +38,9 @@ extern "C" {
 struct ident;
 typedef struct ident ident_t;
 
+// moved the typedef kmp_critical_name from kmp.h to here.
+typedef kmp_int32 kmp_critical_name[8];
+
 // End of copied code.
 // ----------------------------------------------------------------------------
 
@@ -1126,6 +1129,14 @@ typedef enum {
 typedef struct {
   kmp_user_lock_p lock;
   kmp_indirect_locktag_t type;
+  // NOTE: when a `#pragma omp critical` lock is created, the corresponding
+  // critical section global lock is made to point to this lock. When the
+  // locks are reset (via omp_pause_resource_all(omp_pause_hard)), that global
+  // lock pointer must also be reset to NULL (in
+  // __kmp_cleanup_indirect_user_locks()). However, `rev_ptr_critSec` is not
+  // reset during the atexit() cleanup handler, since the memory it points to
+  // is/could be freed already.
+  kmp_critical_name *rev_ptr_critSec;
 } kmp_indirect_lock_t;
 
 // Function tables for direct locks. Set/unset/test differentiate functions
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 48e29c9f9fe45..075cf2fb46256 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -6139,6 +6139,7 @@ void __kmp_internal_end_atexit(void) {
      Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
      is nothing.  Thus, the workaround is applicable only for Windows static
      stat library. */
+  __kmp_in_atexit = TRUE;
   __kmp_internal_end_library(-1);
 #if KMP_OS_WINDOWS
   __kmp_close_console();
diff --git a/openmp/runtime/src/ompt-internal.h b/openmp/runtime/src/ompt-internal.h
index 36b45f7a91ea2..9d5c40c4d15b4 100644
--- a/openmp/runtime/src/ompt-internal.h
+++ b/openmp/runtime/src/ompt-internal.h
@@ -121,9 +121,11 @@ extern ompt_callbacks_active_t ompt_enabled;
 
 #if KMP_OS_WINDOWS
 #define UNLIKELY(x) (x)
+#define LIKELY(x) (x)
 #define OMPT_NOINLINE __declspec(noinline)
 #else
 #define UNLIKELY(x) __builtin_expect(!!(x), 0)
+#define LIKELY(x) __builtin_expect(!!(x), 1)
 #define OMPT_NOINLINE __attribute__((noinline))
 #endif
 

>From a08175c802ca5375a5eaa7205952882282c7dea8 Mon Sep 17 00:00:00 2001
From: Haiyang He <Haiyang_He at mentor.com>
Date: Thu, 24 Jul 2025 12:23:27 -0600
Subject: [PATCH 2/9] Prevented double atfork() handler initialization

---
 openmp/runtime/src/kmp.h            | 1 +
 openmp/runtime/src/kmp_global.cpp   | 4 ++++
 openmp/runtime/src/z_Linux_util.cpp | 4 +++-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 5ca474698d5f8..c21af91016c19 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -3512,6 +3512,7 @@ extern int __kmp_abort_delay;
 extern int __kmp_need_register_atfork_specified;
 extern int __kmp_need_register_atfork; /* At initialization, call pthread_atfork
                                           to install fork handler */
+extern int __kmp_already_registered_atfork; /* Do not register atfork twice */
 extern int __kmp_in_atexit; /*Denote that we are in the atexit handler*/
 extern int __kmp_gtid_mode; /* Method of getting gtid, values:
                                0 - not set, will be set at runtime
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index c9a64f999163c..df64d40a086ef 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -414,6 +414,10 @@ int __kmp_need_register_atfork =
     TRUE; /* At initialization, call pthread_atfork to install fork handler */
 int __kmp_need_register_atfork_specified = TRUE;
 
+/* We do not want to register the atfork handler more than once: the prepare
+ * handler acquires locks (in __kmp_forkjoin_lock()), so if the same prepare
+ * handler gets called multiple times, it will always deadlock */
+int __kmp_already_registered_atfork = FALSE;
 int __kmp_in_atexit = FALSE; /*Denote that we are in the atexit handler*/
 
 int __kmp_env_stksize = FALSE; /* KMP_STACKSIZE specified? */
diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
index 368c0b6e872cc..b33e87ad18e9b 100644
--- a/openmp/runtime/src/z_Linux_util.cpp
+++ b/openmp/runtime/src/z_Linux_util.cpp
@@ -1404,13 +1404,15 @@ static void __kmp_atfork_child(void) {
 }
 
 void __kmp_register_atfork(void) {
-  if (__kmp_need_register_atfork) {
+  // NOTE: we will not double register our fork handlers! It will cause deadlock
+  if (!__kmp_already_registered_atfork && __kmp_need_register_atfork) {
 #if !KMP_OS_WASI
     int status = pthread_atfork(__kmp_atfork_prepare, __kmp_atfork_parent,
                                 __kmp_atfork_child);
     KMP_CHECK_SYSFAIL("pthread_atfork", status);
 #endif
     __kmp_need_register_atfork = FALSE;
+    __kmp_already_registered_atfork = TRUE;
   }
 }
 

>From bf74a39e27ae53db741e726972dd3d572e3b6093 Mon Sep 17 00:00:00 2001
From: Haiyang He <imhhy123 at hotmail.com>
Date: Mon, 18 Aug 2025 10:51:12 -0600
Subject: [PATCH 3/9] Do not assert failure if `__kmp_unregister_library()`
 can't find the shm file because we could have unregistered the library right
 before fork via hard reset, and child can simply exit immediately.

---
 openmp/runtime/src/kmp_runtime.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 075cf2fb46256..0fabd5c7fecfd 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -6953,9 +6953,9 @@ void __kmp_unregister_library(void) {
   value = __kmp_env_get(name);
 #endif
 
-  KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
-  KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
-  if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
+  // if omp is not initialized and we exit, then we don't need to free anything
+  if (__kmp_registration_flag != 0 && __kmp_registration_str != NULL) {
+    if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
 //  Ok, this is our variable. Delete it.
 #if defined(KMP_USE_SHM)
     if (__kmp_shm_available) {
@@ -6968,7 +6968,7 @@ void __kmp_unregister_library(void) {
 #else
     __kmp_env_unset(name);
 #endif
-  }
+    }
 
 #if defined(KMP_USE_SHM)
   if (shm_name)
@@ -6976,8 +6976,9 @@ void __kmp_unregister_library(void) {
   if (temp_reg_status_file_name)
     KMP_INTERNAL_FREE(temp_reg_status_file_name);
 #endif
-
   KMP_INTERNAL_FREE(__kmp_registration_str);
+  }
+
   KMP_INTERNAL_FREE(value);
   KMP_INTERNAL_FREE(name);
 

>From 773ab2f3f49f949553052ea1b84b2c38530489ac Mon Sep 17 00:00:00 2001
From: Haiyang He <Haiyang_He at mentor.com>
Date: Sun, 27 Jul 2025 16:10:46 -0600
Subject: [PATCH 4/9] Fixed a deadlock in `#pragma omp parallel num_threads`
 after hard reset

---
 openmp/runtime/src/kmp_csupport.cpp | 5 +++++
 openmp/runtime/src/kmp_runtime.cpp  | 4 ++++
 2 files changed, 9 insertions(+)

diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 075a90b9de7ee..053fdb9d91d66 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -233,6 +233,11 @@ void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_threads) {
   KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                 global_tid, num_threads));
+  // we'll do middle initialize first, as otherwise the assert on global_tid can
+  // fail when omp is not initialized and this function is called
+  if (!TCR_4(__kmp_init_middle)) {
+    __kmp_middle_initialize();
+  }
   __kmp_assert_valid_gtid(global_tid);
   __kmp_push_num_threads(loc, global_tid, num_threads);
 }
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 0fabd5c7fecfd..78f4c3d8e7e02 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -8345,6 +8345,10 @@ void __kmp_cleanup(void) {
 
   __kmpc_destroy_allocator(KMP_GTID_SHUTDOWN, __kmp_def_allocator);
   __kmp_def_allocator = omp_default_mem_alloc;
+#ifdef KMP_TDATA_GTID
+  /*reset __kmp_gtid to initial value*/
+  __kmp_gtid = KMP_GTID_DNE;
+#endif
 
   KA_TRACE(10, ("__kmp_cleanup: exit\n"));
 }

>From 3e875f57642e3a4d3fa9b1100b83ec1b6c65891f Mon Sep 17 00:00:00 2001
From: Haiyang He <imhhy123 at hotmail.com>
Date: Fri, 15 Aug 2025 13:58:59 -0600
Subject: [PATCH 5/9] Added my test cases to test/api/omp_pause_resource.c

---
 openmp/runtime/test/api/omp_pause_resource.c | 121 +++++++++++++++++++
 1 file changed, 121 insertions(+)

diff --git a/openmp/runtime/test/api/omp_pause_resource.c b/openmp/runtime/test/api/omp_pause_resource.c
index e4aaa51861b8e..fce83824df35b 100644
--- a/openmp/runtime/test/api/omp_pause_resource.c
+++ b/openmp/runtime/test/api/omp_pause_resource.c
@@ -4,8 +4,124 @@
 // UNSUPPORTED: icc-18, icc-19
 
 #include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/wait.h>
 #include "omp_testsuite.h"
 
+#define NUM_THREADS 3
+
+void doOmpWorkWithCritical(int *a_lockCtr, int *b_lockCtr) {
+#pragma omp parallel num_threads(NUM_THREADS)
+  {
+#pragma omp critical(a_lock)
+    { *a_lockCtr = *a_lockCtr + 1; }
+#pragma omp critical(b_lock)
+    { *b_lockCtr = *b_lockCtr + 1; }
+  }
+}
+
+void test_omp_critical_after_omp_hard_pause_resource_all() {
+  int a_lockCtr = 0, b_lockCtr = 0;
+
+  // use omp to do some work
+  doOmpWorkWithCritical(&a_lockCtr, &b_lockCtr);
+  assert(a_lockCtr == NUM_THREADS && b_lockCtr == NUM_THREADS);
+  a_lockCtr = b_lockCtr = 0; // reset the counters
+
+  // omp hard pause should succeed
+  int rc = omp_pause_resource_all(omp_pause_hard);
+  assert(rc == 0);
+
+  // the critical sections in doOmpWorkWithCritical() must not segfault
+  doOmpWorkWithCritical(&a_lockCtr, &b_lockCtr);
+  assert(a_lockCtr == NUM_THREADS && b_lockCtr == NUM_THREADS);
+}
+
+void test_omp_get_thread_num_after_omp_hard_pause_resource_all() {
+  // omp_get_thread_num() should work, even if omp is not yet initialized
+  int n = omp_get_thread_num();
+  // called from serial region, omp_get_thread_num() should return 0
+  assert(n == 0);
+
+// use omp to do some work, guarantees omp initialization
+#pragma omp parallel num_threads(NUM_THREADS)
+  {}
+
+  // omp hard pause should succeed
+  int rc = omp_pause_resource_all(omp_pause_hard);
+  assert(rc == 0);
+
+  // omp_get_thread_num() should work again with no segfault
+  n = omp_get_thread_num();
+  // called from serial region, omp_get_thread_num() should return 0
+  assert(n == 0);
+}
+
+void test_omp_parallel_num_threads_after_omp_hard_pause_resource_all() {
+// use omp to do some work
+#pragma omp parallel num_threads(NUM_THREADS)
+  {}
+
+  // omp hard pause should succeed
+  int rc = omp_pause_resource_all(omp_pause_hard);
+  assert(rc == 0);
+
+// this should not trigger any omp asserts
+#pragma omp parallel num_threads(NUM_THREADS)
+  {}
+}
+
+void test_KMP_INIT_AT_FORK_with_fork_after_omp_hard_pause_resource_all() {
+  // explicitly set the KMP_INIT_AT_FORK environment variable to 1
+  setenv("KMP_INIT_AT_FORK", "1", 1);
+
+// use omp to do some work
+#pragma omp parallel for num_threads(NUM_THREADS)
+  for (int i = 0; i < NUM_THREADS; ++i) {
+  }
+
+  // omp hard pause should succeed
+  int rc = omp_pause_resource_all(omp_pause_hard);
+  assert(rc == 0);
+
+// use omp to do some work
+#pragma omp parallel for num_threads(NUM_THREADS)
+  for (int i = 0; i < NUM_THREADS; ++i) {
+  }
+
+  // we'll fork .. this shouldn't deadlock
+  int p = fork();
+
+  if (!p) {
+    exit(0); // child simply does nothing and exits
+  }
+
+  waitpid(p, NULL, 0);
+
+  unsetenv("KMP_INIT_AT_FORK");
+}
+
+void test_fork_child_exiting_after_omp_hard_pause_resource_all() {
+// use omp to do some work
+#pragma omp parallel num_threads(NUM_THREADS)
+  {}
+
+  // omp hard pause should succeed
+  int rc = omp_pause_resource_all(omp_pause_hard);
+  assert(rc == 0);
+
+  int p = fork();
+
+  if (!p) {
+    // child should be able to exit properly without assert failures
+    exit(0);
+  }
+
+  waitpid(p, NULL, 0);
+}
+
 int test_omp_pause_resource() {
   int fails, nthreads, my_dev;
 
@@ -57,6 +173,11 @@ int main() {
     if (!test_omp_pause_resource()) {
       num_failed++;
     }
+    test_omp_critical_after_omp_hard_pause_resource_all();
+    test_omp_get_thread_num_after_omp_hard_pause_resource_all();
+    test_omp_parallel_num_threads_after_omp_hard_pause_resource_all();
+    test_KMP_INIT_AT_FORK_with_fork_after_omp_hard_pause_resource_all();
+    test_fork_child_exiting_after_omp_hard_pause_resource_all();
   }
   return num_failed;
 }

>From 532ac49603840d96af22b175da34c5f178327599 Mon Sep 17 00:00:00 2001
From: Haiyang He <imhhy123 at hotmail.com>
Date: Thu, 4 Sep 2025 22:34:28 -0600
Subject: [PATCH 6/9] removed the LIKELY macro and changed
 `LIKELY(!__kmp_in_atexit)` to `!UNLIKELY(__kmp_in_atexit)` as suggested
 since they are equivalent

---
 openmp/runtime/src/kmp.h           | 4 ----
 openmp/runtime/src/kmp_lock.cpp    | 4 ++--
 openmp/runtime/src/ompt-internal.h | 2 --
 3 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index c21af91016c19..a92573d20d77a 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -150,10 +150,6 @@ class kmp_stats_list;
 #define UNLIKELY(x) (x)
 #endif
 
-#ifndef LIKELY
-#define LIKELY(x) (x)
-#endif
-
 // Affinity format function
 #include "kmp_str.h"
 
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
index 4ab5e081c312f..626b98c7ce92b 100644
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -3432,7 +3432,7 @@ void __kmp_cleanup_indirect_user_locks() {
       __kmp_free(ll->lock);
       ll->lock = NULL;
       // reset the reverse critical section pointer to 0
-      if (ll->rev_ptr_critSec && LIKELY(!__kmp_in_atexit))
+      if (ll->rev_ptr_critSec && !UNLIKELY(__kmp_in_atexit))
         memset(ll->rev_ptr_critSec, 0, sizeof(kmp_critical_name));
     }
     __kmp_indirect_lock_pool[k] = NULL;
@@ -3453,7 +3453,7 @@ void __kmp_cleanup_indirect_user_locks() {
                         l));
           __kmp_free(l->lock);
           // reset the reverse critical section pointer to 0
-          if (l->rev_ptr_critSec && LIKELY(!__kmp_in_atexit))
+          if (l->rev_ptr_critSec && !UNLIKELY(__kmp_in_atexit))
             memset(l->rev_ptr_critSec, 0, sizeof(kmp_critical_name));
         }
       }
diff --git a/openmp/runtime/src/ompt-internal.h b/openmp/runtime/src/ompt-internal.h
index 9d5c40c4d15b4..36b45f7a91ea2 100644
--- a/openmp/runtime/src/ompt-internal.h
+++ b/openmp/runtime/src/ompt-internal.h
@@ -121,11 +121,9 @@ extern ompt_callbacks_active_t ompt_enabled;
 
 #if KMP_OS_WINDOWS
 #define UNLIKELY(x) (x)
-#define LIKELY(x) (x)
 #define OMPT_NOINLINE __declspec(noinline)
 #else
 #define UNLIKELY(x) __builtin_expect(!!(x), 0)
-#define LIKELY(x) __builtin_expect(!!(x), 1)
 #define OMPT_NOINLINE __attribute__((noinline))
 #endif
 

>From 258e329aaa983df4a727920cf4132031c66852a8 Mon Sep 17 00:00:00 2001
From: Haiyang He <imhhy123 at hotmail.com>
Date: Thu, 4 Sep 2025 23:50:21 -0600
Subject: [PATCH 7/9] call `__kmp_serial_initialize()` instead of
 `__kmp_middle_initialize()` in `__kmpc_push_num_threads()`, as suggested in
 feedback

---
 openmp/runtime/src/kmp_csupport.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 053fdb9d91d66..8f395ad3f7a63 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -233,10 +233,10 @@ void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_threads) {
   KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                 global_tid, num_threads));
-  // we'll do middle initialize first, as otherwise the assert on global_tid can
+  // we'll do serial initialize first, as otherwise the assert on global_tid can
   // fail when omp is not initialized and this function is called
-  if (!TCR_4(__kmp_init_middle)) {
-    __kmp_middle_initialize();
+  if (!TCR_4(__kmp_init_serial)) {
+    __kmp_serial_initialize();
   }
   __kmp_assert_valid_gtid(global_tid);
   __kmp_push_num_threads(loc, global_tid, num_threads);

>From 4002bb7c16bc2bcba0ebf3ae6cd50c7c27aa2218 Mon Sep 17 00:00:00 2001
From: Haiyang He <Haiyang_He at mentor.com>
Date: Wed, 17 Sep 2025 10:21:33 -0600
Subject: [PATCH 8/9] Revert "Fixed a deadlock in `#pragma omp parallel
 num_threads` after hard reset"

This reverts commit 773ab2f3f49f949553052ea1b84b2c38530489ac.
---
 openmp/runtime/src/kmp_csupport.cpp | 5 -----
 openmp/runtime/src/kmp_runtime.cpp  | 4 ----
 2 files changed, 9 deletions(-)

diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 8f395ad3f7a63..075a90b9de7ee 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -233,11 +233,6 @@ void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_threads) {
   KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                 global_tid, num_threads));
-  // we'll do serial initialize first, as otherwise the assert on global_tid can
-  // fail when omp is not initialized and this function is called
-  if (!TCR_4(__kmp_init_serial)) {
-    __kmp_serial_initialize();
-  }
   __kmp_assert_valid_gtid(global_tid);
   __kmp_push_num_threads(loc, global_tid, num_threads);
 }
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 78f4c3d8e7e02..0fabd5c7fecfd 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -8345,10 +8345,6 @@ void __kmp_cleanup(void) {
 
   __kmpc_destroy_allocator(KMP_GTID_SHUTDOWN, __kmp_def_allocator);
   __kmp_def_allocator = omp_default_mem_alloc;
-#ifdef KMP_TDATA_GTID
-  /*reset __kmp_gtid to initial value*/
-  __kmp_gtid = KMP_GTID_DNE;
-#endif
 
   KA_TRACE(10, ("__kmp_cleanup: exit\n"));
 }

>From 2888a73f76b5f91573eb62ad03b1e9466aadeb53 Mon Sep 17 00:00:00 2001
From: Haiyang He <Haiyang_He at mentor.com>
Date: Wed, 17 Sep 2025 13:52:20 -0600
Subject: [PATCH 9/9] Reuse the reset and re-initialize code from
 `__kmp_atfork_child()` in `__kmp_hard_pause()`

This should ensure that the program is in serially initialized state after
doing a hard pause, hence not breaking any compiler/runtime
invariants/assumptions.

Also fixed formatting issues
---
 openmp/runtime/src/kmp.h                     |  1 +
 openmp/runtime/src/kmp_runtime.cpp           | 94 ++++++++++++++++++++
 openmp/runtime/src/z_Linux_util.cpp          | 79 +---------------
 openmp/runtime/test/api/omp_pause_resource.c | 20 +++--
 4 files changed, 111 insertions(+), 83 deletions(-)

diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index a92573d20d77a..24db61c288822 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -4561,6 +4561,7 @@ static inline void __kmp_resume_if_hard_paused() {
     __kmp_pause_status = kmp_not_paused;
   }
 }
+extern void __kmp_hard_pause_reinitialize(const bool in_child_atfork_andler);
 
 extern void __kmp_omp_display_env(int verbose);
 
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 0fabd5c7fecfd..266645e22c6fb 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -9057,6 +9057,9 @@ void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
 void __kmp_hard_pause() {
   __kmp_pause_status = kmp_hard_paused;
   __kmp_internal_end_thread(-1);
+  // TODO: we'll do the same thing as child atfork handler, since we need to
+  // serially initialize the runtime library after __kmp_hard_pause()
+  __kmp_hard_pause_reinitialize(false);
 }
 
 // Soft resume sets __kmp_pause_status, and wakes up all threads.
@@ -9363,6 +9366,97 @@ void __kmp_set_nesting_mode_threads() {
     set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
 }
 
+void __kmp_hard_pause_reinitialize(const bool in_child_atfork_andler) {
+#if KMP_AFFINITY_SUPPORTED
+#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY ||     \
+    KMP_OS_AIX
+  // reset the affinity in the child to the initial thread
+  // affinity in the parent
+  kmp_set_thread_affinity_mask_initial();
+#endif
+  // Set default not to bind threads tightly in the child (we're expecting
+  // over-subscription after the fork and this can improve things for
+  // scripting languages that use OpenMP inside process-parallel code).
+  if (__kmp_nested_proc_bind.bind_types != NULL) {
+    __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
+  }
+  for (kmp_affinity_t *affinity : __kmp_affinities)
+    *affinity = KMP_AFFINITY_INIT(affinity->env_var);
+  __kmp_affin_fullMask = nullptr;
+  __kmp_affin_origMask = nullptr;
+  __kmp_topology = nullptr;
+#endif // KMP_AFFINITY_SUPPORTED
+
+  // TODO: resetting these global variables might not be needed if we are not
+  // in the child handler, as `__kmp_cleanup()` would most likely have reset
+  // them already
+
+#if KMP_USE_MONITOR
+  __kmp_init_monitor = 0;
+#endif
+  __kmp_init_parallel = FALSE;
+  __kmp_init_middle = FALSE;
+  __kmp_init_serial = FALSE;
+  TCW_4(__kmp_init_gtid, FALSE);
+  __kmp_init_common = FALSE;
+
+  TCW_4(__kmp_init_user_locks, FALSE);
+#if !KMP_USE_DYNAMIC_LOCK
+  __kmp_user_lock_table.used = 1;
+  __kmp_user_lock_table.allocated = 0;
+  __kmp_user_lock_table.table = NULL;
+  __kmp_lock_blocks = NULL;
+#endif
+
+  __kmp_all_nth = 0;
+  TCW_4(__kmp_nth, 0);
+
+  __kmp_thread_pool = NULL;
+  __kmp_thread_pool_insert_pt = NULL;
+  __kmp_team_pool = NULL;
+
+  // The threadprivate cache will be cleared in `__kmp_cleanup()`
+  if (in_child_atfork_andler) {
+    /* Must actually zero all the *cache arguments passed to
+       __kmpc_threadprivate here so threadprivate doesn't use stale data */
+    KA_TRACE(10, ("__kmp_atfork_child: checking cache address list %p\n",
+                  __kmp_threadpriv_cache_list));
+
+    while (__kmp_threadpriv_cache_list != NULL) {
+
+      if (*__kmp_threadpriv_cache_list->addr != NULL) {
+        KC_TRACE(50, ("__kmp_atfork_child: zeroing cache at address %p\n",
+                      &(*__kmp_threadpriv_cache_list->addr)));
+
+        *__kmp_threadpriv_cache_list->addr = NULL;
+      }
+      __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list->next;
+    }
+
+    /* reset statically initialized locks */
+    __kmp_init_bootstrap_lock(&__kmp_initz_lock);
+    __kmp_init_bootstrap_lock(&__kmp_stdio_lock);
+    __kmp_init_bootstrap_lock(&__kmp_console_lock);
+    __kmp_init_bootstrap_lock(&__kmp_task_team_lock);
+  }
+
+#if USE_ITT_BUILD
+  __kmp_itt_reset(); // reset ITT's global state
+#endif /* USE_ITT_BUILD */
+
+  {
+    // Child process often get terminated without any use of OpenMP. That might
+    // cause mapped shared memory file to be left unattended. Thus we postpone
+    // library registration till middle initialization in the child process.
+
+    // After we do a `__kmpc_pause_resource()`, the omp runtime must also be in
+    // serially initialized state in order to not break the assumptions of
+    // compiler+runtime implementation
+    __kmp_need_register_serial = FALSE;
+    __kmp_serial_initialize();
+  }
+}
+
 #if ENABLE_LIBOMPTARGET
 void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid, void *current_task,
                            void *event) = NULL;
diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
index b33e87ad18e9b..cd48fd3fe9506 100644
--- a/openmp/runtime/src/z_Linux_util.cpp
+++ b/openmp/runtime/src/z_Linux_util.cpp
@@ -1312,86 +1312,11 @@ static void __kmp_atfork_child(void) {
 
   ++__kmp_fork_count;
 
-#if KMP_AFFINITY_SUPPORTED
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY ||     \
-    KMP_OS_AIX
-  // reset the affinity in the child to the initial thread
-  // affinity in the parent
-  kmp_set_thread_affinity_mask_initial();
-#endif
-  // Set default not to bind threads tightly in the child (we're expecting
-  // over-subscription after the fork and this can improve things for
-  // scripting languages that use OpenMP inside process-parallel code).
-  if (__kmp_nested_proc_bind.bind_types != NULL) {
-    __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
-  }
-  for (kmp_affinity_t *affinity : __kmp_affinities)
-    *affinity = KMP_AFFINITY_INIT(affinity->env_var);
-  __kmp_affin_fullMask = nullptr;
-  __kmp_affin_origMask = nullptr;
-  __kmp_topology = nullptr;
-#endif // KMP_AFFINITY_SUPPORTED
-
-#if KMP_USE_MONITOR
-  __kmp_init_monitor = 0;
-#endif
-  __kmp_init_parallel = FALSE;
-  __kmp_init_middle = FALSE;
-  __kmp_init_serial = FALSE;
-  TCW_4(__kmp_init_gtid, FALSE);
-  __kmp_init_common = FALSE;
-
-  TCW_4(__kmp_init_user_locks, FALSE);
-#if !KMP_USE_DYNAMIC_LOCK
-  __kmp_user_lock_table.used = 1;
-  __kmp_user_lock_table.allocated = 0;
-  __kmp_user_lock_table.table = NULL;
-  __kmp_lock_blocks = NULL;
-#endif
-
-  __kmp_all_nth = 0;
-  TCW_4(__kmp_nth, 0);
-
-  __kmp_thread_pool = NULL;
-  __kmp_thread_pool_insert_pt = NULL;
-  __kmp_team_pool = NULL;
-
-  /* Must actually zero all the *cache arguments passed to __kmpc_threadprivate
-     here so threadprivate doesn't use stale data */
-  KA_TRACE(10, ("__kmp_atfork_child: checking cache address list %p\n",
-                __kmp_threadpriv_cache_list));
-
-  while (__kmp_threadpriv_cache_list != NULL) {
-
-    if (*__kmp_threadpriv_cache_list->addr != NULL) {
-      KC_TRACE(50, ("__kmp_atfork_child: zeroing cache at address %p\n",
-                    &(*__kmp_threadpriv_cache_list->addr)));
-
-      *__kmp_threadpriv_cache_list->addr = NULL;
-    }
-    __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list->next;
-  }
+  // re-use the same re-initialization code as __kmp_hard_pause()
+  __kmp_hard_pause_reinitialize(true);
 
   __kmp_init_runtime = FALSE;
 
-  /* reset statically initialized locks */
-  __kmp_init_bootstrap_lock(&__kmp_initz_lock);
-  __kmp_init_bootstrap_lock(&__kmp_stdio_lock);
-  __kmp_init_bootstrap_lock(&__kmp_console_lock);
-  __kmp_init_bootstrap_lock(&__kmp_task_team_lock);
-
-#if USE_ITT_BUILD
-  __kmp_itt_reset(); // reset ITT's global state
-#endif /* USE_ITT_BUILD */
-
-  {
-    // Child process often get terminated without any use of OpenMP. That might
-    // cause mapped shared memory file to be left unattended. Thus we postpone
-    // library registration till middle initialization in the child process.
-    __kmp_need_register_serial = FALSE;
-    __kmp_serial_initialize();
-  }
-
   /* This is necessary to make sure no stale data is left around */
   /* AC: customers complain that we use unsafe routines in the atfork
      handler. Mathworks: dlsym() is unsafe. We call dlsym and dlopen
diff --git a/openmp/runtime/test/api/omp_pause_resource.c b/openmp/runtime/test/api/omp_pause_resource.c
index fce83824df35b..6154377b9c03a 100644
--- a/openmp/runtime/test/api/omp_pause_resource.c
+++ b/openmp/runtime/test/api/omp_pause_resource.c
@@ -16,9 +16,13 @@ void doOmpWorkWithCritical(int *a_lockCtr, int *b_lockCtr) {
 #pragma omp parallel num_threads(NUM_THREADS)
   {
 #pragma omp critical(a_lock)
-    { *a_lockCtr = *a_lockCtr + 1; }
+    {
+      *a_lockCtr = *a_lockCtr + 1;
+    }
 #pragma omp critical(b_lock)
-    { *b_lockCtr = *b_lockCtr + 1; }
+    {
+      *b_lockCtr = *b_lockCtr + 1;
+    }
   }
 }
 
@@ -47,7 +51,8 @@ void test_omp_get_thread_num_after_omp_hard_pause_resource_all() {
 
 // use omp to do some work, guarantees omp initialization
 #pragma omp parallel num_threads(NUM_THREADS)
-  {}
+  {
+  }
 
   // omp hard pause should succeed
   int rc = omp_pause_resource_all(omp_pause_hard);
@@ -62,7 +67,8 @@ void test_omp_get_thread_num_after_omp_hard_pause_resource_all() {
 void test_omp_parallel_num_threads_after_omp_hard_pause_resource_all() {
 // use omp to do some work
 #pragma omp parallel num_threads(NUM_THREADS)
-  {}
+  {
+  }
 
   // omp hard pause should succeed
   int rc = omp_pause_resource_all(omp_pause_hard);
@@ -70,7 +76,8 @@ void test_omp_parallel_num_threads_after_omp_hard_pause_resource_all() {
 
 // this should not trigger any omp asserts
 #pragma omp parallel num_threads(NUM_THREADS)
-  {}
+  {
+  }
 }
 
 void test_KMP_INIT_AT_FORK_with_fork_after_omp_hard_pause_resource_all() {
@@ -106,7 +113,8 @@ void test_KMP_INIT_AT_FORK_with_fork_after_omp_hard_pause_resource_all() {
 void test_fork_child_exiting_after_omp_hard_pause_resource_all() {
 // use omp to do some work
 #pragma omp parallel num_threads(NUM_THREADS)
-  {}
+  {
+  }
 
   // omp hard pause should succeed
   int rc = omp_pause_resource_all(omp_pause_hard);



More information about the Openmp-commits mailing list