[Openmp-commits] [openmp] [OpenMP] Use distributed fork/join barrier for large teams by default (PR #195473)

Kim Walisch via Openmp-commits openmp-commits at lists.llvm.org
Tue May 5 06:08:45 PDT 2026


https://github.com/kimwalisch updated https://github.com/llvm/llvm-project/pull/195473

From 82a1129e6f60d2714796524d48975520a39716f8 Mon Sep 17 00:00:00 2001
From: kimwalisch <kim.walisch at gmail.com>
Date: Tue, 5 May 2026 13:13:37 +0200
Subject: [PATCH 1/2] [libomp] Fix hyper_barrier scaling issue

---
 openmp/runtime/src/kmp_barrier.cpp    | 38 ++++++++++++++++++++++++---
 openmp/runtime/src/kmp_tasking.cpp    |  4 +++
 openmp/runtime/src/kmp_wait_release.h | 14 +++++-----
 3 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp
index 4d7989d1ce5eb..74cbf14c2d7f2 100644
--- a/openmp/runtime/src/kmp_barrier.cpp
+++ b/openmp/runtime/src/kmp_barrier.cpp
@@ -39,6 +39,23 @@
 
 void __kmp_print_structure(void); // Forward declaration
 
+static inline bool __kmp_hyper_forkjoin_uses_spin_wait(int nproc) {
+  static const int min_threads = 32;
+
+  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
+    return false;
+
+  if (nproc <= 0) {
+    nproc = __kmp_dflt_team_nth;
+    if (nproc <= 0)
+      nproc = __kmp_dflt_team_nth_ub;
+    if (__kmp_cg_max_nth > 0)
+      nproc = KMP_MIN(nproc, __kmp_cg_max_nth);
+  }
+
+  return nproc >= min_threads;
+}
+
 // ---------------------------- Barrier Algorithms ----------------------------
 // Distributed barrier
 
@@ -1105,8 +1122,14 @@ static void __kmp_hyper_barrier_gather(
                 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                 team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
       // Wait for child to arrive
-      kmp_flag_64<> c_flag(&child_bar->b_arrived, new_state);
-      c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
+      if (bt == bs_forkjoin_barrier &&
+          __kmp_hyper_forkjoin_uses_spin_wait(num_threads)) {
+        kmp_flag_64<false, false> c_flag(&child_bar->b_arrived, new_state);
+        c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
+      } else {
+        kmp_flag_64<> c_flag(&child_bar->b_arrived, new_state);
+        c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
+      }
       KMP_MB(); // Synchronize parent and child threads.
 #if USE_ITT_BUILD && USE_ITT_NOTIFY
       // Barrier imbalance - write min of the thread time and a child time to
@@ -1183,8 +1206,15 @@ static void __kmp_hyper_barrier_release(
     KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                   &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
     // Wait for parent thread to release us
-    kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
-    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
+    if (bt == bs_forkjoin_barrier &&
+        __kmp_hyper_forkjoin_uses_spin_wait(
+            thr_bar->th_fixed_icvs.nproc)) {
+      kmp_flag_64<false, false> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
+      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
+    } else {
+      kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
+      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
+    }
 #if USE_ITT_BUILD && USE_ITT_NOTIFY
     if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
       // In fork barrier where we could not get the object reliably
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index be42ed6f76abc..00f0c296e9013 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -3440,6 +3440,10 @@ template int __kmp_execute_tasks_64<false, true>(kmp_info_t *, kmp_int32,
                                                  int *USE_ITT_BUILD_ARG(void *),
                                                  kmp_int32);
 
+template int __kmp_execute_tasks_64<false, false>(
+    kmp_info_t *, kmp_int32, kmp_flag_64<false, false> *, int,
+    int *USE_ITT_BUILD_ARG(void *), kmp_int32);
+
 template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
                                                  kmp_flag_64<true, false> *,
                                                  int,
diff --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h
index 9baf280228ee5..d12ef5c72b4f3 100644
--- a/openmp/runtime/src/kmp_wait_release.h
+++ b/openmp/runtime/src/kmp_wait_release.h
@@ -484,8 +484,8 @@ final_spin=FALSE)
   KMP_INIT_YIELD(spins); // Setup for waiting
   KMP_INIT_BACKOFF(time);
 
-  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
-      __kmp_pause_status == kmp_soft_paused) {
+  if (Sleepable && (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
+                    __kmp_pause_status == kmp_soft_paused)) {
 #if KMP_USE_MONITOR
 // The worker threads cannot rely on the team struct existing at this point.
 // Use the bt values cached in the thread struct instead.
@@ -608,6 +608,11 @@ final_spin=FALSE)
       continue;
     }
 
+    // Don't suspend if wait loop designated non-sleepable
+    // in template parameters
+    if (!Sleepable)
+      continue;
+
     // Don't suspend if KMP_BLOCKTIME is set to "infinite"
     if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
         __kmp_pause_status != kmp_soft_paused)
@@ -626,11 +631,6 @@ final_spin=FALSE)
     if (KMP_BLOCKING(hibernate_goal, poll_count++))
       continue;
 #endif
-    // Don't suspend if wait loop designated non-sleepable
-    // in template parameters
-    if (!Sleepable)
-      continue;
-
 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
     if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
       KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));

From a9918a6676e5d2e643002acd37ff80aa27fd1ac9 Mon Sep 17 00:00:00 2001
From: kimwalisch <kim.walisch at gmail.com>
Date: Tue, 5 May 2026 14:46:17 +0200
Subject: [PATCH 2/2] Fix OMP_WAIT_POLICY=PASSIVE issue

---
 openmp/runtime/src/kmp_barrier.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp
index 74cbf14c2d7f2..76884cab39579 100644
--- a/openmp/runtime/src/kmp_barrier.cpp
+++ b/openmp/runtime/src/kmp_barrier.cpp
@@ -42,7 +42,7 @@ void __kmp_print_structure(void); // Forward declaration
 static inline bool __kmp_hyper_forkjoin_uses_spin_wait(int nproc) {
   static const int min_threads = 32;
 
-  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
+  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME || __kmp_wpolicy_passive)
     return false;
 
   if (nproc <= 0) {



More information about the Openmp-commits mailing list