[Openmp-commits] [openmp] 2e02579 - [OpenMP] Add use of TPAUSE
Terry Wilmarth via Openmp-commits
openmp-commits at lists.llvm.org
Tue Jan 18 08:14:46 PST 2022
Author: Terry Wilmarth
Date: 2022-01-18T10:14:32-06:00
New Revision: 2e02579a76cf4ea3acdc0e076f53ff9cb15fa38f
URL: https://github.com/llvm/llvm-project/commit/2e02579a76cf4ea3acdc0e076f53ff9cb15fa38f
DIFF: https://github.com/llvm/llvm-project/commit/2e02579a76cf4ea3acdc0e076f53ff9cb15fa38f.diff
LOG: [OpenMP] Add use of TPAUSE
Add use of TPAUSE (from the WAITPKG instruction set) to the runtime for
Intel hardware, with an environment variable (KMP_TPAUSE) to enable it in a
particular C-state. TPAUSE is always used when it is selected via that
variable and the hardware reports WAITPKG support; otherwise, the runtime
falls back to the old path of checking __kmp_use_yield, etc.
Differential Revision: https://reviews.llvm.org/D115758
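For context, TPAUSE waits until the processor's time-stamp counter reaches the 64-bit deadline it is given, in either the C0.1 (hint 1) or the deeper C0.2 (hint 0) optimized-wait state. Below is a minimal standalone sketch of that timed-pause idea using the compiler's WAITPKG intrinsics instead of the runtime's wrappers; the flag-polling loop, the 1000-tick initial backoff and the cap are illustrative assumptions, not values from this patch (GCC/Clang, compile with -mwaitpkg).

#include <immintrin.h>   // _tpause (WAITPKG intrinsics)
#include <x86intrin.h>   // __rdtsc

// Spin until *flag becomes nonzero, pausing in C0.1 between checks.
static void wait_for_flag(volatile int *flag) {
  unsigned long long backoff = 1000;        // initial wait in TSC ticks (illustrative)
  while (*flag == 0) {
    unsigned long long deadline = __rdtsc() + backoff;
    _tpause(1, deadline);                   // hint 1 = C0.1, the faster-wake state
    if (backoff < (1ull << 20))             // cap the exponential backoff
      backoff *= 2;
  }
}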
Added:
Modified:
openmp/runtime/src/kmp.h
openmp/runtime/src/kmp_dispatch.cpp
openmp/runtime/src/kmp_dispatch.h
openmp/runtime/src/kmp_global.cpp
openmp/runtime/src/kmp_lock.cpp
openmp/runtime/src/kmp_lock.h
openmp/runtime/src/kmp_runtime.cpp
openmp/runtime/src/kmp_settings.cpp
openmp/runtime/src/kmp_tasking.cpp
openmp/runtime/src/kmp_wait_release.h
openmp/runtime/src/z_Windows_NT_util.cpp
Removed:
################################################################################
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index ede6aa992d53a..a41265a4763a3 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -1315,6 +1315,82 @@ static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
#define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */
+// User-level Monitor/Mwait
+#if KMP_HAVE_UMWAIT
+// We always try for UMWAIT first
+#if KMP_HAVE_WAITPKG_INTRINSICS
+#if KMP_HAVE_IMMINTRIN_H
+#include <immintrin.h>
+#elif KMP_HAVE_INTRIN_H
+#include <intrin.h>
+#endif
+#endif // KMP_HAVE_WAITPKG_INTRINSICS
+
+KMP_ATTRIBUTE_TARGET_WAITPKG
+static inline int __kmp_tpause(uint32_t hint, uint64_t counter) {
+#if !KMP_HAVE_WAITPKG_INTRINSICS
+ uint32_t timeHi = uint32_t(counter >> 32);
+ uint32_t timeLo = uint32_t(counter & 0xffffffff);
+ char flag;
+ __asm__ volatile("#tpause\n.byte 0x66, 0x0F, 0xAE, 0xF1\n"
+ "setb %0"
+ : "=r"(flag)
+ : "a"(timeLo), "d"(timeHi), "c"(hint)
+ :);
+ return flag;
+#else
+ return _tpause(hint, counter);
+#endif
+}
+KMP_ATTRIBUTE_TARGET_WAITPKG
+static inline void __kmp_umonitor(void *cacheline) {
+#if !KMP_HAVE_WAITPKG_INTRINSICS
+ __asm__ volatile("# umonitor\n.byte 0xF3, 0x0F, 0xAE, 0x01 "
+ :
+ : "a"(cacheline)
+ :);
+#else
+ _umonitor(cacheline);
+#endif
+}
+KMP_ATTRIBUTE_TARGET_WAITPKG
+static inline int __kmp_umwait(uint32_t hint, uint64_t counter) {
+#if !KMP_HAVE_WAITPKG_INTRINSICS
+ uint32_t timeHi = uint32_t(counter >> 32);
+ uint32_t timeLo = uint32_t(counter & 0xffffffff);
+ char flag;
+ __asm__ volatile("#umwait\n.byte 0xF2, 0x0F, 0xAE, 0xF1\n"
+ "setb %0"
+ : "=r"(flag)
+ : "a"(timeLo), "d"(timeHi), "c"(hint)
+ :);
+ return flag;
+#else
+ return _umwait(hint, counter);
+#endif
+}
+#elif KMP_HAVE_MWAIT
+#if KMP_OS_UNIX
+#include <pmmintrin.h>
+#else
+#include <intrin.h>
+#endif
+#if KMP_OS_UNIX
+__attribute__((target("sse3")))
+#endif
+static inline void
+__kmp_mm_monitor(void *cacheline, unsigned extensions, unsigned hints) {
+ _mm_monitor(cacheline, extensions, hints);
+}
+#if KMP_OS_UNIX
+__attribute__((target("sse3")))
+#endif
+static inline void
+__kmp_mm_mwait(unsigned extensions, unsigned hints) {
+ _mm_mwait(extensions, hints);
+}
+#endif // KMP_HAVE_UMWAIT
+
#if KMP_ARCH_X86
extern void __kmp_x86_pause(void);
#elif KMP_MIC
@@ -1344,6 +1420,9 @@ static inline void __kmp_x86_pause(void) { _mm_pause(); }
#define KMP_INIT_YIELD(count) \
{ (count) = __kmp_yield_init; }
+#define KMP_INIT_BACKOFF(time) \
+ { (time) = __kmp_pause_init; }
+
#define KMP_OVERSUBSCRIBED \
(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))
@@ -1381,7 +1460,36 @@ static inline void __kmp_x86_pause(void) { _mm_pause(); }
} \
}
-#define KMP_YIELD_OVERSUB_ELSE_SPIN(count) \
+// If TPAUSE is available & enabled, use it. If oversubscribed, use the slower
+// (C0.2) state, which improves performance of other SMT threads on the same
+// core, otherwise, use the fast (C0.1) default state, or whatever the user has
+// requested. Uses a timed TPAUSE, and exponential backoff. If TPAUSE isn't
+// available, fall back to the regular CPU pause and yield combination.
+#if KMP_HAVE_UMWAIT
+#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time) \
+ { \
+ if (__kmp_tpause_enabled) { \
+ if (KMP_OVERSUBSCRIBED) { \
+ __kmp_tpause(0, (time)); \
+ } else { \
+ __kmp_tpause(__kmp_tpause_hint, (time)); \
+ } \
+ (time) *= 2; \
+ } else { \
+ KMP_CPU_PAUSE(); \
+ if ((KMP_TRY_YIELD_OVERSUB)) { \
+ __kmp_yield(); \
+ } else if (__kmp_use_yield == 1) { \
+ (count) -= 2; \
+ if (!(count)) { \
+ __kmp_yield(); \
+ (count) = __kmp_yield_next; \
+ } \
+ } \
+ } \
+ }
+#else
+#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time) \
{ \
KMP_CPU_PAUSE(); \
if ((KMP_TRY_YIELD_OVERSUB)) \
@@ -1394,80 +1502,6 @@ static inline void __kmp_x86_pause(void) { _mm_pause(); }
} \
} \
}
-
-// User-level Monitor/Mwait
-#if KMP_HAVE_UMWAIT
-// We always try for UMWAIT first
-#if KMP_HAVE_WAITPKG_INTRINSICS
-#if KMP_HAVE_IMMINTRIN_H
-#include <immintrin.h>
-#elif KMP_HAVE_INTRIN_H
-#include <intrin.h>
-#endif
-#endif // KMP_HAVE_WAITPKG_INTRINSICS
-KMP_ATTRIBUTE_TARGET_WAITPKG
-static inline int __kmp_tpause(uint32_t hint, uint64_t counter) {
-#if !KMP_HAVE_WAITPKG_INTRINSICS
- uint32_t timeHi = uint32_t(counter >> 32);
- uint32_t timeLo = uint32_t(counter & 0xffffffff);
- char flag;
- __asm__ volatile("#tpause\n.byte 0x66, 0x0F, 0xAE, 0xF1\n"
- "setb %0"
- : "=r"(flag)
- : "a"(timeLo), "d"(timeHi), "c"(hint)
- :);
- return flag;
-#else
- return _tpause(hint, counter);
-#endif
-}
-KMP_ATTRIBUTE_TARGET_WAITPKG
-static inline void __kmp_umonitor(void *cacheline) {
-#if !KMP_HAVE_WAITPKG_INTRINSICS
- __asm__ volatile("# umonitor\n.byte 0xF3, 0x0F, 0xAE, 0x01 "
- :
- : "a"(cacheline)
- :);
-#else
- _umonitor(cacheline);
-#endif
-}
-KMP_ATTRIBUTE_TARGET_WAITPKG
-static inline int __kmp_umwait(uint32_t hint, uint64_t counter) {
-#if !KMP_HAVE_WAITPKG_INTRINSICS
- uint32_t timeHi = uint32_t(counter >> 32);
- uint32_t timeLo = uint32_t(counter & 0xffffffff);
- char flag;
- __asm__ volatile("#umwait\n.byte 0xF2, 0x0F, 0xAE, 0xF1\n"
- "setb %0"
- : "=r"(flag)
- : "a"(timeLo), "d"(timeHi), "c"(hint)
- :);
- return flag;
-#else
- return _umwait(hint, counter);
-#endif
-}
-#elif KMP_HAVE_MWAIT
-#if KMP_OS_UNIX
-#include <pmmintrin.h>
-#else
-#include <intrin.h>
-#endif
-#if KMP_OS_UNIX
-__attribute__((target("sse3")))
-#endif
-static inline void
-__kmp_mm_monitor(void *cacheline, unsigned extensions, unsigned hints) {
- _mm_monitor(cacheline, extensions, hints);
-}
-#if KMP_OS_UNIX
-__attribute__((target("sse3")))
-#endif
-static inline void
-__kmp_mm_mwait(unsigned extensions, unsigned hints) {
- _mm_mwait(extensions, hints);
-}
#endif // KMP_HAVE_UMWAIT
/* ------------------------------------------------------------------------ */
@@ -3088,6 +3122,7 @@ extern kmp_int32 __kmp_use_yield;
extern kmp_int32 __kmp_use_yield_exp_set;
extern kmp_uint32 __kmp_yield_init;
extern kmp_uint32 __kmp_yield_next;
+extern kmp_uint64 __kmp_pause_init;
/* ------------------------------------------------------------------------- */
extern int __kmp_allThreadsSpecified;
@@ -3290,6 +3325,13 @@ extern int __kmp_mwait_enabled; // Runtime check if ring3 mwait is enabled
extern int __kmp_mwait_hints; // Hints to pass in to mwait
#endif
+#if KMP_HAVE_UMWAIT
+extern int __kmp_waitpkg_enabled; // Runtime check if waitpkg exists
+extern int __kmp_tpause_state; // 0 (default), 1=C0.1, 2=C0.2; from KMP_TPAUSE
+extern int __kmp_tpause_hint; // 1=C0.1 (default), 0=C0.2; from KMP_TPAUSE
+extern int __kmp_tpause_enabled; // 0 (default), 1 (KMP_TPAUSE is non-zero)
+#endif
+
/* ------------------------------------------------------------------------- */
extern kmp_global_t __kmp_global; /* global status */
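The relocated UMWAIT/TPAUSE block above and the new two-argument KMP_YIELD_OVERSUB_ELSE_SPIN(count, time) are the heart of the change: with TPAUSE enabled, an oversubscribed thread pauses in the deeper C0.2 state (hint 0), a non-oversubscribed thread uses the configured hint (C0.1 by default), and the backoff value doubles each iteration from its initial __kmp_pause_init of 1. A minimal sketch of that decision logic, with local stand-ins for the runtime's __kmp_* globals (the values assumed here are for illustration; compile with -mwaitpkg):

#include <immintrin.h>   // _tpause, _mm_pause
#include <stdbool.h>

// Stand-ins for the runtime's globals (illustrative values, not the real symbols).
static bool tpause_enabled = true;   // KMP_TPAUSE set and WAITPKG present
static bool oversubscribed = false;  // more threads than available processors
static unsigned tpause_hint = 1;     // 1 = C0.1 (default), 0 = C0.2

// One iteration of the TPAUSE branch of KMP_YIELD_OVERSUB_ELSE_SPIN(count, time):
// the growing backoff value is passed as the TPAUSE operand, then doubled.
static void yield_oversub_else_spin(unsigned long long *time) {
  if (tpause_enabled) {
    _tpause(oversubscribed ? 0u : tpause_hint, *time);
    *time *= 2;                      // exponential backoff, starting from 1
  } else {
    _mm_pause();                     // otherwise fall back to the PAUSE/yield path
  }
}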
diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp
index 1aaffc76909a4..648332109dbb8 100644
--- a/openmp/runtime/src/kmp_dispatch.cpp
+++ b/openmp/runtime/src/kmp_dispatch.cpp
@@ -2655,9 +2655,11 @@ __kmp_wait_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
kmp_uint32 spins;
kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred;
kmp_uint32 r;
+ kmp_uint64 time;
KMP_FSYNC_SPIN_INIT(obj, CCAST(kmp_uint32 *, spin));
KMP_INIT_YIELD(spins);
+ KMP_INIT_BACKOFF(time);
// main wait spin loop
while (!f(r = TCR_4(*spin), check)) {
KMP_FSYNC_SPIN_PREPARE(obj);
@@ -2665,7 +2667,7 @@ __kmp_wait_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
split. It causes problems with infinite recursion because of exit lock */
/* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
__kmp_abort_thread(); */
- KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
}
KMP_FSYNC_SPIN_ACQUIRED(obj);
return r;
@@ -2680,15 +2682,17 @@ void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
kmp_uint32 check = checker;
kmp_uint32 spins;
kmp_uint32 (*f)(void *, kmp_uint32) = pred;
+ kmp_uint64 time;
KMP_FSYNC_SPIN_INIT(obj, spin);
KMP_INIT_YIELD(spins);
+ KMP_INIT_BACKOFF(time);
// main wait spin loop
while (!f(spin, check)) {
KMP_FSYNC_SPIN_PREPARE(obj);
/* if we have waited a bit, or are noversubscribed, yield */
/* pause is in the following code */
- KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
}
KMP_FSYNC_SPIN_ACQUIRED(obj);
}
diff --git a/openmp/runtime/src/kmp_dispatch.h b/openmp/runtime/src/kmp_dispatch.h
index ae11361ca512b..154db174613db 100644
--- a/openmp/runtime/src/kmp_dispatch.h
+++ b/openmp/runtime/src/kmp_dispatch.h
@@ -292,10 +292,12 @@ static UT __kmp_wait(volatile UT *spinner, UT checker,
UT check = checker;
kmp_uint32 spins;
kmp_uint32 (*f)(UT, UT) = pred;
+ kmp_uint64 time;
UT r;
KMP_FSYNC_SPIN_INIT(obj, CCAST(UT *, spin));
KMP_INIT_YIELD(spins);
+ KMP_INIT_BACKOFF(time);
// main wait spin loop
while (!f(r = *spin, check)) {
KMP_FSYNC_SPIN_PREPARE(obj);
@@ -305,7 +307,7 @@ static UT __kmp_wait(volatile UT *spinner, UT checker,
/* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
__kmp_abort_thread(); */
// If oversubscribed, or have waited a bit then yield.
- KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
}
KMP_FSYNC_SPIN_ACQUIRED(obj);
return r;
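The dispatch-side changes are mechanical: every spin-wait now declares a kmp_uint64 time alongside the existing spins counter, seeds it with KMP_INIT_BACKOFF, and passes both to KMP_YIELD_OVERSUB_ELSE_SPIN. A condensed sketch of the resulting loop shape (the names and the simplified pause helper below are placeholders, not the runtime's):

#include <immintrin.h>   // _mm_pause
#include <stdint.h>

// Simplified stand-in for KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time); the real
// macro also yields and may TPAUSE, here it just pauses and grows the backoff.
static void pause_or_yield(uint32_t *spins, uint64_t *time) {
  (void)spins;
  _mm_pause();
  *time *= 2;
}

// Shape of __kmp_wait_4 after the patch: both counters are initialized up front
// and threaded through every pause/yield decision.
static uint32_t wait_until(volatile uint32_t *spinner, uint32_t checker,
                           int (*pred)(uint32_t, uint32_t)) {
  uint32_t spins = 1024;  // stand-in for KMP_INIT_YIELD(spins); value illustrative
  uint64_t time = 1;      // KMP_INIT_BACKOFF(time); __kmp_pause_init starts at 1
  uint32_t r;
  while (!pred(r = *spinner, checker))
    pause_or_yield(&spins, &time);
  return r;
}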
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 4aea5a2d86632..fdabaad21f7b6 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -219,6 +219,13 @@ int __kmp_mwait_enabled = FALSE;
int __kmp_mwait_hints = 0;
#endif
+#if KMP_HAVE_UMWAIT
+int __kmp_waitpkg_enabled = 0;
+int __kmp_tpause_state = 0;
+int __kmp_tpause_hint = 1;
+int __kmp_tpause_enabled = 0;
+#endif
+
/* map OMP 3.0 schedule types with our internal schedule types */
enum sched_type __kmp_sch_map[kmp_sched_upper - kmp_sched_lower_ext +
kmp_sched_upper_std - kmp_sched_lower - 2] = {
@@ -425,6 +432,7 @@ kmp_int32 __kmp_use_yield_exp_set = 0;
kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT;
kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT;
+kmp_uint64 __kmp_pause_init = 1; // for tpause
/* ------------------------------------------------------ */
/* STATE mostly syncronized with global lock */
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
index f3bdb03663a61..fff7305b57f50 100644
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -96,12 +96,19 @@ __kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
}
kmp_uint32 spins;
+ kmp_uint64 time;
KMP_FSYNC_PREPARE(lck);
KMP_INIT_YIELD(spins);
+ KMP_INIT_BACKOFF(time);
kmp_backoff_t backoff = __kmp_spin_backoff_params;
do {
+#if !KMP_HAVE_UMWAIT
__kmp_spin_backoff(&backoff);
- KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+#else
+ if (!__kmp_tpause_enabled)
+ __kmp_spin_backoff(&backoff);
+#endif
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
} while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
!__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
KMP_FSYNC_ACQUIRED(lck);
@@ -2227,10 +2234,12 @@ __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
// The current implementation of KMP_WAIT doesn't allow for mask
// and poll to be re-read every spin iteration.
kmp_uint32 spins;
+ kmp_uint64 time;
KMP_FSYNC_PREPARE(lck);
KMP_INIT_YIELD(spins);
+ KMP_INIT_BACKOFF(time);
while (polls[ticket & mask] < ticket) { // atomic load
- KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
// Re-read the mask and the poll pointer from the lock structure.
//
// Make certain that "mask" is read before "polls" !!!
@@ -2659,9 +2668,17 @@ void __kmp_spin_backoff(kmp_backoff_t *boff) {
kmp_uint32 i;
for (i = boff->step; i > 0; i--) {
kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
- do {
- KMP_CPU_PAUSE();
- } while (before(__kmp_tsc(), goal));
+#if KMP_HAVE_UMWAIT
+ if (__kmp_umwait_enabled) {
+ __kmp_tpause(0, boff->min_tick);
+ } else {
+#endif
+ do {
+ KMP_CPU_PAUSE();
+ } while (before(__kmp_tsc(), goal));
+#if KMP_HAVE_UMWAIT
+ }
+#endif
}
boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
}
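In __kmp_spin_backoff, the PAUSE-until-TSC-goal loop is now bypassed in favor of a single TPAUSE when user-level waits are enabled. Since TPAUSE takes an absolute TSC deadline in its 64-bit operand, the general pattern looks like the sketch below; the deadline form and the have_waitpkg flag are illustrative, not the exact arguments used in the patch (compile with -mwaitpkg):

#include <immintrin.h>   // _tpause, _mm_pause
#include <x86intrin.h>   // __rdtsc

static int have_waitpkg = 1;  // assume CPUID.7.0:ECX[5] was checked at startup

// Wait roughly `ticks` TSC cycles before retrying a contended lock.
static void backoff_for(unsigned long long ticks) {
  unsigned long long goal = __rdtsc() + ticks;
  if (have_waitpkg) {
    _tpause(0, goal);            // hint 0 = C0.2, kinder to SMT siblings
  } else {
    do {
      _mm_pause();               // classic PAUSE loop until the goal passes
    } while (__rdtsc() < goal);
  }
}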
diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h
index 90afd8fd7eb3e..a19f4ca323b86 100644
--- a/openmp/runtime/src/kmp_lock.h
+++ b/openmp/runtime/src/kmp_lock.h
@@ -651,12 +651,15 @@ extern int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
if (lck->tas.lk.poll != 0 || \
!__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
kmp_uint32 spins; \
+ kmp_uint64 time; \
KMP_FSYNC_PREPARE(lck); \
KMP_INIT_YIELD(spins); \
+ KMP_INIT_BACKOFF(time); \
do { \
- KMP_YIELD_OVERSUB_ELSE_SPIN(spins); \
- } while (lck->tas.lk.poll != 0 || !__kmp_atomic_compare_store_acq( \
- &lck->tas.lk.poll, 0, gtid + 1)); \
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time); \
+ } while ( \
+ lck->tas.lk.poll != 0 || \
+ !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \
} \
KMP_FSYNC_ACQUIRED(lck); \
} else { \
@@ -758,10 +761,12 @@ extern int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
if ((lck->tas.lk.poll != 0) || \
!__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
kmp_uint32 spins; \
+ kmp_uint64 time; \
KMP_FSYNC_PREPARE(lck); \
KMP_INIT_YIELD(spins); \
+ KMP_INIT_BACKOFF(time); \
do { \
- KMP_YIELD_OVERSUB_ELSE_SPIN(spins); \
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time); \
} while ( \
(lck->tas.lk.poll != 0) || \
!__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 7af970803a30a..e1af2f43dae76 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -6895,7 +6895,9 @@ static void __kmp_check_mic_type() {
static void __kmp_user_level_mwait_init() {
struct kmp_cpuid buf;
__kmp_x86_cpuid(7, 0, &buf);
- __kmp_umwait_enabled = ((buf.ecx >> 5) & 1) && __kmp_user_level_mwait;
+ __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
+ __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
+ __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
__kmp_umwait_enabled));
}
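__kmp_user_level_mwait_init now records WAITPKG support separately: CPUID leaf 7, subleaf 0, ECX bit 5. A standalone equivalent using GCC/Clang's <cpuid.h> (a sketch; the runtime goes through its own __kmp_x86_cpuid wrapper):

#include <cpuid.h>
#include <stdio.h>

int main(void) {
  unsigned eax, ebx, ecx, edx;
  int waitpkg = 0;
  // CPUID leaf 7, subleaf 0: ECX bit 5 is the WAITPKG feature flag.
  if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
    waitpkg = (ecx >> 5) & 1;
  printf("WAITPKG (TPAUSE/UMWAIT) supported: %d\n", waitpkg);
  return 0;
}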
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index 302abb4042d89..27ab51dbf7ed5 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -5171,6 +5171,27 @@ static void __kmp_stg_print_mwait_hints(kmp_str_buf_t *buffer, char const *name,
#endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
+#if KMP_HAVE_UMWAIT
+// -----------------------------------------------------------------------------
+// KMP_TPAUSE
+// 0 = don't use TPAUSE, 1 = use C0.1 state, 2 = use C0.2 state
+
+static void __kmp_stg_parse_tpause(char const *name, char const *value,
+ void *data) {
+ __kmp_stg_parse_int(name, value, 0, INT_MAX, &__kmp_tpause_state);
+ if (__kmp_tpause_state != 0) {
+ // The actual hint passed to tpause is: 0 for C0.2 and 1 for C0.1
+ if (__kmp_tpause_state == 2) // use C0.2
+ __kmp_tpause_hint = 0; // default was set to 1 for C0.1
+ }
+} // __kmp_stg_parse_tpause
+
+static void __kmp_stg_print_tpause(kmp_str_buf_t *buffer, char const *name,
+ void *data) {
+ __kmp_stg_print_int(buffer, name, __kmp_tpause_state);
+} // __kmp_stg_print_tpause
+#endif // KMP_HAVE_UMWAIT
+
// -----------------------------------------------------------------------------
// OMP_DISPLAY_ENV
@@ -5536,6 +5557,10 @@ static kmp_setting_t __kmp_stg_table[] = {
{"KMP_MWAIT_HINTS", __kmp_stg_parse_mwait_hints,
__kmp_stg_print_mwait_hints, NULL, 0, 0},
#endif
+
+#if KMP_HAVE_UMWAIT
+ {"KMP_TPAUSE", __kmp_stg_parse_tpause, __kmp_stg_print_tpause, NULL, 0, 0},
+#endif
{"", NULL, NULL, NULL, 0, 0}}; // settings
static int const __kmp_stg_count =
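KMP_TPAUSE accepts 0 (do not use TPAUSE), 1 (C0.1) or 2 (C0.2); only the value 2 flips the hint from its default of 1 (C0.1) to 0 (C0.2), and any non-zero value arms __kmp_tpause_enabled once WAITPKG is detected at startup. A small illustration of that mapping outside the runtime (hypothetical program, not part of the patch):

#include <stdio.h>
#include <stdlib.h>

int main(void) {
  // e.g. run with:  KMP_TPAUSE=2 ./a.out
  const char *v = getenv("KMP_TPAUSE");
  int state = v ? atoi(v) : 0;        // 0 = off, 1 = C0.1, 2 = C0.2
  int hint = (state == 2) ? 0 : 1;    // TPAUSE hint: 0 selects C0.2, 1 selects C0.1
  int enabled = (state > 0);          // the runtime also requires WAITPKG support
  printf("tpause enabled=%d hint=%d\n", enabled, hint);
  return 0;
}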
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index d6665a7ccfb41..e445438524c8e 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -3552,9 +3552,11 @@ void __kmp_reap_task_teams(void) {
void __kmp_wait_to_unref_task_teams(void) {
kmp_info_t *thread;
kmp_uint32 spins;
+ kmp_uint64 time;
int done;
KMP_INIT_YIELD(spins);
+ KMP_INIT_BACKOFF(time);
for (;;) {
done = TRUE;
@@ -3604,7 +3606,7 @@ void __kmp_wait_to_unref_task_teams(void) {
}
// If oversubscribed or have waited a bit, yield.
- KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
}
}
diff --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h
index 226150dfb7811..b32cb15de1b2d 100644
--- a/openmp/runtime/src/kmp_wait_release.h
+++ b/openmp/runtime/src/kmp_wait_release.h
@@ -377,6 +377,7 @@ __kmp_wait_template(kmp_info_t *this_thr,
#else
kmp_uint32 hibernate;
#endif
+ kmp_uint64 time;
KMP_FSYNC_SPIN_INIT(spin, NULL);
if (flag->done_check()) {
@@ -476,6 +477,7 @@ final_spin=FALSE)
#endif
KMP_INIT_YIELD(spins); // Setup for waiting
+ KMP_INIT_BACKOFF(time);
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
__kmp_pause_status == kmp_soft_paused) {
@@ -563,7 +565,7 @@ final_spin=FALSE)
// If we are oversubscribed, or have waited a bit (and
// KMP_LIBRARY=throughput), then yield
- KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
#if KMP_STATS_ENABLED
// Check if thread has been signalled to idle state
diff --git a/openmp/runtime/src/z_Windows_NT_util.cpp b/openmp/runtime/src/z_Windows_NT_util.cpp
index 0a0801c7ece2f..8fa198563a79f 100644
--- a/openmp/runtime/src/z_Windows_NT_util.cpp
+++ b/openmp/runtime/src/z_Windows_NT_util.cpp
@@ -1327,16 +1327,18 @@ static void __kmp_reap_common(kmp_info_t *th) {
// KMP_WAIT to cover this usage also.
void *obj = NULL;
kmp_uint32 spins;
+ kmp_uint64 time;
#if USE_ITT_BUILD
KMP_FSYNC_SPIN_INIT(obj, (void *)&th->th.th_info.ds.ds_alive);
#endif /* USE_ITT_BUILD */
KMP_INIT_YIELD(spins);
+ KMP_INIT_BACKOFF(time);
do {
#if USE_ITT_BUILD
KMP_FSYNC_SPIN_PREPARE(obj);
#endif /* USE_ITT_BUILD */
__kmp_is_thread_alive(th, &exit_val);
- KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
} while (exit_val == STILL_ACTIVE && TCR_4(th->th.th_info.ds.ds_alive));
#if USE_ITT_BUILD
if (exit_val == STILL_ACTIVE) {