[Openmp-commits] [openmp] r355120 - [OpenMP] Make use of sched_yield optional in runtime

Jonathan Peyton via Openmp-commits openmp-commits at lists.llvm.org
Thu Feb 28 11:11:30 PST 2019


Author: jlpeyton
Date: Thu Feb 28 11:11:29 2019
New Revision: 355120

URL: http://llvm.org/viewvc/llvm-project?rev=355120&view=rev
Log:
[OpenMP] Make use of sched_yield optional in runtime

This patch cleans up the yielding code and makes it optional. An
environment variable, KMP_USE_YIELD, was added. Yielding is still
on by default (KMP_USE_YIELD=1), but can be turned off completely
(KMP_USE_YIELD=0), or turned on only when oversubscription is detected
(KMP_USE_YIELD=2). Note that oversubscription cannot always be detected
by the runtime (for example, when the runtime is initialized and the
process forks, oversubscription cannot be detected currently over
multiple instances of the runtime).

Because yielding can now be controlled by the user, the library mode
settings (from KMP_LIBRARY) for throughput and turnaround have been
adjusted by altering blocktime, unless blocktime was also explicitly set.

In the original code, there were a number of places where a double yield
might have been done under oversubscription. This version checks for
oversubscription first and, if that check does not lead to a yield, then
performs the spin check.

Patch by Terry Wilmarth

Differential Revision: https://reviews.llvm.org/D58148

Modified:
    openmp/trunk/runtime/src/dllexports
    openmp/trunk/runtime/src/exports_so.txt
    openmp/trunk/runtime/src/kmp.h
    openmp/trunk/runtime/src/kmp_csupport.cpp
    openmp/trunk/runtime/src/kmp_dispatch.cpp
    openmp/trunk/runtime/src/kmp_dispatch.h
    openmp/trunk/runtime/src/kmp_dispatch_hier.h
    openmp/trunk/runtime/src/kmp_global.cpp
    openmp/trunk/runtime/src/kmp_itt.h
    openmp/trunk/runtime/src/kmp_lock.cpp
    openmp/trunk/runtime/src/kmp_lock.h
    openmp/trunk/runtime/src/kmp_os.h
    openmp/trunk/runtime/src/kmp_runtime.cpp
    openmp/trunk/runtime/src/kmp_settings.cpp
    openmp/trunk/runtime/src/kmp_tasking.cpp
    openmp/trunk/runtime/src/kmp_taskq.cpp
    openmp/trunk/runtime/src/kmp_wait_release.h
    openmp/trunk/runtime/src/z_Linux_util.cpp
    openmp/trunk/runtime/src/z_Windows_NT_util.cpp

Modified: openmp/trunk/runtime/src/dllexports
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/dllexports?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/dllexports (original)
+++ openmp/trunk/runtime/src/dllexports Thu Feb 28 11:11:29 2019
@@ -158,7 +158,7 @@
     #
 
     # Regular entry points
-        __kmp_wait_yield_4
+        __kmp_wait_4
         __kmp_fork_call
         __kmp_invoke_microtask
     %ifdef KMP_USE_MONITOR

Modified: openmp/trunk/runtime/src/exports_so.txt
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/exports_so.txt?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/exports_so.txt (original)
+++ openmp/trunk/runtime/src/exports_so.txt Thu Feb 28 11:11:29 2019
@@ -83,7 +83,7 @@ VERSION {
         __kmp_reap_worker;
         __kmp_release_64;
         __kmp_wait_64;
-        __kmp_wait_yield_4;
+        __kmp_wait_4;
 
         # ittnotify symbols to be used by debugger
         __kmp_itt_fini_ittlib;

Modified: openmp/trunk/runtime/src/kmp.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp.h?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp.h (original)
+++ openmp/trunk/runtime/src/kmp.h Thu Feb 28 11:11:29 2019
@@ -981,10 +981,6 @@ extern kmp_uint64 __kmp_now_nsec();
   (KMP_BLOCKTIME(team, tid) * KMP_USEC_PER_SEC)
 #define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
 #endif
-#define KMP_YIELD_NOW()                                                        \
-  (KMP_NOW_MSEC() / KMP_MAX(__kmp_dflt_blocktime, 1) %                         \
-       (__kmp_yield_on_count + __kmp_yield_off_count) <                        \
-   (kmp_uint32)__kmp_yield_on_count)
 #endif // KMP_USE_MONITOR
 
 #define KMP_MIN_STATSCOLS 40
@@ -999,14 +995,6 @@ extern kmp_uint64 __kmp_now_nsec();
 #define KMP_MAX_CHUNK (INT_MAX - 1)
 #define KMP_DEFAULT_CHUNK 1
 
-#define KMP_MIN_INIT_WAIT 1
-#define KMP_MAX_INIT_WAIT (INT_MAX / 2)
-#define KMP_DEFAULT_INIT_WAIT 2048U
-
-#define KMP_MIN_NEXT_WAIT 1
-#define KMP_MAX_NEXT_WAIT (INT_MAX / 2)
-#define KMP_DEFAULT_NEXT_WAIT 1024U
-
 #define KMP_DFLT_DISP_NUM_BUFF 7
 #define KMP_MAX_ORDERED 8
 
@@ -1090,7 +1078,7 @@ extern void __kmp_x86_cpuid(int mode, in
 extern void __kmp_x86_pause(void);
 #elif KMP_MIC
 // Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed
-// regression after removal of extra PAUSE from KMP_YIELD_SPIN(). Changing
+// regression after removal of extra PAUSE from spin loops. Changing
 // the delay from 100 to 300 showed even better performance than double PAUSE
 // on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC.
 static inline void __kmp_x86_pause(void) { _mm_delay_32(300); }
@@ -1115,31 +1103,54 @@ static inline void __kmp_x86_pause(void)
 #define KMP_INIT_YIELD(count)                                                  \
   { (count) = __kmp_yield_init; }
 
+#define KMP_OVERSUBSCRIBED                                                     \
+  (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))
+
+#define KMP_TRY_YIELD                                                          \
+  ((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED)))
+
+#define KMP_TRY_YIELD_OVERSUB                                                  \
+  ((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED))
+
 #define KMP_YIELD(cond)                                                        \
   {                                                                            \
     KMP_CPU_PAUSE();                                                           \
-    __kmp_yield((cond));                                                       \
+    if ((cond) && (KMP_TRY_YIELD))                                             \
+      __kmp_yield();                                                           \
+  }
+
+#define KMP_YIELD_OVERSUB()                                                    \
+  {                                                                            \
+    KMP_CPU_PAUSE();                                                           \
+    if ((KMP_TRY_YIELD_OVERSUB))                                               \
+      __kmp_yield();                                                           \
   }
 
 // Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround,
 // there should be no yielding since initial value from KMP_INIT_YIELD() is odd.
-
-#define KMP_YIELD_WHEN(cond, count)                                            \
+#define KMP_YIELD_SPIN(count)                                                  \
   {                                                                            \
     KMP_CPU_PAUSE();                                                           \
-    (count) -= 2;                                                              \
-    if (!(count)) {                                                            \
-      __kmp_yield(cond);                                                       \
-      (count) = __kmp_yield_next;                                              \
+    if (KMP_TRY_YIELD) {                                                       \
+      (count) -= 2;                                                            \
+      if (!(count)) {                                                          \
+        __kmp_yield();                                                         \
+        (count) = __kmp_yield_next;                                            \
+      }                                                                        \
     }                                                                          \
   }
-#define KMP_YIELD_SPIN(count)                                                  \
+
+#define KMP_YIELD_OVERSUB_ELSE_SPIN(count)                                     \
   {                                                                            \
     KMP_CPU_PAUSE();                                                           \
-    (count) -= 2;                                                              \
-    if (!(count)) {                                                            \
-      __kmp_yield(1);                                                          \
-      (count) = __kmp_yield_next;                                              \
+    if ((KMP_TRY_YIELD_OVERSUB))                                               \
+      __kmp_yield();                                                           \
+    else if (__kmp_use_yield == 1) {                                           \
+      (count) -= 2;                                                            \
+      if (!(count)) {                                                          \
+        __kmp_yield();                                                         \
+        (count) = __kmp_yield_next;                                            \
+      }                                                                        \
     }                                                                          \
   }
 
@@ -2945,10 +2956,6 @@ extern kmp_lock_t __kmp_global_lock; /*
 extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access  */
 extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
 
-/* used for yielding spin-waits */
-extern unsigned int __kmp_init_wait; /* initial number of spin-tests   */
-extern unsigned int __kmp_next_wait; /* susequent number of spin-tests */
-
 extern enum library_type __kmp_library;
 
 extern enum sched_type __kmp_sched; /* default runtime scheduling */
@@ -2977,16 +2984,11 @@ extern int __kmp_reserve_warn; /* have w
 extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
 #endif
 
+extern kmp_int32 __kmp_use_yield;
+extern kmp_int32 __kmp_use_yield_exp_set;
 extern kmp_uint32 __kmp_yield_init;
 extern kmp_uint32 __kmp_yield_next;
 
-#if KMP_USE_MONITOR
-extern kmp_uint32 __kmp_yielding_on;
-#endif
-extern kmp_uint32 __kmp_yield_cycle;
-extern kmp_int32 __kmp_yield_on_count;
-extern kmp_int32 __kmp_yield_off_count;
-
 /* ------------------------------------------------------------------------- */
 extern int __kmp_allThreadsSpecified;
 
@@ -3309,7 +3311,7 @@ extern void __kmp_push_num_teams(ident_t
                                  int num_threads);
 #endif
 
-extern void __kmp_yield(int cond);
+extern void __kmp_yield();
 
 extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                                    enum sched_type schedule, kmp_int32 lb,
@@ -3374,13 +3376,11 @@ extern kmp_uint32 __kmp_neq_4(kmp_uint32
 extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);
 extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker);
 extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker);
-extern kmp_uint32 __kmp_wait_yield_4(kmp_uint32 volatile *spinner,
-                                     kmp_uint32 checker,
-                                     kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
-                                     void *obj);
-extern void __kmp_wait_yield_4_ptr(void *spinner, kmp_uint32 checker,
-                                   kmp_uint32 (*pred)(void *, kmp_uint32),
-                                   void *obj);
+extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker,
+                               kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
+                               void *obj);
+extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
+                             kmp_uint32 (*pred)(void *, kmp_uint32), void *obj);
 
 class kmp_flag_32;
 class kmp_flag_64;

Modified: openmp/trunk/runtime/src/kmp_csupport.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_csupport.cpp?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_csupport.cpp (original)
+++ openmp/trunk/runtime/src/kmp_csupport.cpp Thu Feb 28 11:11:29 2019
@@ -683,7 +683,7 @@ void __kmpc_flush(ident_t *loc) {
   // }
   // and adding the yield here is good for at least a 10x speedup
   // when running >2 threads per core (on the NAS LU benchmark).
-  __kmp_yield(TRUE);
+  __kmp_yield();
 #endif
 #else
 #error Unknown or unsupported architecture
@@ -993,24 +993,18 @@ __kmp_init_indirect_csptr(kmp_critical_n
       kmp_uint32 spins;                                                        \
       KMP_FSYNC_PREPARE(l);                                                    \
       KMP_INIT_YIELD(spins);                                                   \
-      if (TCR_4(__kmp_nth) >                                                   \
-          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {               \
-        KMP_YIELD(TRUE);                                                       \
-      } else {                                                                 \
-        KMP_YIELD_SPIN(spins);                                                 \
-      }                                                                        \
       kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
-      while (                                                                  \
-          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
-          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {  \
-        __kmp_spin_backoff(&backoff);                                          \
+      do {                                                                     \
         if (TCR_4(__kmp_nth) >                                                 \
             (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
           KMP_YIELD(TRUE);                                                     \
         } else {                                                               \
           KMP_YIELD_SPIN(spins);                                               \
         }                                                                      \
-      }                                                                        \
+        __kmp_spin_backoff(&backoff);                                          \
+      } while (                                                                \
+          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
+          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy));   \
     }                                                                          \
     KMP_FSYNC_ACQUIRED(l);                                                     \
   }
@@ -1096,8 +1090,7 @@ __kmp_init_indirect_csptr(kmp_critical_n
               KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
     }                                                                          \
     KMP_MB();                                                                  \
-    KMP_YIELD(TCR_4(__kmp_nth) >                                               \
-              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));            \
+    KMP_YIELD_OVERSUB();                                                       \
   }
 
 #endif // KMP_USE_FUTEX
@@ -3976,8 +3969,8 @@ void __kmpc_doacross_init(ident_t *loc,
   // __kmp_dispatch_num_buffers)
   if (idx != sh_buf->doacross_buf_idx) {
     // Shared buffer is occupied, wait for it to be free
-    __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
-                       __kmp_eq_4, NULL);
+    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
+                 __kmp_eq_4, NULL);
   }
 #if KMP_32_BIT_ARCH
   // Check if we are the first thread. After the CAS the first thread gets 0,

Modified: openmp/trunk/runtime/src/kmp_dispatch.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_dispatch.cpp?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_dispatch.cpp (original)
+++ openmp/trunk/runtime/src/kmp_dispatch.cpp Thu Feb 28 11:11:29 2019
@@ -858,9 +858,9 @@ __kmp_dispatch_init(ident_t *loc, int gt
     KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d "
                    "sh->buffer_index:%d\n",
                    gtid, my_buffer_index, sh->buffer_index));
-    __kmp_wait_yield<kmp_uint32>(&sh->buffer_index, my_buffer_index,
-                                 __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
-    // Note: KMP_WAIT_YIELD() cannot be used there: buffer index and
+    __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
+                           __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
+    // Note: KMP_WAIT() cannot be used there: buffer index and
     // my_buffer_index are *always* 32-bit integers.
     KMP_MB(); /* is this necessary? */
     KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
@@ -1004,8 +1004,8 @@ static void __kmp_dispatch_finish(int gt
       }
 #endif
 
-      __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
-                           __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
+      __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
+                     __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
       KMP_MB(); /* is this necessary? */
 #ifdef KMP_DEBUG
       {
@@ -1073,8 +1073,8 @@ static void __kmp_dispatch_finish_chunk(
       }
 #endif
 
-      __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
-                           __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
+      __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
+                     __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
 
       KMP_MB(); /* is this necessary? */
       KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting "
@@ -2489,10 +2489,10 @@ kmp_uint32 __kmp_le_4(kmp_uint32 value,
 }
 
 kmp_uint32
-__kmp_wait_yield_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
-                   kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
-                   void *obj // Higher-level synchronization object, or NULL.
-                   ) {
+__kmp_wait_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
+             kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
+             void *obj // Higher-level synchronization object, or NULL.
+             ) {
   // note: we may not belong to a team at this point
   volatile kmp_uint32 *spin = spinner;
   kmp_uint32 check = checker;
@@ -2509,20 +2509,16 @@ __kmp_wait_yield_4(volatile kmp_uint32 *
        split. It causes problems with infinite recursion because of exit lock */
     /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
         __kmp_abort_thread(); */
-
-    /* if we have waited a bit, or are oversubscribed, yield */
-    /* pause is in the following code */
-    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
-    KMP_YIELD_SPIN(spins);
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
   }
   KMP_FSYNC_SPIN_ACQUIRED(obj);
   return r;
 }
 
-void __kmp_wait_yield_4_ptr(
-    void *spinner, kmp_uint32 checker, kmp_uint32 (*pred)(void *, kmp_uint32),
-    void *obj // Higher-level synchronization object, or NULL.
-    ) {
+void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
+                      kmp_uint32 (*pred)(void *, kmp_uint32),
+                      void *obj // Higher-level synchronization object, or NULL.
+                      ) {
   // note: we may not belong to a team at this point
   void *spin = spinner;
   kmp_uint32 check = checker;
@@ -2534,10 +2530,9 @@ void __kmp_wait_yield_4_ptr(
   // main wait spin loop
   while (!f(spin, check)) {
     KMP_FSYNC_SPIN_PREPARE(obj);
-    /* if we have waited a bit, or are oversubscribed, yield */
+    /* if we have waited a bit, or are oversubscribed, yield */
     /* pause is in the following code */
-    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
-    KMP_YIELD_SPIN(spins);
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
   }
   KMP_FSYNC_SPIN_ACQUIRED(obj);
 }

Modified: openmp/trunk/runtime/src/kmp_dispatch.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_dispatch.h?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_dispatch.h (original)
+++ openmp/trunk/runtime/src/kmp_dispatch.h Thu Feb 28 11:11:29 2019
@@ -269,7 +269,7 @@ template <typename T> kmp_uint32 __kmp_e
 }
 
 /*
-    Spin wait loop that first does pause, then yield.
+    Spin wait loop that pauses between checks.
     Waits until function returns non-zero when called with *spinner and check.
     Does NOT put threads to sleep.
     Arguments:
@@ -282,15 +282,14 @@ template <typename T> kmp_uint32 __kmp_e
         is used to report locks consistently. For example, if lock is acquired
         immediately, its address is reported to ittnotify via
         KMP_FSYNC_ACQUIRED(). However, it lock cannot be acquired immediately
-        and lock routine calls to KMP_WAIT_YIELD(), the later should report the
+        and lock routine calls to KMP_WAIT(), the latter should report the
         same address, not an address of low-level spinner.
 #endif // USE_ITT_BUILD
     TODO: make inline function (move to header file for icl)
 */
 template <typename UT>
-static UT __kmp_wait_yield(volatile UT *spinner, UT checker,
-                           kmp_uint32 (*pred)(UT, UT)
-                               USE_ITT_BUILD_ARG(void *obj)) {
+static UT __kmp_wait(volatile UT *spinner, UT checker,
+                     kmp_uint32 (*pred)(UT, UT) USE_ITT_BUILD_ARG(void *obj)) {
   // note: we may not belong to a team at this point
   volatile UT *spin = spinner;
   UT check = checker;
@@ -308,12 +307,8 @@ static UT __kmp_wait_yield(volatile UT *
        It causes problems with infinite recursion because of exit lock */
     /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
         __kmp_abort_thread(); */
-
-    // if we are oversubscribed,
-    // or have waited a bit (and KMP_LIBRARY=throughput, then yield
-    // pause is in the following code
-    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
-    KMP_YIELD_SPIN(spins);
+    // If oversubscribed, or have waited a bit then yield.
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
   }
   KMP_FSYNC_SPIN_ACQUIRED(obj);
   return r;
@@ -379,8 +374,8 @@ void __kmp_dispatch_deo(int *gtid_ref, i
       __kmp_str_free(&buff);
     }
 #endif
-    __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
-                         __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
+    __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
+                   __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
     KMP_MB(); /* is this necessary? */
 #ifdef KMP_DEBUG
     {

Modified: openmp/trunk/runtime/src/kmp_dispatch_hier.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_dispatch_hier.h?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_dispatch_hier.h (original)
+++ openmp/trunk/runtime/src/kmp_dispatch_hier.h Thu Feb 28 11:11:29 2019
@@ -263,8 +263,8 @@ void core_barrier_impl<T>::barrier(kmp_i
                 next_wait_value));
   char v = (current_wait_value ? 0x1 : 0x0);
   (RCAST(volatile char *, &(bdata->val[current_index])))[id] = v;
-  __kmp_wait_yield<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
-                               __kmp_eq<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
+  __kmp_wait<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
+                         __kmp_eq<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
   tdata->wait_val[current_index] = next_wait_value;
   tdata->index = next_index;
 }
@@ -310,8 +310,8 @@ void counter_barrier_impl<T>::barrier(km
                 next_wait_value));
   val = RCAST(volatile kmp_int64 *, &(bdata->val[current_index]));
   KMP_TEST_THEN_INC64(val);
-  __kmp_wait_yield<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
-                               __kmp_ge<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
+  __kmp_wait<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
+                         __kmp_ge<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
   tdata->wait_val[current_index] = next_wait_value;
   tdata->index = next_index;
 }

Modified: openmp/trunk/runtime/src/kmp_global.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_global.cpp?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_global.cpp (original)
+++ openmp/trunk/runtime/src/kmp_global.cpp Thu Feb 28 11:11:29 2019
@@ -62,11 +62,6 @@ int __kmp_version = 0;
 std::atomic<kmp_int32> __kmp_team_counter = ATOMIC_VAR_INIT(0);
 std::atomic<kmp_int32> __kmp_task_counter = ATOMIC_VAR_INIT(0);
 
-unsigned int __kmp_init_wait =
-    KMP_DEFAULT_INIT_WAIT; /* initial number of spin-tests   */
-unsigned int __kmp_next_wait =
-    KMP_DEFAULT_NEXT_WAIT; /* susequent number of spin-tests */
-
 size_t __kmp_stksize = KMP_DEFAULT_STKSIZE;
 #if KMP_USE_MONITOR
 size_t __kmp_monitor_stksize = 0; // auto adjust
@@ -395,22 +390,17 @@ int __kmp_env_blocktime = FALSE; /* KMP_
 int __kmp_env_checks = FALSE; /* KMP_CHECKS specified?    */
 int __kmp_env_consistency_check = FALSE; /* KMP_CONSISTENCY_CHECK specified? */
 
+// From KMP_USE_YIELD:
+// 0 = never yield;
+// 1 = always yield (default);
+// 2 = yield only if oversubscribed
+kmp_int32 __kmp_use_yield = 1;
+// This will be 1 if KMP_USE_YIELD environment variable was set explicitly
+kmp_int32 __kmp_use_yield_exp_set = 0;
+
 kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT;
 kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT;
 
-#if KMP_USE_MONITOR
-kmp_uint32 __kmp_yielding_on = 1;
-#endif
-#if KMP_OS_CNK
-kmp_uint32 __kmp_yield_cycle = 0;
-#else
-kmp_uint32 __kmp_yield_cycle = 1; /* Yield-cycle is on by default */
-#endif
-kmp_int32 __kmp_yield_on_count =
-    10; /* By default, yielding is on for 10 monitor periods. */
-kmp_int32 __kmp_yield_off_count =
-    1; /* By default, yielding is off for 1 monitor periods. */
-
 /* ------------------------------------------------------ */
 /* STATE mostly syncronized with global lock */
 /* data written to rarely by masters, read often by workers */

Modified: openmp/trunk/runtime/src/kmp_itt.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_itt.h?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_itt.h (original)
+++ openmp/trunk/runtime/src/kmp_itt.h Thu Feb 28 11:11:29 2019
@@ -219,7 +219,7 @@ __kmp_inline void __kmp_itt_stack_callee
    with a delay (and not called at all if waiting time is small). So, in spin
    loops, do not use KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before
    spin loop), KMP_FSYNC_SPIN_PREPARE() (whithin the spin loop), and
-   KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT_YIELD() for example. */
+   KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT() for example. */
 
 #undef KMP_FSYNC_SPIN_INIT
 #define KMP_FSYNC_SPIN_INIT(obj, spin)                                         \

Modified: openmp/trunk/runtime/src/kmp_lock.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_lock.cpp?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_lock.cpp (original)
+++ openmp/trunk/runtime/src/kmp_lock.cpp Thu Feb 28 11:11:29 2019
@@ -100,23 +100,12 @@ __kmp_acquire_tas_lock_timed_template(km
   kmp_uint32 spins;
   KMP_FSYNC_PREPARE(lck);
   KMP_INIT_YIELD(spins);
-  if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
-    KMP_YIELD(TRUE);
-  } else {
-    KMP_YIELD_SPIN(spins);
-  }
-
   kmp_backoff_t backoff = __kmp_spin_backoff_params;
-  while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
-         !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
+  do {
     __kmp_spin_backoff(&backoff);
-    if (TCR_4(__kmp_nth) >
-        (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
-      KMP_YIELD(TRUE);
-    } else {
-      KMP_YIELD_SPIN(spins);
-    }
-  }
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+  } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
+           !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
   KMP_FSYNC_ACQUIRED(lck);
   return KMP_LOCK_ACQUIRED_FIRST;
 }
@@ -169,8 +158,7 @@ int __kmp_release_tas_lock(kmp_tas_lock_
   KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
   KMP_MB(); /* Flush all pending memory write invalidates.  */
 
-  KMP_YIELD(TCR_4(__kmp_nth) >
-            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
+  KMP_YIELD_OVERSUB();
   return KMP_LOCK_RELEASED;
 }
 
@@ -474,8 +462,7 @@ int __kmp_release_futex_lock(kmp_futex_l
   KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                   lck->lk.poll, gtid));
 
-  KMP_YIELD(TCR_4(__kmp_nth) >
-            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
+  KMP_YIELD_OVERSUB();
   return KMP_LOCK_RELEASED;
 }
 
@@ -651,7 +638,7 @@ __kmp_acquire_ticket_lock_timed_template
                                 std::memory_order_acquire) == my_ticket) {
     return KMP_LOCK_ACQUIRED_FIRST;
   }
-  KMP_WAIT_YIELD_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
+  KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
   return KMP_LOCK_ACQUIRED_FIRST;
 }
 
@@ -1249,10 +1236,9 @@ __kmp_acquire_queuing_lock_timed_templat
                ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
                 lck, gtid));
 
-      /* ToDo: May want to consider using __kmp_wait_sleep  or something that
-         sleeps for throughput only here. */
       KMP_MB();
-      KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
+      // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
+      KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
 
 #ifdef DEBUG_QUEUING_LOCKS
       TRACE_LOCK(gtid + 1, "acq spin");
@@ -1282,8 +1268,8 @@ __kmp_acquire_queuing_lock_timed_templat
     /* Yield if number of threads > number of logical processors */
     /* ToDo: Not sure why this should only be in oversubscription case,
        maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
-    KMP_YIELD(TCR_4(__kmp_nth) >
-              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
+    KMP_YIELD_OVERSUB();
+
 #ifdef DEBUG_QUEUING_LOCKS
     TRACE_LOCK(gtid + 1, "acq retry");
 #endif
@@ -1462,8 +1448,8 @@ int __kmp_release_queuing_lock(kmp_queui
         KMP_MB();
         /* make sure enqueuing thread has time to update next waiting thread
          * field */
-        *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32 *)waiting_id_p, 0,
-                                    KMP_NEQ, NULL);
+        *head_id_p =
+            KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
 #ifdef DEBUG_QUEUING_LOCKS
         TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
 #endif
@@ -2131,7 +2117,7 @@ static void __kmp_acquire_adaptive_lock(
       // lock from now on.
       while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
         KMP_INC_STAT(lck, lemmingYields);
-        __kmp_yield(TRUE);
+        KMP_YIELD(TRUE);
       }
 
       if (__kmp_test_adaptive_lock_only(lck, gtid))
@@ -2259,23 +2245,14 @@ __kmp_acquire_drdpa_lock_timed_template(
   // polling area has been reconfigured.  Unless it is reconfigured, the
   // reloads stay in L1 cache and are cheap.
   //
-  // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!!
-  //
-  // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
+  // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
+  // The current implementation of KMP_WAIT doesn't allow for mask
   // and poll to be re-read every spin iteration.
   kmp_uint32 spins;
-
   KMP_FSYNC_PREPARE(lck);
   KMP_INIT_YIELD(spins);
   while (polls[ticket & mask] < ticket) { // atomic load
-    // If we are oversubscribed,
-    // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
-    // CPU Pause is in the macros for yield.
-    //
-    KMP_YIELD(TCR_4(__kmp_nth) >
-              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
-    KMP_YIELD_SPIN(spins);
-
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
     // Re-read the mask and the poll pointer from the lock structure.
     //
     // Make certain that "mask" is read before "polls" !!!
@@ -2807,8 +2784,9 @@ static void __kmp_acquire_rtm_lock(kmp_q
     }
     if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
       // Wait until lock becomes free
-      while (!__kmp_is_unlocked_queuing_lock(lck))
-        __kmp_yield(TRUE);
+      while (!__kmp_is_unlocked_queuing_lock(lck)) {
+        KMP_YIELD(TRUE);
+      }
     } else if (!(status & _XABORT_RETRY))
       break;
   } while (retries--);

Modified: openmp/trunk/runtime/src/kmp_lock.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_lock.h?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_lock.h (original)
+++ openmp/trunk/runtime/src/kmp_lock.h Thu Feb 28 11:11:29 2019
@@ -652,21 +652,11 @@ extern int (*__kmp_acquire_user_lock_wit
       kmp_uint32 spins;                                                        \
       KMP_FSYNC_PREPARE(lck);                                                  \
       KMP_INIT_YIELD(spins);                                                   \
-      if (TCR_4(__kmp_nth) >                                                   \
-          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {               \
-        KMP_YIELD(TRUE);                                                       \
-      } else {                                                                 \
-        KMP_YIELD_SPIN(spins);                                                 \
-      }                                                                        \
-      while (lck->tas.lk.poll != 0 || !__kmp_atomic_compare_store_acq(         \
-                                          &lck->tas.lk.poll, 0, gtid + 1)) {   \
-        if (TCR_4(__kmp_nth) >                                                 \
-            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
-          KMP_YIELD(TRUE);                                                     \
-        } else {                                                               \
-          KMP_YIELD_SPIN(spins);                                               \
-        }                                                                      \
-      }                                                                        \
+      do {                                                                     \
+        KMP_YIELD_OVERSUB_ELSE_SPIN(spins);                                    \
+      } while (                                                                \
+          lck->tas.lk.poll != 0 ||                                             \
+          !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));    \
     }                                                                          \
     KMP_FSYNC_ACQUIRED(lck);                                                   \
   } else {                                                                     \
@@ -770,22 +760,11 @@ extern int (*__kmp_acquire_nested_user_l
         kmp_uint32 spins;                                                      \
         KMP_FSYNC_PREPARE(lck);                                                \
         KMP_INIT_YIELD(spins);                                                 \
-        if (TCR_4(__kmp_nth) >                                                 \
-            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
-          KMP_YIELD(TRUE);                                                     \
-        } else {                                                               \
-          KMP_YIELD_SPIN(spins);                                               \
-        }                                                                      \
-        while (                                                                \
+        do {                                                                   \
+          KMP_YIELD_OVERSUB_ELSE_SPIN(spins);                                  \
+        } while (                                                              \
             (lck->tas.lk.poll != 0) ||                                         \
-            !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
-          if (TCR_4(__kmp_nth) >                                               \
-              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {           \
-            KMP_YIELD(TRUE);                                                   \
-          } else {                                                             \
-            KMP_YIELD_SPIN(spins);                                             \
-          }                                                                    \
-        }                                                                      \
+            !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));  \
       }                                                                        \
       lck->tas.lk.depth_locked = 1;                                            \
       *depth = KMP_LOCK_ACQUIRED_FIRST;                                        \

Modified: openmp/trunk/runtime/src/kmp_os.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_os.h?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_os.h (original)
+++ openmp/trunk/runtime/src/kmp_os.h Thu Feb 28 11:11:29 2019
@@ -304,7 +304,7 @@ extern "C" {
 
 #define KMP_CACHE_PREFETCH(ADDR) /* nothing */
 
-// Define attribute that indicates that the fall through from the previous 
+// Define attribute that indicates that the fall through from the previous
 // case label is intentional and should not be diagnosed by a compiler
 //   Code from libcxx/include/__config
 // Use a function like macro to imply that it must be followed by a semicolon
@@ -882,8 +882,8 @@ typedef void (*microtask_t)(int *gtid, i
 #define VOLATILE_CAST(x) (x)
 #endif
 
-#define KMP_WAIT_YIELD __kmp_wait_yield_4
-#define KMP_WAIT_YIELD_PTR __kmp_wait_yield_4_ptr
+#define KMP_WAIT __kmp_wait_4
+#define KMP_WAIT_PTR __kmp_wait_4_ptr
 #define KMP_EQ __kmp_eq_4
 #define KMP_NEQ __kmp_neq_4
 #define KMP_LT __kmp_lt_4

Modified: openmp/trunk/runtime/src/kmp_runtime.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_runtime.cpp?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_runtime.cpp (original)
+++ openmp/trunk/runtime/src/kmp_runtime.cpp Thu Feb 28 11:11:29 2019
@@ -327,7 +327,7 @@ void __kmp_infinite_loop(void) {
   static int done = FALSE;
 
   while (!done) {
-    KMP_YIELD(1);
+    KMP_YIELD(TRUE);
   }
 }
 
@@ -672,24 +672,6 @@ BOOL WINAPI DllMain(HINSTANCE hInstDLL,
 #endif /* KMP_OS_WINDOWS */
 #endif /* KMP_DYNAMIC_LIB */
 
-/* Change the library type to "status" and return the old type */
-/* called from within initialization routines where __kmp_initz_lock is held */
-int __kmp_change_library(int status) {
-  int old_status;
-
-  old_status = __kmp_yield_init &
-               1; // check whether KMP_LIBRARY=throughput (even init count)
-
-  if (status) {
-    __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
-  } else {
-    __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
-  }
-
-  return old_status; // return previous setting of whether
-  // KMP_LIBRARY=throughput
-}
-
 /* __kmp_parallel_deo -- Wait until it's our turn. */
 void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
   int gtid = *gtid_ref;
@@ -708,8 +690,8 @@ void __kmp_parallel_deo(int *gtid_ref, i
 #ifdef BUILD_PARALLEL_ORDERED
   if (!team->t.t_serialized) {
     KMP_MB();
-    KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
-                   KMP_EQ, NULL);
+    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
+             NULL);
     KMP_MB();
   }
 #endif /* BUILD_PARALLEL_ORDERED */
@@ -7735,13 +7717,14 @@ void __kmp_aux_set_library(enum library_
   switch (__kmp_library) {
   case library_serial: {
     KMP_INFORM(LibraryIsSerial);
-    (void)__kmp_change_library(TRUE);
   } break;
   case library_turnaround:
-    (void)__kmp_change_library(TRUE);
+    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
+      __kmp_use_yield = 2; // only yield when oversubscribed
     break;
   case library_throughput:
-    (void)__kmp_change_library(FALSE);
+    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
+      __kmp_dflt_blocktime = 200;
     break;
   default:
     KMP_FATAL(UnknownLibraryType, arg);

Modified: openmp/trunk/runtime/src/kmp_settings.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_settings.cpp?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_settings.cpp (original)
+++ openmp/trunk/runtime/src/kmp_settings.cpp Thu Feb 28 11:11:29 2019
@@ -629,6 +629,19 @@ static void __kmp_stg_print_teams_thread
 } // __kmp_stg_print_teams_thread_limit
 
 // -----------------------------------------------------------------------------
+// KMP_USE_YIELD
+static void __kmp_stg_parse_use_yield(char const *name, char const *value,
+                                      void *data) {
+  __kmp_stg_parse_int(name, value, 0, 2, &__kmp_use_yield);
+  __kmp_use_yield_exp_set = 1;
+} // __kmp_stg_parse_use_yield
+
+static void __kmp_stg_print_use_yield(kmp_str_buf_t *buffer, char const *name,
+                                      void *data) {
+  __kmp_stg_print_int(buffer, name, __kmp_use_yield);
+} // __kmp_stg_print_use_yield
+
+// -----------------------------------------------------------------------------
 // KMP_BLOCKTIME
 
 static void __kmp_stg_parse_blocktime(char const *name, char const *value,
@@ -745,18 +758,24 @@ static void __kmp_stg_parse_wait_policy(
       __kmp_library = library_serial;
     } else if (__kmp_str_match("throughput", 2, value)) { /* TH */
       __kmp_library = library_throughput;
+      if (blocktime_str == NULL) {
+        // KMP_BLOCKTIME not specified, so set default to 0.
+        __kmp_dflt_blocktime = 0;
+      }
     } else if (__kmp_str_match("turnaround", 2, value)) { /* TU */
       __kmp_library = library_turnaround;
     } else if (__kmp_str_match("dedicated", 1, value)) { /* D */
       __kmp_library = library_turnaround;
     } else if (__kmp_str_match("multiuser", 1, value)) { /* M */
       __kmp_library = library_throughput;
+      if (blocktime_str == NULL) {
+        // KMP_BLOCKTIME not specified, so set default to 0.
+        __kmp_dflt_blocktime = 0;
+      }
     } else {
       KMP_WARNING(StgInvalidValue, name, value);
     }
   }
-  __kmp_aux_set_library(__kmp_library);
-
 } // __kmp_stg_parse_wait_policy
 
 static void __kmp_stg_print_wait_policy(kmp_str_buf_t *buffer, char const *name,
@@ -3944,79 +3963,9 @@ static void __kmp_stg_print_par_range_en
   }
 } // __kmp_stg_print_par_range_env
 
-// -----------------------------------------------------------------------------
-// KMP_YIELD_CYCLE, KMP_YIELD_ON, KMP_YIELD_OFF
-
-static void __kmp_stg_parse_yield_cycle(char const *name, char const *value,
-                                        void *data) {
-  int flag = __kmp_yield_cycle;
-  __kmp_stg_parse_bool(name, value, &flag);
-  __kmp_yield_cycle = flag;
-} // __kmp_stg_parse_yield_cycle
-
-static void __kmp_stg_print_yield_cycle(kmp_str_buf_t *buffer, char const *name,
-                                        void *data) {
-  __kmp_stg_print_bool(buffer, name, __kmp_yield_cycle);
-} // __kmp_stg_print_yield_cycle
-
-static void __kmp_stg_parse_yield_on(char const *name, char const *value,
-                                     void *data) {
-  __kmp_stg_parse_int(name, value, 2, INT_MAX, &__kmp_yield_on_count);
-} // __kmp_stg_parse_yield_on
-
-static void __kmp_stg_print_yield_on(kmp_str_buf_t *buffer, char const *name,
-                                     void *data) {
-  __kmp_stg_print_int(buffer, name, __kmp_yield_on_count);
-} // __kmp_stg_print_yield_on
-
-static void __kmp_stg_parse_yield_off(char const *name, char const *value,
-                                      void *data) {
-  __kmp_stg_parse_int(name, value, 2, INT_MAX, &__kmp_yield_off_count);
-} // __kmp_stg_parse_yield_off
-
-static void __kmp_stg_print_yield_off(kmp_str_buf_t *buffer, char const *name,
-                                      void *data) {
-  __kmp_stg_print_int(buffer, name, __kmp_yield_off_count);
-} // __kmp_stg_print_yield_off
-
 #endif
 
 // -----------------------------------------------------------------------------
-// KMP_INIT_WAIT, KMP_NEXT_WAIT
-
-static void __kmp_stg_parse_init_wait(char const *name, char const *value,
-                                      void *data) {
-  int wait;
-  KMP_ASSERT((__kmp_init_wait & 1) == 0);
-  wait = __kmp_init_wait / 2;
-  __kmp_stg_parse_int(name, value, KMP_MIN_INIT_WAIT, KMP_MAX_INIT_WAIT, &wait);
-  __kmp_init_wait = wait * 2;
-  KMP_ASSERT((__kmp_init_wait & 1) == 0);
-  __kmp_yield_init = __kmp_init_wait;
-} // __kmp_stg_parse_init_wait
-
-static void __kmp_stg_print_init_wait(kmp_str_buf_t *buffer, char const *name,
-                                      void *data) {
-  __kmp_stg_print_int(buffer, name, __kmp_init_wait);
-} // __kmp_stg_print_init_wait
-
-static void __kmp_stg_parse_next_wait(char const *name, char const *value,
-                                      void *data) {
-  int wait;
-  KMP_ASSERT((__kmp_next_wait & 1) == 0);
-  wait = __kmp_next_wait / 2;
-  __kmp_stg_parse_int(name, value, KMP_MIN_NEXT_WAIT, KMP_MAX_NEXT_WAIT, &wait);
-  __kmp_next_wait = wait * 2;
-  KMP_ASSERT((__kmp_next_wait & 1) == 0);
-  __kmp_yield_next = __kmp_next_wait;
-} // __kmp_stg_parse_next_wait
-
-static void __kmp_stg_print_next_wait(kmp_str_buf_t *buffer, char const *name,
-                                      void *data) {
-  __kmp_stg_print_int(buffer, name, __kmp_next_wait);
-} //__kmp_stg_print_next_wait
-
-// -----------------------------------------------------------------------------
 // KMP_GTID_MODE
 
 static void __kmp_stg_parse_gtid_mode(char const *name, char const *value,
@@ -4726,6 +4675,8 @@ static kmp_setting_t __kmp_stg_table[] =
     {"KMP_ALL_THREADS", __kmp_stg_parse_device_thread_limit, NULL, NULL, 0, 0},
     {"KMP_BLOCKTIME", __kmp_stg_parse_blocktime, __kmp_stg_print_blocktime,
      NULL, 0, 0},
+    {"KMP_USE_YIELD", __kmp_stg_parse_use_yield, __kmp_stg_print_use_yield,
+     NULL, 0, 0},
     {"KMP_DUPLICATE_LIB_OK", __kmp_stg_parse_duplicate_lib_ok,
      __kmp_stg_print_duplicate_lib_ok, NULL, 0, 0},
     {"KMP_LIBRARY", __kmp_stg_parse_wait_policy, __kmp_stg_print_wait_policy,
@@ -4830,12 +4781,6 @@ static kmp_setting_t __kmp_stg_table[] =
 
     {"KMP_PAR_RANGE", __kmp_stg_parse_par_range_env,
      __kmp_stg_print_par_range_env, NULL, 0, 0},
-    {"KMP_YIELD_CYCLE", __kmp_stg_parse_yield_cycle,
-     __kmp_stg_print_yield_cycle, NULL, 0, 0},
-    {"KMP_YIELD_ON", __kmp_stg_parse_yield_on, __kmp_stg_print_yield_on, NULL,
-     0, 0},
-    {"KMP_YIELD_OFF", __kmp_stg_parse_yield_off, __kmp_stg_print_yield_off,
-     NULL, 0, 0},
 #endif // KMP_DEBUG
 
     {"KMP_ALIGN_ALLOC", __kmp_stg_parse_align_alloc,
@@ -4927,10 +4872,6 @@ static kmp_setting_t __kmp_stg_table[] =
 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
     {"KMP_MALLOC_POOL_INCR", __kmp_stg_parse_malloc_pool_incr,
      __kmp_stg_print_malloc_pool_incr, NULL, 0, 0},
-    {"KMP_INIT_WAIT", __kmp_stg_parse_init_wait, __kmp_stg_print_init_wait,
-     NULL, 0, 0},
-    {"KMP_NEXT_WAIT", __kmp_stg_parse_next_wait, __kmp_stg_print_next_wait,
-     NULL, 0, 0},
     {"KMP_GTID_MODE", __kmp_stg_parse_gtid_mode, __kmp_stg_print_gtid_mode,
      NULL, 0, 0},
     {"OMP_DYNAMIC", __kmp_stg_parse_omp_dynamic, __kmp_stg_print_omp_dynamic,

Modified: openmp/trunk/runtime/src/kmp_tasking.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_tasking.cpp?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_tasking.cpp (original)
+++ openmp/trunk/runtime/src/kmp_tasking.cpp Thu Feb 28 11:11:29 2019
@@ -2705,8 +2705,7 @@ static inline int __kmp_execute_tasks_te
       if (thread->th.th_task_team == NULL) {
         break;
       }
-      // Yield before executing next task
-      KMP_YIELD(__kmp_library == library_throughput);
+      KMP_YIELD(__kmp_library == library_throughput); // Yield before next task
       // If execution of a stolen task results in more tasks being placed on our
       // run queue, reset use_own_tasks
       if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
@@ -3242,10 +3241,8 @@ void __kmp_wait_to_unref_task_teams(void
       break;
     }
 
-    // If we are oversubscribed, or have waited a bit (and library mode is
-    // throughput), yield. Pause is in the following code.
-    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
-    KMP_YIELD_SPIN(spins); // Yields only if KMP_LIBRARY=throughput
+    // If oversubscribed or have waited a bit, yield.
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
   }
 }
 
@@ -3410,7 +3407,7 @@ void __kmp_tasking_barrier(kmp_team_t *t
         __kmp_abort_thread();
       break;
     }
-    KMP_YIELD(TRUE); // GH: We always yield here
+    KMP_YIELD(TRUE);
   }
 #if USE_ITT_BUILD
   KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin));

Modified: openmp/trunk/runtime/src/kmp_taskq.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_taskq.cpp?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_taskq.cpp (original)
+++ openmp/trunk/runtime/src/kmp_taskq.cpp Thu Feb 28 11:11:29 2019
@@ -51,7 +51,7 @@ static void __kmp_taskq_eo(int *gtid_ref
 
     taskq = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
 
-    KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
+    KMP_WAIT(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
     KMP_MB();
   }
 }
@@ -95,7 +95,7 @@ static void __kmp_taskq_check_ordered(km
   taskq = thunk->th.th_shareds->sv_queue;
 
   if (taskq->tq_tasknum_serving <= my_token) {
-    KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
+    KMP_WAIT(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
     KMP_MB();
     taskq->tq_tasknum_serving = my_token + 1;
     KMP_MB();
@@ -1056,8 +1056,7 @@ static void __kmp_remove_queue_from_tree
     while (queue->tq_ref_count > 1) {
       __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
 
-      KMP_WAIT_YIELD((volatile kmp_uint32 *)&queue->tq_ref_count, 1, KMP_LE,
-                     NULL);
+      KMP_WAIT((volatile kmp_uint32 *)&queue->tq_ref_count, 1, KMP_LE, NULL);
 
       __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
       // Make sure data structures are in consistent state before querying them
@@ -1538,8 +1537,6 @@ void __kmpc_end_taskq(ident_t *loc, kmp_
   in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
 
   if (in_parallel) {
-    kmp_uint32 spins;
-
     /* this is just a safeguard to release the waiting threads if */
     /* the outermost taskq never queues a task                    */
 
@@ -1556,12 +1553,10 @@ void __kmpc_end_taskq(ident_t *loc, kmp_
 
     do {
       /* wait until something is available to dequeue */
-      KMP_INIT_YIELD(spins);
-
       while ((queue->tq_nfull == 0) && (queue->tq_taskq_slot == NULL) &&
              (!__kmp_taskq_has_any_children(queue)) &&
              (!(queue->tq_flags & TQF_ALL_TASKS_QUEUED))) {
-        KMP_YIELD_WHEN(TRUE, spins);
+        KMP_CPU_PAUSE();
       }
 
       /* check to see if we can execute tasks in the queue */
@@ -1628,7 +1623,6 @@ void __kmpc_end_taskq(ident_t *loc, kmp_
 
       /* WAIT until all tasks are finished and no child queues exist before
        * proceeding */
-      KMP_INIT_YIELD(spins);
 
       while (!__kmp_taskq_tasks_finished(queue) ||
              __kmp_taskq_has_any_children(queue)) {
@@ -1643,7 +1637,8 @@ void __kmpc_end_taskq(ident_t *loc, kmp_
                                         in_parallel);
         }
 
-        KMP_YIELD_WHEN(thunk == NULL, spins);
+        if (thunk == NULL)
+          KMP_CPU_PAUSE();
 
         __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);
       }
@@ -1669,8 +1664,6 @@ void __kmpc_end_taskq(ident_t *loc, kmp_
 
     // Outermost Queue: steal work from descendants until all tasks are finished
 
-    KMP_INIT_YIELD(spins);
-
     while (!__kmp_taskq_tasks_finished(queue)) {
       thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);
 
@@ -1683,7 +1676,8 @@ void __kmpc_end_taskq(ident_t *loc, kmp_
         __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
       }
 
-      KMP_YIELD_WHEN(thunk == NULL, spins);
+      if (thunk == NULL)
+        KMP_CPU_PAUSE();
     }
 
     /* Need this barrier to prevent destruction of queue before threads have all

Modified: openmp/trunk/runtime/src/kmp_wait_release.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_wait_release.h?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_wait_release.h (original)
+++ openmp/trunk/runtime/src/kmp_wait_release.h Thu Feb 28 11:11:29 2019
@@ -150,8 +150,8 @@ static void __ompt_implicit_task_end(kmp
 }
 #endif
 
-/* Spin wait loop that first does pause, then yield, then sleep. A thread that
-   calls __kmp_wait_*  must make certain that another thread calls __kmp_release
+/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
+   __kmp_wait_*  must make certain that another thread calls __kmp_release
    to wake it back up to prevent deadlocks!
 
    NOTE: We may not belong to a team at this point.  */
@@ -270,8 +270,7 @@ final_spin=FALSE)
   }
 #endif
 
-  // Setup for waiting
-  KMP_INIT_YIELD(spins);
+  KMP_INIT_YIELD(spins); // Setup for waiting
 
   if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
 #if OMP_50_ENABLED
@@ -368,14 +367,8 @@ final_spin=FALSE)
 
     // If we are oversubscribed, or have waited a bit (and
     // KMP_LIBRARY=throughput), then yield
-    // TODO: Should it be number of cores instead of thread contexts? Like:
-    // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
-    // Need performance improvement data to make the change...
-    if (oversubscribed) {
-      KMP_YIELD(1);
-    } else {
-      KMP_YIELD_SPIN(spins);
-    }
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+
     // Check if this thread was transferred from a team
     // to the thread pool (or vice-versa) while spinning.
     in_pool = !!TCR_4(this_thr->th.th_in_pool);

Modified: openmp/trunk/runtime/src/z_Linux_util.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_util.cpp?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_util.cpp (original)
+++ openmp/trunk/runtime/src/z_Linux_util.cpp Thu Feb 28 11:11:29 2019
@@ -437,7 +437,7 @@ void __kmp_terminate_thread(int gtid) {
                 __kmp_msg_null);
   }
 #endif
-  __kmp_yield(TRUE);
+  KMP_YIELD(TRUE);
 } //
 
 /* Set thread stack info according to values returned by pthread_getattr_np().
@@ -580,8 +580,6 @@ static void *__kmp_launch_monitor(void *
   sigset_t new_set;
 #endif /* KMP_BLOCK_SIGNALS */
   struct timespec interval;
-  int yield_count;
-  int yield_cycles = 0;
 
   KMP_MB(); /* Flush all pending memory write invalidates.  */
 
@@ -665,13 +663,6 @@ static void *__kmp_launch_monitor(void *
 
   KA_TRACE(10, ("__kmp_launch_monitor: #2 monitor\n"));
 
-  if (__kmp_yield_cycle) {
-    __kmp_yielding_on = 0; /* Start out with yielding shut off */
-    yield_count = __kmp_yield_off_count;
-  } else {
-    __kmp_yielding_on = 1; /* Yielding is on permanently */
-  }
-
   while (!TCR_4(__kmp_global.g.g_done)) {
     struct timespec now;
     struct timeval tval;
@@ -707,22 +698,6 @@ static void *__kmp_launch_monitor(void *
     status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
     KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
 
-    if (__kmp_yield_cycle) {
-      yield_cycles++;
-      if ((yield_cycles % yield_count) == 0) {
-        if (__kmp_yielding_on) {
-          __kmp_yielding_on = 0; /* Turn it off now */
-          yield_count = __kmp_yield_off_count;
-        } else {
-          __kmp_yielding_on = 1; /* Turn it on now */
-          yield_count = __kmp_yield_on_count;
-        }
-        yield_cycles = 0;
-      }
-    } else {
-      __kmp_yielding_on = 1;
-    }
-
     TCW_4(__kmp_global.g.g_time.dt.t_value,
           TCR_4(__kmp_global.g.g_time.dt.t_value) + 1);
 
@@ -1011,8 +986,8 @@ retry:
   // Wait for the monitor thread is really started and set its *priority*.
   KMP_DEBUG_ASSERT(sizeof(kmp_uint32) ==
                    sizeof(__kmp_global.g.g_time.dt.t_value));
-  __kmp_wait_yield_4((kmp_uint32 volatile *)&__kmp_global.g.g_time.dt.t_value,
-                     -1, &__kmp_neq_4, NULL);
+  __kmp_wait_4((kmp_uint32 volatile *)&__kmp_global.g.g_time.dt.t_value, -1,
+               &__kmp_neq_4, NULL);
 #endif // KMP_REAL_TIME_FIX
 
 #ifdef KMP_THREAD_ATTR
@@ -1688,18 +1663,7 @@ void __kmp_resume_monitor() {
 }
 #endif // KMP_USE_MONITOR
 
-void __kmp_yield(int cond) {
-  if (!cond)
-    return;
-#if KMP_USE_MONITOR
-  if (!__kmp_yielding_on)
-    return;
-#else
-  if (__kmp_yield_cycle && !KMP_YIELD_NOW())
-    return;
-#endif
-  sched_yield();
-}
+void __kmp_yield() { sched_yield(); }
 
 void __kmp_gtid_set_specific(int gtid) {
   if (__kmp_init_gtid) {

Modified: openmp/trunk/runtime/src/z_Windows_NT_util.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Windows_NT_util.cpp?rev=355120&r1=355119&r2=355120&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Windows_NT_util.cpp (original)
+++ openmp/trunk/runtime/src/z_Windows_NT_util.cpp Thu Feb 28 11:11:29 2019
@@ -483,10 +483,7 @@ void __kmp_resume_oncore(int target_gtid
   __kmp_resume_template(target_gtid, flag);
 }
 
-void __kmp_yield(int cond) {
-  if (cond)
-    Sleep(0);
-}
+void __kmp_yield() { Sleep(0); }
 
 void __kmp_gtid_set_specific(int gtid) {
   if (__kmp_init_gtid) {
@@ -1245,8 +1242,8 @@ static void __kmp_reap_common(kmp_info_t
      Right solution seems to be waiting for *either* thread termination *or*
      ds_alive resetting. */
   {
-    // TODO: This code is very similar to KMP_WAIT_YIELD. Need to generalize
-    // KMP_WAIT_YIELD to cover this usage also.
+    // TODO: This code is very similar to KMP_WAIT. Need to generalize
+    // KMP_WAIT to cover this usage also.
     void *obj = NULL;
     kmp_uint32 spins;
 #if USE_ITT_BUILD
@@ -1258,8 +1255,7 @@ static void __kmp_reap_common(kmp_info_t
       KMP_FSYNC_SPIN_PREPARE(obj);
 #endif /* USE_ITT_BUILD */
       __kmp_is_thread_alive(th, &exit_val);
-      KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
-      KMP_YIELD_SPIN(spins);
+      KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
     } while (exit_val == STILL_ACTIVE && TCR_4(th->th.th_info.ds.ds_alive));
 #if USE_ITT_BUILD
     if (exit_val == STILL_ACTIVE) {




More information about the Openmp-commits mailing list