[Openmp-commits] [openmp] 4eb90e8 - Revert "[OpenMP] Add Two-level Distributed Barrier"
Johannes Doerfert via Openmp-commits
openmp-commits at lists.llvm.org
Tue Jun 29 07:39:11 PDT 2021
Author: Johannes Doerfert
Date: 2021-06-29T09:38:27-05:00
New Revision: 4eb90e893f82314def571f7129dfd88bd098208b
URL: https://github.com/llvm/llvm-project/commit/4eb90e893f82314def571f7129dfd88bd098208b
DIFF: https://github.com/llvm/llvm-project/commit/4eb90e893f82314def571f7129dfd88bd098208b.diff
LOG: Revert "[OpenMP] Add Two-level Distributed Barrier"
This reverts commit 25073a4ecfc9b2e3cb76776185e63bfdb094cd98.
This has been breaking non-x86 OpenMP builds for a while now. Until a solution
is ready to be upstreamed, we revert the feature and unblock those builds.
See:
https://reviews.llvm.org/rG25073a4ecfc9b2e3cb76776185e63bfdb094cd98#1005821
and
https://reviews.llvm.org/rG25073a4ecfc9b2e3cb76776185e63bfdb094cd98#1005821
The currently proposed fix (D104788) does not seem to be ready yet:
https://reviews.llvm.org/D104788#2841928
Added:
Modified:
openmp/runtime/src/i18n/en_US.txt
openmp/runtime/src/kmp.h
openmp/runtime/src/kmp_atomic.cpp
openmp/runtime/src/kmp_barrier.cpp
openmp/runtime/src/kmp_global.cpp
openmp/runtime/src/kmp_os.h
openmp/runtime/src/kmp_runtime.cpp
openmp/runtime/src/kmp_settings.cpp
openmp/runtime/src/kmp_stats.h
openmp/runtime/src/kmp_str.cpp
openmp/runtime/src/kmp_str.h
openmp/runtime/src/kmp_tasking.cpp
openmp/runtime/src/kmp_wait_release.cpp
openmp/runtime/src/kmp_wait_release.h
openmp/runtime/src/z_Linux_util.cpp
openmp/runtime/src/z_Windows_NT_util.cpp
openmp/runtime/test/barrier/omp_barrier.c
Removed:
openmp/runtime/src/kmp_barrier.h
################################################################################
diff --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt
index 435579fc7e7a..0b5436fd5801 100644
--- a/openmp/runtime/src/i18n/en_US.txt
+++ b/openmp/runtime/src/i18n/en_US.txt
@@ -269,7 +269,6 @@ Using_int_Value "%1$s value \"%2$d\" will be used."
Using_uint_Value "%1$s value \"%2$u\" will be used."
Using_uint64_Value "%1$s value \"%2$s\" will be used."
Using_str_Value "%1$s value \"%2$s\" will be used."
-BarrierPatternOverride "Mixing other barrier patterns with dist is prohibited. Using dist for all barrier patterns."
MaxValueUsing "%1$s maximum value \"%2$d\" will be used."
MinValueUsing "%1$s minimum value \"%2$d\" will be used."
MemoryAllocFailed "Memory allocation failed."
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 835d4ad55980..05264f4433d3 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -115,7 +115,6 @@ typedef unsigned int kmp_hwloc_depth_t;
#include "kmp_debug.h"
#include "kmp_lock.h"
#include "kmp_version.h"
-#include "kmp_barrier.h"
#if USE_DEBUGGER
#include "kmp_debugger.h"
#endif
@@ -264,7 +263,6 @@ typedef union kmp_root kmp_root_p;
template <bool C = false, bool S = true> class kmp_flag_32;
template <bool C = false, bool S = true> class kmp_flag_64;
-template <bool C = false, bool S = true> class kmp_atomic_flag_64;
class kmp_flag_oncore;
#ifdef __cplusplus
@@ -1881,15 +1879,6 @@ typedef struct kmp_disp {
0 // Thread th_reap_state: not safe to reap (tasking)
#define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking)
-// The flag_type describes the storage used for the flag.
-enum flag_type {
- flag32, /**< atomic 32 bit flags */
- flag64, /**< 64 bit flags */
- atomic_flag64, /**< atomic 64 bit flags */
- flag_oncore, /**< special 64-bit flag for on-core barrier (hierarchical) */
- flag_unset
-};
-
enum barrier_type {
bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction
barriers if enabled) */
@@ -1913,7 +1902,6 @@ typedef enum kmp_bar_pat { /* Barrier communication patterns */
bp_hyper_bar = 2, /* Hypercube-embedded tree with min
branching factor 2^n */
bp_hierarchical_bar = 3, /* Machine hierarchy tree */
- bp_dist_bar = 4, /* Distributed barrier */
bp_last_bar /* Placeholder to mark the end */
} kmp_bar_pat_e;
@@ -2638,7 +2626,6 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
/* while awaiting queuing lock acquire */
volatile void *th_sleep_loc; // this points at a kmp_flag<T>
- flag_type th_sleep_loc_type; // enum type of flag stored in th_sleep_loc
ident_t *th_ident;
unsigned th_x; // Random number generator data
@@ -2659,9 +2646,6 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
written by the worker thread) */
kmp_uint8 th_active_in_pool; // included in count of #active threads in pool
int th_active; // ! sleeping; 32 bits for TCR/TCW
- std::atomic<kmp_uint32> th_used_in_team; // Flag indicating use in team
- // 0 = not used in team; 1 = used in team;
- // 2 = transitioning to not used in team; 3 = transitioning to used in team
struct cons_header *th_cons; // used for consistency check
#if KMP_USE_HIER_SCHED
// used for hierarchical scheduling
@@ -2841,7 +2825,6 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
#if USE_ITT_BUILD
void *t_stack_id; // team specific stack stitching id (for ittnotify)
#endif /* USE_ITT_BUILD */
- distributedBarrier *b; // Distributed barrier data associated with team
} kmp_base_team_t;
union KMP_ALIGN_CACHE kmp_team {
@@ -4143,26 +4126,18 @@ template <bool C, bool S>
extern void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag);
template <bool C, bool S>
extern void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag);
-template <bool C, bool S>
-extern void __kmp_atomic_suspend_64(int th_gtid,
- kmp_atomic_flag_64<C, S> *flag);
extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
template <bool C, bool S>
extern void __kmp_mwait_32(int th_gtid, kmp_flag_32<C, S> *flag);
template <bool C, bool S>
extern void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag);
-template <bool C, bool S>
-extern void __kmp_atomic_mwait_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag);
extern void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag);
#endif
template <bool C, bool S>
extern void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag);
template <bool C, bool S>
extern void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag);
-template <bool C, bool S>
-extern void __kmp_atomic_resume_64(int target_gtid,
- kmp_atomic_flag_64<C, S> *flag);
extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);
template <bool C, bool S>
@@ -4181,14 +4156,6 @@ int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
void *itt_sync_obj,
#endif /* USE_ITT_BUILD */
kmp_int32 is_constrained);
-template <bool C, bool S>
-int __kmp_atomic_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
- kmp_atomic_flag_64<C, S> *flag,
- int final_spin, int *thread_finished,
-#if USE_ITT_BUILD
- void *itt_sync_obj,
-#endif /* USE_ITT_BUILD */
- kmp_int32 is_constrained);
int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
kmp_flag_oncore *flag, int final_spin,
int *thread_finished,
diff --git a/openmp/runtime/src/kmp_atomic.cpp b/openmp/runtime/src/kmp_atomic.cpp
index fdcfc6ef540f..fcc06216a4fa 100644
--- a/openmp/runtime/src/kmp_atomic.cpp
+++ b/openmp/runtime/src/kmp_atomic.cpp
@@ -732,7 +732,7 @@ static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
__kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- (*lhs) = (TYPE)((*lhs)OP rhs); \
+ (*lhs) = (TYPE)((*lhs)OP((TYPE)rhs)); \
__kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
// ------------------------------------------------------------------------
@@ -791,14 +791,14 @@ static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
{ \
TYPE old_value, new_value; \
old_value = *(TYPE volatile *)lhs; \
- new_value = (TYPE)(old_value OP rhs); \
+ new_value = (TYPE)(old_value OP((TYPE)rhs)); \
while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
(kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
*VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
KMP_DO_PAUSE; \
\
old_value = *(TYPE volatile *)lhs; \
- new_value = (TYPE)(old_value OP rhs); \
+ new_value = (TYPE)(old_value OP((TYPE)rhs)); \
} \
}
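
For context, the OP_CMPXCHG-style macros touched above expand to a compare-and-swap retry loop: read the old value, compute the updated value, and retry the exchange until no other thread modified *lhs in between. Below is a minimal sketch of that loop (illustrative only, not part of this patch), written with std::atomic rather than the runtime's KMP_COMPARE_AND_STORE_ACQ* primitives; the function and parameter names are placeholders.

  #include <atomic>

  // Illustrative only: CAS retry loop in the style of the OP_CMPXCHG macros.
  template <typename TYPE, typename OpFn>
  void atomic_update(std::atomic<TYPE> &lhs, TYPE rhs, OpFn op) {
    TYPE old_value = lhs.load(std::memory_order_relaxed);
    TYPE new_value = op(old_value, rhs);
    // compare_exchange_weak reloads old_value on failure, so recompute the
    // new value and retry until no other thread raced in between.
    while (!lhs.compare_exchange_weak(old_value, new_value)) {
      new_value = op(old_value, rhs);
    }
  }

  // Example use: atomic_update(x, 5, [](int a, int b) { return a + b; });
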
diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp
index 134163e23fa7..237d18a73dcd 100644
--- a/openmp/runtime/src/kmp_barrier.cpp
+++ b/openmp/runtime/src/kmp_barrier.cpp
@@ -10,14 +10,12 @@
//
//===----------------------------------------------------------------------===//
+#include "kmp.h"
#include "kmp_wait_release.h"
-#include "kmp_barrier.h"
#include "kmp_itt.h"
#include "kmp_os.h"
#include "kmp_stats.h"
#include "ompt-specific.h"
-// for distributed barrier
-#include "kmp_affinity.h"
#if KMP_MIC
#include <immintrin.h>
@@ -42,517 +40,6 @@
void __kmp_print_structure(void); // Forward declaration
// ---------------------------- Barrier Algorithms ----------------------------
-// Distributed barrier
-
-// Compute how many threads to have polling each cache-line.
-// We want to limit the number of writes to IDEAL_GO_RESOLUTION.
-void distributedBarrier::computeVarsForN(size_t n) {
- int nsockets = 1;
- if (__kmp_topology) {
- int socket_level = __kmp_topology->get_level(KMP_HW_SOCKET);
- int core_level = __kmp_topology->get_level(KMP_HW_CORE);
- int ncores_per_socket =
- __kmp_topology->calculate_ratio(core_level, socket_level);
- nsockets = __kmp_topology->get_count(socket_level);
-
- if (nsockets <= 0)
- nsockets = 1;
- if (ncores_per_socket <= 0)
- ncores_per_socket = 1;
-
- threads_per_go = ncores_per_socket >> 1;
- if (!fix_threads_per_go) {
- // Minimize num_gos
- if (threads_per_go > 4) {
- if (KMP_OPTIMIZE_FOR_REDUCTIONS) {
- threads_per_go = threads_per_go >> 1;
- }
- if (threads_per_go > 4 && nsockets == 1)
- threads_per_go = threads_per_go >> 1;
- }
- }
- if (threads_per_go == 0)
- threads_per_go = 1;
- fix_threads_per_go = true;
- num_gos = n / threads_per_go;
- if (n % threads_per_go)
- num_gos++;
- if (nsockets == 1 || num_gos == 1)
- num_groups = 1;
- else {
- num_groups = num_gos / nsockets;
- if (num_gos % nsockets)
- num_groups++;
- }
- if (num_groups <= 0)
- num_groups = 1;
- gos_per_group = num_gos / num_groups;
- if (num_gos % num_groups)
- gos_per_group++;
- threads_per_group = threads_per_go * gos_per_group;
- } else {
- num_gos = n / threads_per_go;
- if (n % threads_per_go)
- num_gos++;
- if (num_gos == 1)
- num_groups = 1;
- else {
- num_groups = num_gos / 2;
- if (num_gos % 2)
- num_groups++;
- }
- gos_per_group = num_gos / num_groups;
- if (num_gos % num_groups)
- gos_per_group++;
- threads_per_group = threads_per_go * gos_per_group;
- }
-}
-
-void distributedBarrier::computeGo(size_t n) {
- // Minimize num_gos
- for (num_gos = 1;; num_gos++)
- if (IDEAL_CONTENTION * num_gos >= n)
- break;
- threads_per_go = n / num_gos;
- if (n % num_gos)
- threads_per_go++;
- while (num_gos > MAX_GOS) {
- threads_per_go++;
- num_gos = n / threads_per_go;
- if (n % threads_per_go)
- num_gos++;
- }
- computeVarsForN(n);
-}
-
-// This function is to resize the barrier arrays when the new number of threads
-// exceeds max_threads, which is the current size of all the arrays
-void distributedBarrier::resize(size_t nthr) {
- KMP_DEBUG_ASSERT(nthr > max_threads);
-
- // expand to requested size * 2
- max_threads = nthr * 2;
-
- // allocate arrays to new max threads
- for (int i = 0; i < MAX_ITERS; ++i) {
- if (flags[i])
- flags[i] = (flags_s *)KMP_INTERNAL_REALLOC(flags[i],
- max_threads * sizeof(flags_s));
- else
- flags[i] = (flags_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(flags_s));
- }
-
- if (go)
- go = (go_s *)KMP_INTERNAL_REALLOC(go, max_threads * sizeof(go_s));
- else
- go = (go_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(go_s));
-
- if (iter)
- iter = (iter_s *)KMP_INTERNAL_REALLOC(iter, max_threads * sizeof(iter_s));
- else
- iter = (iter_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(iter_s));
-
- if (sleep)
- sleep =
- (sleep_s *)KMP_INTERNAL_REALLOC(sleep, max_threads * sizeof(sleep_s));
- else
- sleep = (sleep_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(sleep_s));
-}
-
-// This function is to set all the go flags that threads might be waiting
-// on, and when blocktime is not infinite, it should be followed by a wake-up
-// call to each thread
-kmp_uint64 distributedBarrier::go_release() {
- kmp_uint64 next_go = iter[0].iter + distributedBarrier::MAX_ITERS;
- for (size_t j = 0; j < num_gos; j++) {
- go[j].go.store(next_go);
- }
- return next_go;
-}
-
-void distributedBarrier::go_reset() {
- for (size_t j = 0; j < max_threads; ++j) {
- for (size_t i = 0; i < distributedBarrier::MAX_ITERS; ++i) {
- flags[i][j].stillNeed = 1;
- }
- go[j].go.store(0);
- iter[j].iter = 0;
- }
-}
-
-// This function inits/re-inits the distributed barrier for a particular number
-// of threads. If a resize of arrays is needed, it calls the resize function.
-void distributedBarrier::init(size_t nthr) {
- size_t old_max = max_threads;
- if (nthr > max_threads) { // need more space in arrays
- resize(nthr);
- }
-
- for (size_t i = 0; i < max_threads; i++) {
- for (size_t j = 0; j < distributedBarrier::MAX_ITERS; j++) {
- flags[j][i].stillNeed = 1;
- }
- go[i].go.store(0);
- iter[i].iter = 0;
- if (i >= old_max)
- sleep[i].sleep = false;
- }
-
- // Recalculate num_gos, etc. based on new nthr
- computeVarsForN(nthr);
-
- num_threads = nthr;
-
- if (team_icvs == NULL)
- team_icvs = __kmp_allocate(sizeof(kmp_internal_control_t));
-}
-
-// This function is used only when KMP_BLOCKTIME is not infinite.
-// static
-void __kmp_dist_barrier_wakeup(enum barrier_type bt, kmp_team_t *team,
- size_t start, size_t stop, size_t inc,
- size_t tid) {
- KMP_DEBUG_ASSERT(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME);
- if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
- return;
-
- kmp_info_t **other_threads = team->t.t_threads;
- for (size_t thr = start; thr < stop; thr += inc) {
- KMP_DEBUG_ASSERT(other_threads[thr]);
- int gtid = other_threads[thr]->th.th_info.ds.ds_gtid;
- // Wake up worker regardless of if it appears to be sleeping or not
- __kmp_atomic_resume_64(gtid, (kmp_atomic_flag_64<> *)NULL);
- }
-}
-
-static void
-__kmp_dist_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
- int tid, void (*reduce)(void *, void *)
- USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
- KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_gather);
- kmp_team_t *team;
- distributedBarrier *b;
- kmp_info_t **other_threads;
- kmp_uint64 my_current_iter, my_next_iter;
- kmp_uint32 nproc;
- bool group_leader;
-
- team = this_thr->th.th_team;
- nproc = this_thr->th.th_team_nproc;
- other_threads = team->t.t_threads;
- b = team->t.b;
- my_current_iter = b->iter[tid].iter;
- my_next_iter = (my_current_iter + 1) % distributedBarrier::MAX_ITERS;
- group_leader = ((tid % b->threads_per_group) == 0);
-
- KA_TRACE(20,
- ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
-
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- // Barrier imbalance - save arrive time to the thread
- if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
- this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
- __itt_get_timestamp();
- }
-#endif
-
- if (group_leader) {
- // Start from the thread after the group leader
- size_t group_start = tid + 1;
- size_t group_end = tid + b->threads_per_group;
- size_t threads_pending = 0;
-
- if (group_end > nproc)
- group_end = nproc;
- do { // wait for threads in my group
- threads_pending = 0;
- // Check all the flags every time to avoid branch misspredict
- for (size_t thr = group_start; thr < group_end; thr++) {
- // Each thread uses a different cache line
- threads_pending += b->flags[my_current_iter][thr].stillNeed;
- }
- // Execute tasks here
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- kmp_task_team_t *task_team = this_thr->th.th_task_team;
- if (task_team != NULL) {
- if (TCR_SYNC_4(task_team->tt.tt_active)) {
- if (KMP_TASKING_ENABLED(task_team)) {
- int tasks_completed = FALSE;
- __kmp_atomic_execute_tasks_64(
- this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
- &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
- } else
- this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
- }
- } else {
- this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
- } // if
- }
- if (TCR_4(__kmp_global.g.g_done)) {
- if (__kmp_global.g.g_abort)
- __kmp_abort_thread();
- break;
- } else if (__kmp_tasking_mode != tskm_immediate_exec &&
- this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
- this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
- }
- } while (threads_pending > 0);
-
- if (reduce) { // Perform reduction if needed
- OMPT_REDUCTION_DECL(this_thr, gtid);
- OMPT_REDUCTION_BEGIN;
- // Group leader reduces all threads in group
- for (size_t thr = group_start; thr < group_end; thr++) {
- (*reduce)(this_thr->th.th_local.reduce_data,
- other_threads[thr]->th.th_local.reduce_data);
- }
- OMPT_REDUCTION_END;
- }
-
- // Set flag for next iteration
- b->flags[my_next_iter][tid].stillNeed = 1;
- // Each thread uses a different cache line; resets stillNeed to 0 to
- // indicate it has reached the barrier
- b->flags[my_current_iter][tid].stillNeed = 0;
-
- do { // wait for all group leaders
- threads_pending = 0;
- for (size_t thr = 0; thr < nproc; thr += b->threads_per_group) {
- threads_pending += b->flags[my_current_iter][thr].stillNeed;
- }
- // Execute tasks here
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- kmp_task_team_t *task_team = this_thr->th.th_task_team;
- if (task_team != NULL) {
- if (TCR_SYNC_4(task_team->tt.tt_active)) {
- if (KMP_TASKING_ENABLED(task_team)) {
- int tasks_completed = FALSE;
- __kmp_atomic_execute_tasks_64(
- this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
- &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
- } else
- this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
- }
- } else {
- this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
- } // if
- }
- if (TCR_4(__kmp_global.g.g_done)) {
- if (__kmp_global.g.g_abort)
- __kmp_abort_thread();
- break;
- } else if (__kmp_tasking_mode != tskm_immediate_exec &&
- this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
- this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
- }
- } while (threads_pending > 0);
-
- if (reduce) { // Perform reduction if needed
- if (KMP_MASTER_TID(tid)) { // Master reduces over group leaders
- OMPT_REDUCTION_DECL(this_thr, gtid);
- OMPT_REDUCTION_BEGIN;
- for (size_t thr = b->threads_per_group; thr < nproc;
- thr += b->threads_per_group) {
- (*reduce)(this_thr->th.th_local.reduce_data,
- other_threads[thr]->th.th_local.reduce_data);
- }
- OMPT_REDUCTION_END;
- }
- }
- } else {
- // Set flag for next iteration
- b->flags[my_next_iter][tid].stillNeed = 1;
- // Each thread uses a different cache line; resets stillNeed to 0 to
- // indicate it has reached the barrier
- b->flags[my_current_iter][tid].stillNeed = 0;
- }
-
- KMP_MFENCE();
-
- KA_TRACE(20,
- ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
-}
-
-static void __kmp_dist_barrier_release(
- enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
- int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
- KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_release);
- kmp_team_t *team;
- distributedBarrier *b;
- kmp_bstate_t *thr_bar;
- kmp_uint64 my_current_iter, next_go;
- size_t my_go_index;
- bool group_leader;
-
- KA_TRACE(20, ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n",
- gtid, tid, bt));
-
- thr_bar = &this_thr->th.th_bar[bt].bb;
-
- if (!KMP_MASTER_TID(tid)) {
- // workers and non-master group leaders need to check their presence in team
- do {
- if (this_thr->th.th_used_in_team.load() != 1 &&
- this_thr->th.th_used_in_team.load() != 3) {
- // Thread is not in use in a team. Wait on location in tid's thread
- // struct. The 0 value tells anyone looking that this thread is spinning
- // or sleeping until this location becomes 3 again; 3 is the transition
- // state to get to 1 which is waiting on go and being in the team
- kmp_flag_32<false, false> my_flag(&(this_thr->th.th_used_in_team), 3);
- if (KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 2,
- 0) ||
- this_thr->th.th_used_in_team.load() == 0) {
- my_flag.wait(this_thr, true, itt_sync_obj);
- }
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
- // In fork barrier where we could not get the object reliably
- itt_sync_obj =
- __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
- // Cancel wait on previous parallel region...
- __kmp_itt_task_starting(itt_sync_obj);
-
- if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
- return;
-
- itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
- if (itt_sync_obj != NULL)
- // Call prepare as early as possible for "new" barrier
- __kmp_itt_task_finished(itt_sync_obj);
- } else
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
- if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
- return;
- }
- if (this_thr->th.th_used_in_team.load() != 1 &&
- this_thr->th.th_used_in_team.load() != 3) // spurious wake-up?
- continue;
- if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
- return;
-
- // At this point, the thread thinks it is in use in a team, or in
- // transition to be used in a team, but it might have reached this barrier
- // before it was marked unused by the team. Unused threads are awoken and
- // shifted to wait on local thread struct elsewhere. It also might reach
- // this point by being picked up for use by a different team. Either way,
- // we need to update the tid.
- tid = __kmp_tid_from_gtid(gtid);
- team = this_thr->th.th_team;
- KMP_DEBUG_ASSERT(tid >= 0);
- KMP_DEBUG_ASSERT(team);
- b = team->t.b;
- my_current_iter = b->iter[tid].iter;
- next_go = my_current_iter + distributedBarrier::MAX_ITERS;
- my_go_index = tid / b->threads_per_go;
- if (this_thr->th.th_used_in_team.load() == 3) {
- KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 3, 1);
- }
- // Check if go flag is set
- if (b->go[my_go_index].go.load() != next_go) {
- // Wait on go flag on team
- kmp_atomic_flag_64<false, true> my_flag(
- &(b->go[my_go_index].go), next_go, &(b->sleep[tid].sleep));
- my_flag.wait(this_thr, true, itt_sync_obj);
- KMP_DEBUG_ASSERT(my_current_iter == b->iter[tid].iter ||
- b->iter[tid].iter == 0);
- KMP_DEBUG_ASSERT(b->sleep[tid].sleep == false);
- }
-
- if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
- return;
- // At this point, the thread's go location was set. This means the primary
- // thread is safely in the barrier, and so this thread's data is
- // up-to-date, but we should check again that this thread is really in
- // use in the team, as it could have been woken up for the purpose of
- // changing team size, or reaping threads at shutdown.
- if (this_thr->th.th_used_in_team.load() == 1)
- break;
- } while (1);
-
- if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
- return;
-
- group_leader = ((tid % b->threads_per_group) == 0);
- if (group_leader) {
- // Tell all the threads in my group they can go!
- for (size_t go_idx = my_go_index + 1;
- go_idx < my_go_index + b->gos_per_group; go_idx++) {
- b->go[go_idx].go.store(next_go);
- }
- // Fence added so that workers can see changes to go. sfence inadequate.
- KMP_MFENCE();
- }
-
-#if KMP_BARRIER_ICV_PUSH
- if (propagate_icvs) { // copy ICVs to final dest
- __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
- tid, FALSE);
- copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
- (kmp_internal_control_t *)team->t.b->team_icvs);
- copy_icvs(&thr_bar->th_fixed_icvs,
- &team->t.t_implicit_task_taskdata[tid].td_icvs);
- }
-#endif
- if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && group_leader) {
- // This thread is now awake and participating in the barrier;
- // wake up the other threads in the group
- size_t nproc = this_thr->th.th_team_nproc;
- size_t group_end = tid + b->threads_per_group;
- if (nproc < group_end)
- group_end = nproc;
- __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
- }
- } else { // Primary thread
- team = this_thr->th.th_team;
- b = team->t.b;
- my_current_iter = b->iter[tid].iter;
- next_go = my_current_iter + distributedBarrier::MAX_ITERS;
-#if KMP_BARRIER_ICV_PUSH
- if (propagate_icvs) {
- // primary thread has ICVs in final destination; copy
- copy_icvs(&thr_bar->th_fixed_icvs,
- &team->t.t_implicit_task_taskdata[tid].td_icvs);
- }
-#endif
- // Tell all the group leaders they can go!
- for (size_t go_idx = 0; go_idx < b->num_gos; go_idx += b->gos_per_group) {
- b->go[go_idx].go.store(next_go);
- }
-
- if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
- // Wake-up the group leaders
- size_t nproc = this_thr->th.th_team_nproc;
- __kmp_dist_barrier_wakeup(bt, team, tid + b->threads_per_group, nproc,
- b->threads_per_group, tid);
- }
-
- // Tell all the threads in my group they can go!
- for (size_t go_idx = 1; go_idx < b->gos_per_group; go_idx++) {
- b->go[go_idx].go.store(next_go);
- }
-
- // Fence added so that workers can see changes to go. sfence inadequate.
- KMP_MFENCE();
-
- if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
- // Wake-up the other threads in my group
- size_t nproc = this_thr->th.th_team_nproc;
- size_t group_end = tid + b->threads_per_group;
- if (nproc < group_end)
- group_end = nproc;
- __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
- }
- }
- // Update to next iteration
- KMP_ASSERT(my_current_iter == b->iter[tid].iter);
- b->iter[tid].iter = (b->iter[tid].iter + 1) % distributedBarrier::MAX_ITERS;
-
- KA_TRACE(
- 20, ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
- gtid, team->t.t_id, tid, bt));
-}
// Linear Barrier
template <bool cancellable = false>
@@ -1907,11 +1394,6 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
} else {
switch (__kmp_barrier_gather_pattern[bt]) {
- case bp_dist_bar: {
- __kmp_dist_barrier_gather(bt, this_thr, gtid, tid,
- reduce USE_ITT_BUILD_ARG(itt_sync_obj));
- break;
- }
case bp_hyper_bar: {
// don't set branch bits to 0; use linear
KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
@@ -2025,12 +1507,6 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
} else {
switch (__kmp_barrier_release_pattern[bt]) {
- case bp_dist_bar: {
- KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
- __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
- FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
- break;
- }
case bp_hyper_bar: {
KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
__kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
@@ -2162,11 +1638,6 @@ void __kmp_end_split_barrier(enum barrier_type bt, int gtid) {
if (!team->t.t_serialized) {
if (KMP_MASTER_GTID(gtid)) {
switch (__kmp_barrier_release_pattern[bt]) {
- case bp_dist_bar: {
- __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
- FALSE USE_ITT_BUILD_ARG(NULL));
- break;
- }
case bp_hyper_bar: {
KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
__kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
@@ -2278,8 +1749,8 @@ void __kmp_join_barrier(int gtid) {
if (__kmp_tasking_mode == tskm_extra_barrier) {
__kmp_tasking_barrier(team, this_thr, gtid);
- KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n",
- gtid, team_id, tid));
+ KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past taking barrier\n", gtid,
+ team_id, tid));
}
#ifdef KMP_DEBUG
if (__kmp_tasking_mode != tskm_immediate_exec) {
@@ -2288,9 +1759,8 @@ void __kmp_join_barrier(int gtid) {
__kmp_gtid_from_thread(this_thr), team_id,
team->t.t_task_team[this_thr->th.th_task_state],
this_thr->th.th_task_team));
- if (this_thr->th.th_task_team)
- KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
- team->t.t_task_team[this_thr->th.th_task_state]);
+ KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
+ team->t.t_task_team[this_thr->th.th_task_state]);
}
#endif /* KMP_DEBUG */
@@ -2316,11 +1786,6 @@ void __kmp_join_barrier(int gtid) {
#endif /* USE_ITT_BUILD */
switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
- case bp_dist_bar: {
- __kmp_dist_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
- NULL USE_ITT_BUILD_ARG(itt_sync_obj));
- break;
- }
case bp_hyper_bar: {
KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
__kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
@@ -2366,7 +1831,8 @@ void __kmp_join_barrier(int gtid) {
team_thread->th.th_stats->setIdleFlag();
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
team_thread->th.th_sleep_loc != NULL)
- __kmp_null_resume_wrapper(team_thread);
+ __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread),
+ team_thread->th.th_sleep_loc);
}
#endif
#if USE_ITT_BUILD
@@ -2513,11 +1979,6 @@ void __kmp_fork_barrier(int gtid, int tid) {
} // primary thread
switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
- case bp_dist_bar: {
- __kmp_dist_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
- TRUE USE_ITT_BUILD_ARG(NULL));
- break;
- }
case bp_hyper_bar: {
KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
__kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
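
The bulk of what is removed above is the gather/release pair of the distributed barrier. Its gather phase is a two-level flag protocol: every thread owns a stillNeed flag on its own cache lines, group leaders spin until all flags in their group are cleared, and the primary thread then spins on the group leaders' flags. The following is a deliberately simplified, one-shot sketch of that gather phase (illustrative only, hypothetical names; it omits tasking, sleeping, ICV propagation, and the per-iteration flag rotation the real barrier uses to stay reusable).

  #include <algorithm>
  #include <atomic>
  #include <cstddef>
  #include <vector>

  // Illustrative only: simplified two-level gather.
  struct DistGatherSketch {
    std::vector<std::atomic<unsigned>> still_need; // one flag per thread
    std::size_t threads_per_group;

    DistGatherSketch(std::size_t nthreads, std::size_t per_group)
        : still_need(nthreads), threads_per_group(per_group) {
      for (auto &f : still_need)
        f.store(1);
    }

    void arrive(std::size_t tid) {
      const std::size_t n = still_need.size();
      const bool group_leader = (tid % threads_per_group == 0);
      if (group_leader) {
        // A leader first waits for every other thread in its group.
        std::size_t end = std::min(tid + threads_per_group, n);
        for (std::size_t t = tid + 1; t < end; ++t)
          while (still_need[t].load(std::memory_order_acquire)) { /* spin */ }
      }
      // Clearing the flag announces that this thread (and, for a leader,
      // its whole group) has arrived.
      still_need[tid].store(0, std::memory_order_release);
      if (tid == 0) {
        // The primary thread finally waits for all other group leaders.
        for (std::size_t t = threads_per_group; t < n; t += threads_per_group)
          while (still_need[t].load(std::memory_order_acquire)) { /* spin */ }
      }
    }
  };
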
diff --git a/openmp/runtime/src/kmp_barrier.h b/openmp/runtime/src/kmp_barrier.h
deleted file mode 100644
index 5510fcaebd0a..000000000000
--- a/openmp/runtime/src/kmp_barrier.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * kmp_barrier.h
- */
-
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef KMP_BARRIER_H
-#define KMP_BARRIER_H
-
-#include "kmp.h"
-
-// Use four cache lines: MLC tends to prefetch the next or previous cache line
-// creating a possible fake conflict between cores, so this is the only way to
-// guarantee that no such prefetch can happen.
-#ifndef KMP_FOURLINE_ALIGN_CACHE
-#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
-#endif
-
-#define KMP_OPTIMIZE_FOR_REDUCTIONS 0
-
-class distributedBarrier {
- struct flags_s {
- kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
- };
-
- struct go_s {
- std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
- };
-
- struct iter_s {
- kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
- };
-
- struct sleep_s {
- std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
- };
-
- void init(size_t nthr);
- void resize(size_t nthr);
- void computeGo(size_t n);
- void computeVarsForN(size_t n);
-
-public:
- enum {
- MAX_ITERS = 3,
- MAX_GOS = 8,
- IDEAL_GOS = 4,
- IDEAL_CONTENTION = 16,
- };
-
- flags_s *flags[MAX_ITERS];
- go_s *go;
- iter_s *iter;
- sleep_s *sleep;
-
- size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
- size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
- // number of go signals each requiring one write per iteration
- size_t KMP_ALIGN_CACHE num_gos;
- // number of groups of gos
- size_t KMP_ALIGN_CACHE num_groups;
- // threads per go signal
- size_t KMP_ALIGN_CACHE threads_per_go;
- bool KMP_ALIGN_CACHE fix_threads_per_go;
- // threads per group
- size_t KMP_ALIGN_CACHE threads_per_group;
- // number of go signals in a group
- size_t KMP_ALIGN_CACHE gos_per_group;
- void *team_icvs;
-
- distributedBarrier() = delete;
- ~distributedBarrier() = delete;
-
- // Used instead of constructor to create aligned data
- static distributedBarrier *allocate(int nThreads) {
- distributedBarrier *d = (distributedBarrier *)_mm_malloc(
- sizeof(distributedBarrier), 4 * CACHE_LINE);
- d->num_threads = 0;
- d->max_threads = 0;
- for (int i = 0; i < MAX_ITERS; ++i)
- d->flags[i] = NULL;
- d->go = NULL;
- d->iter = NULL;
- d->sleep = NULL;
- d->team_icvs = NULL;
- d->fix_threads_per_go = false;
- // calculate gos and groups ONCE on base size
- d->computeGo(nThreads);
- d->init(nThreads);
- return d;
- }
-
- static void deallocate(distributedBarrier *db) { _mm_free(db); }
-
- void update_num_threads(size_t nthr) { init(nthr); }
-
- bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
- size_t get_num_threads() { return num_threads; }
- kmp_uint64 go_release();
- void go_reset();
-};
-
-#endif // KMP_BARRIER_H
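
The removed header documents the layout choice behind the barrier arrays: each flag sits on its own block of four cache lines (KMP_FOURLINE_ALIGN_CACHE) so that adjacent-line hardware prefetch on one core cannot create false sharing with a neighbouring core's flag, and allocate() uses _mm_malloc to get that alignment. A small sketch of the same idea in portable C++ follows (illustrative only; the 64-byte line size and all names are assumptions, not the runtime's CACHE_LINE macro).

  #include <atomic>
  #include <cstddef>
  #include <cstdlib>
  #include <new>

  // Illustrative only: one flag per block of four cache lines.
  constexpr std::size_t kCacheLine = 64; // assumed line size

  struct alignas(4 * kCacheLine) GoFlag {
    std::atomic<unsigned long long> go{0};
  };
  static_assert(sizeof(GoFlag) == 4 * kCacheLine, "padded to four lines");

  // Aligned array allocation; the reverted code used _mm_malloc/_mm_free for
  // the same purpose. Free the result with std::free after use.
  GoFlag *allocate_go_flags(std::size_t n) {
    void *p = std::aligned_alloc(4 * kCacheLine, n * sizeof(GoFlag));
    if (!p)
      return nullptr;
    GoFlag *flags = static_cast<GoFlag *>(p);
    for (std::size_t i = 0; i < n; ++i)
      new (&flags[i]) GoFlag(); // construct each flag in place
    return flags;
  }
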
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index b6babbe0e97e..24de14fe8c33 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -110,8 +110,8 @@ char const *__kmp_barrier_type_name[bs_last_barrier] = {"plain", "forkjoin"
"reduction"
#endif // KMP_FAST_REDUCTION_BARRIER
};
-char const *__kmp_barrier_pattern_name[bp_last_bar] = {
- "linear", "tree", "hyper", "hierarchical", "dist"};
+char const *__kmp_barrier_pattern_name[bp_last_bar] = {"linear", "tree",
+ "hyper", "hierarchical"};
int __kmp_allThreadsSpecified = 0;
size_t __kmp_align_alloc = CACHE_LINE;
diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h
index e203f876016c..858acd9c1d7a 100644
--- a/openmp/runtime/src/kmp_os.h
+++ b/openmp/runtime/src/kmp_os.h
@@ -1019,27 +1019,6 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);
#define KMP_MB() /* nothing to do */
#endif
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-#if KMP_COMPILER_ICC
-#define KMP_MFENCE_() _mm_mfence()
-#define KMP_SFENCE_() _mm_sfence()
-#elif KMP_COMPILER_MSVC
-#define KMP_MFENCE_() MemoryBarrier()
-#define KMP_SFENCE_() MemoryBarrier()
-#else
-#define KMP_MFENCE_() __sync_synchronize()
-#define KMP_SFENCE_() __sync_synchronize()
-#endif
-#define KMP_MFENCE() \
- if (UNLIKELY(!__kmp_cpuinfo.initialized)) { \
- __kmp_query_cpuid(&__kmp_cpuinfo); \
- } \
- if (__kmp_cpuinfo.sse2) { \
- KMP_MFENCE_(); \
- }
-#define KMP_SFENCE() KMP_SFENCE_()
-#endif
-
#ifndef KMP_IMB
#define KMP_IMB() /* nothing to do */
#endif
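
Note that the KMP_MFENCE()/KMP_SFENCE() macros removed above were only defined under KMP_ARCH_X86 || KMP_ARCH_X86_64 (with a one-time cpuid check for SSE2), while the distributed barrier called KMP_MFENCE() unconditionally, which is consistent with the non-x86 build breakage this revert addresses. A portable stand-in would be a C++ atomic fence, sketched below (illustrative only, not part of this patch).

  #include <atomic>

  // Illustrative only: architecture-independent full memory fence, at some
  // cost relative to a raw mfence on x86.
  inline void full_memory_fence() {
    std::atomic_thread_fence(std::memory_order_seq_cst);
  }
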
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 40d2ed7f7a11..414e9ba4e36d 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -109,10 +109,6 @@ static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
-void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
- int new_nthreads);
-void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
-
/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique identifier of executing
thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
@@ -1210,7 +1206,7 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
this_thr->th.th_team = serial_team;
serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
- KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
+ KF_TRACE(10, ("__kmpc_serialized_parallel: T#d curtask=%p\n", global_tid,
this_thr->th.th_current_task));
KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
this_thr->th.th_current_task->td_flags.executing = 0;
@@ -1569,24 +1565,15 @@ int __kmp_fork_call(ident_t *loc, int gtid,
/* Change number of threads in the team if requested */
if (master_set_numthreads) { // The parallel has num_threads clause
- if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
+ if (master_set_numthreads < master_th->th.th_teams_size.nth) {
// AC: only can reduce number of threads dynamically, can't increase
kmp_info_t **other_threads = parent_team->t.t_threads;
- // NOTE: if using distributed barrier, we need to run this code block
- // even when the team size appears not to have changed from the max.
- int old_proc = master_th->th.th_teams_size.nth;
- if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
- bp_dist_bar) {
- __kmp_resize_dist_barrier(parent_team, old_proc,
- master_set_numthreads);
- __kmp_add_threads_to_team(parent_team, master_set_numthreads);
- }
parent_team->t.t_nproc = master_set_numthreads;
for (i = 0; i < master_set_numthreads; ++i) {
other_threads[i]->th.th_team_nproc = master_set_numthreads;
}
+ // Keep extra threads hot in the team for possible next parallels
}
- // Keep extra threads hot in the team for possible next parallels
master_th->th.th_set_nproc = 0;
}
@@ -1650,9 +1637,6 @@ int __kmp_fork_call(ident_t *loc, int gtid,
}
#endif
- // Need this to happen before we determine the number of threads, not while
- // we are allocating the team
- //__kmp_push_current_task_to_thread(master_th, parent_team, 0);
int enter_teams = 0;
if (parent_team->t.t_active_level >=
master_th->th.th_current_task->td_icvs.max_active_levels) {
@@ -1660,10 +1644,13 @@ int __kmp_fork_call(ident_t *loc, int gtid,
} else {
enter_teams = ((ap == NULL && active_level == 0) ||
(ap && teams_level > 0 && teams_level == level));
- nthreads = master_set_numthreads
- ? master_set_numthreads
- // TODO: get nproc directly from current task
- : get__nproc_2(parent_team, master_tid);
+ nthreads =
+ master_set_numthreads
+ ? master_set_numthreads
+ : get__nproc_2(
+ parent_team,
+ master_tid); // TODO: get nproc directly from current task
+
// Check if we need to take forkjoin lock? (no need for serialized
// parallel out of teams construct). This code moved here from
// __kmp_reserve_threads() to speedup nested serialized parallels.
@@ -1998,8 +1985,6 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#endif
proc_bind, &new_icvs,
argc USE_NESTED_HOT_ARG(master_th));
- if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
- copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
} else {
/* allocate a new parallel team */
KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
@@ -2010,9 +1995,6 @@ int __kmp_fork_call(ident_t *loc, int gtid,
proc_bind,
&master_th->th.th_current_task->td_icvs,
argc USE_NESTED_HOT_ARG(master_th));
- if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
- copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
- &master_th->th.th_current_task->td_icvs);
}
KF_TRACE(
10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
@@ -2379,12 +2361,6 @@ void __kmp_join_call(ident_t *loc, int gtid
parent_team->t.t_stack_id = NULL;
}
#endif
-
- if (team->t.t_nproc > 1 &&
- __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
- team->t.b->update_num_threads(team->t.t_nproc);
- __kmp_add_threads_to_team(team, team->t.t_nproc);
- }
}
KMP_MB();
@@ -2672,9 +2648,6 @@ void __kmp_set_num_threads(int new_nth, int gtid) {
__kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
- if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
- __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
- }
// Release the extra threads we don't need any more.
for (f = new_nth; f < hot_team->t.t_nproc; f++) {
KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
@@ -2694,11 +2667,6 @@ void __kmp_set_num_threads(int new_nth, int gtid) {
}
#endif
- if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
- hot_team->t.b->update_num_threads(new_nth);
- __kmp_add_threads_to_team(hot_team, new_nth);
- }
-
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
// Update the t_nproc field in the threads that are still active.
@@ -4146,6 +4114,7 @@ static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
this_thr->th.th_team_nproc = team->t.t_nproc;
this_thr->th.th_team_master = master;
this_thr->th.th_team_serialized = team->t.t_serialized;
+ TCW_PTR(this_thr->th.th_sleep_loc, NULL);
KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
@@ -4314,12 +4283,6 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
new_thr->th.th_task_state_top = 0;
new_thr->th.th_task_state_stack_sz = 4;
- if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
- // Make sure pool thread has transitioned to waiting on own thread struct
- KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
- // Thread activated in __kmp_allocate_team when increasing team size
- }
-
#ifdef KMP_ADJUST_BLOCKTIME
/* Adjust blocktime back to zero if necessary */
/* Middle initialization might not have occurred yet */
@@ -4487,9 +4450,6 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
balign[b].bb.use_oncore_barrier = 0;
}
- TCW_PTR(new_thr->th.th_sleep_loc, NULL);
- new_thr->th.th_sleep_loc_type = flag_unset;
-
new_thr->th.th_spin_here = FALSE;
new_thr->th.th_next_waiting = 0;
#if KMP_OS_UNIX
@@ -5069,13 +5029,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
}
#endif
- if (team->t.t_nproc != new_nproc &&
- __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
- // Distributed barrier may need a resize
- int old_nthr = team->t.t_nproc;
- __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
- }
-
// Has the number of threads changed?
/* Let's assume the most common case is that the number of threads is
unchanged, and put that case first. */
@@ -5125,11 +5078,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
new_nproc));
team->t.t_size_changed = 1;
- if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
- // Barrier size already reduced earlier in this function
- // Activate team threads via th_used_in_team
- __kmp_add_threads_to_team(team, new_nproc);
- }
#if KMP_NESTED_HOT_TEAMS
if (__kmp_hot_teams_mode == 0) {
// AC: saved number of threads should correspond to team's value in this
@@ -5206,7 +5154,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
KA_TRACE(20,
("__kmp_allocate_team: increasing hot team thread count to %d\n",
new_nproc));
- int old_nproc = team->t.t_nproc; // save old value and use to update only
+
team->t.t_size_changed = 1;
#if KMP_NESTED_HOT_TEAMS
@@ -5233,9 +5181,10 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
team->t.t_nproc = new_nproc; // just get reserved threads involved
} else {
- // We may have some threads in reserve, but not enough;
- // get reserved threads involved if any.
- team->t.t_nproc = hot_teams[level].hot_team_nth;
+ // we may have some threads in reserve, but not enough
+ team->t.t_nproc =
+ hot_teams[level]
+ .hot_team_nth; // get reserved threads involved if any
hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
#endif // KMP_NESTED_HOT_TEAMS
if (team->t.t_max_nproc < new_nproc) {
@@ -5290,12 +5239,8 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if KMP_NESTED_HOT_TEAMS
} // end of check of t_nproc vs. new_nproc vs. hot_team_nth
#endif // KMP_NESTED_HOT_TEAMS
- if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
- // Barrier size already increased earlier in this function
- // Activate team threads via th_used_in_team
- __kmp_add_threads_to_team(team, new_nproc);
- }
/* make sure everyone is syncronized */
+ int old_nproc = team->t.t_nproc; // save old value and use to update only
// new threads below
__kmp_initialize_team(team, new_nproc, new_icvs,
root->r.r_uber_thread->th.th_ident);
@@ -5399,13 +5344,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
/* take this team from the team pool */
__kmp_team_pool = team->t.t_next_pool;
- if (max_nproc > 1 &&
- __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
- if (!team->t.b) { // Allocate barrier structure
- team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
- }
- }
-
/* setup the team for fresh use */
__kmp_initialize_team(team, new_nproc, new_icvs, NULL);
@@ -5461,12 +5399,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
/* and set it up */
team->t.t_max_nproc = max_nproc;
- if (max_nproc > 1 &&
- __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
- // Allocate barrier structure
- team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
- }
-
/* NOTE well, for some reason allocating one big buffer and dividing it up
seems to really hurt performance a lot on the P4, so, let's not use this */
__kmp_allocate_team_arrays(team, max_nproc);
@@ -5623,43 +5555,10 @@ void __kmp_free_team(kmp_root_t *root,
/* free the worker threads */
for (f = 1; f < team->t.t_nproc; ++f) {
KMP_DEBUG_ASSERT(team->t.t_threads[f]);
- if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
- KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
- 1, 2);
- }
__kmp_free_thread(team->t.t_threads[f]);
- }
-
- if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
- if (team->t.b) {
- // wake up thread at old location
- team->t.b->go_release();
- if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
- for (f = 1; f < team->t.t_nproc; ++f) {
- if (team->t.b->sleep[f].sleep) {
- __kmp_atomic_resume_64(
- team->t.t_threads[f]->th.th_info.ds.ds_gtid,
- (kmp_atomic_flag_64<> *)NULL);
- }
- }
- }
- // Wait for threads to be removed from team
- for (int f = 1; f < team->t.t_nproc; ++f) {
- while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
- KMP_CPU_PAUSE();
- }
- }
- }
-
- for (f = 1; f < team->t.t_nproc; ++f) {
team->t.t_threads[f] = NULL;
}
- if (team->t.t_max_nproc > 1 &&
- __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
- distributedBarrier::deallocate(team->t.b);
- team->t.b = NULL;
- }
/* put the team back in the team pool */
/* TODO limit size of team pool, call reap_team if pool too large */
team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
@@ -6058,19 +5957,12 @@ static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
KA_TRACE(
20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
gtid));
- if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
- while (
- !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
- KMP_CPU_PAUSE();
- __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
- } else {
- /* Need release fence here to prevent seg faults for tree forkjoin
- barrier (GEH) */
- ANNOTATE_HAPPENS_BEFORE(thread);
- kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
- thread);
- __kmp_release_64(&flag);
- }
+ /* Need release fence here to prevent seg faults for tree forkjoin barrier
+ * (GEH) */
+ ANNOTATE_HAPPENS_BEFORE(thread);
+ kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
+ thread);
+ __kmp_release_64(&flag);
}
// Terminate OS thread.
@@ -6944,8 +6836,8 @@ static void __kmp_do_serial_initialize(void) {
#if KMP_FAST_REDUCTION_BARRIER
#define kmp_reduction_barrier_gather_bb ((int)1)
#define kmp_reduction_barrier_release_bb ((int)1)
-#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
-#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
+#define kmp_reduction_barrier_gather_pat bp_hyper_bar
+#define kmp_reduction_barrier_release_pat bp_hyper_bar
#endif // KMP_FAST_REDUCTION_BARRIER
for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
__kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
@@ -8802,96 +8694,6 @@ void __kmp_omp_display_env(int verbose) {
__kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
-// The team size is changing, so distributed barrier must be modified
-void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
- int new_nthreads) {
- KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
- bp_dist_bar);
- kmp_info_t **other_threads = team->t.t_threads;
-
- // We want all the workers to stop waiting on the barrier while we adjust the
- // size of the team.
- for (int f = 1; f < old_nthreads; ++f) {
- KMP_DEBUG_ASSERT(other_threads[f] != NULL);
- // Ignore threads that are already inactive or not present in the team
- if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
- // teams construct causes thread_limit to get passed in, and some of
- // those could be inactive; just ignore them
- continue;
- }
- // If thread is transitioning still to in_use state, wait for it
- if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
- while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
- KMP_CPU_PAUSE();
- }
- // The thread should be in_use now
- KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
- // Transition to unused state
- team->t.t_threads[f]->th.th_used_in_team.store(2);
- KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
- }
- // Release all the workers
- kmp_uint64 new_value; // new value for go
- new_value = team->t.b->go_release();
-
- KMP_MFENCE();
-
- // Workers should see transition status 2 and move to 0; but may need to be
- // woken up first
- size_t my_go_index;
- int count = old_nthreads - 1;
- while (count > 0) {
- count = old_nthreads - 1;
- for (int f = 1; f < old_nthreads; ++f) {
- my_go_index = f / team->t.b->threads_per_go;
- if (other_threads[f]->th.th_used_in_team.load() != 0) {
- if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up the workers
- kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
- void *, other_threads[f]->th.th_sleep_loc);
- __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
- }
- } else {
- KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
- count--;
- }
- }
- }
- // Now update the barrier size
- team->t.b->update_num_threads(new_nthreads);
- team->t.b->go_reset();
-}
-
-void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
- // Add the threads back to the team
- KMP_DEBUG_ASSERT(team);
- // Threads were paused and pointed at th_used_in_team temporarily during a
- // resize of the team. We're going to set th_used_in_team to 3 to indicate to
- // the thread that it should transition itself back into the team. Then, if
- // blocktime isn't infinite, the thread could be sleeping, so we send a resume
- // to wake it up.
- for (int f = 1; f < new_nthreads; ++f) {
- KMP_DEBUG_ASSERT(team->t.t_threads[f]);
- KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
- 3);
- if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up sleeping threads
- __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
- (kmp_flag_32<false, false> *)NULL);
- }
- }
- // The threads should be transitioning to the team; when they are done, they
- // should have set th_used_in_team to 1. This loop forces master to wait until
- // all threads have moved into the team and are waiting in the barrier.
- int count = new_nthreads - 1;
- while (count > 0) {
- count = new_nthreads - 1;
- for (int f = 1; f < new_nthreads; ++f) {
- if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
- count--;
- }
- }
- }
-}
-
// Globals and functions for hidden helper task
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
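
The __kmp_resize_dist_barrier/__kmp_add_threads_to_team code removed above, together with the th_used_in_team field removed from kmp.h, implemented a small per-thread state machine: 0 = not used in team, 1 = used in team, 2 = transitioning out, 3 = transitioning in. During a resize the primary thread moves workers 1 -> 2 and they park themselves at 0; re-adding threads moves them 0 -> 3 and each worker completes 3 -> 1 once it is back waiting in the barrier. A reduced sketch of those transitions (illustrative only, hypothetical names):

  #include <atomic>

  // Illustrative only: the th_used_in_team state machine, values as in the
  // comment deleted from kmp.h.
  enum : unsigned { kUnused = 0, kInTeam = 1, kLeaving = 2, kJoining = 3 };

  struct ThreadSlotSketch {
    std::atomic<unsigned> used_in_team{kUnused};
  };

  // Primary thread asks a worker to leave during a resize (1 -> 2); the
  // worker later observes 2 and stores 0 itself.
  inline void request_leave(ThreadSlotSketch &t) {
    unsigned expected = kInTeam;
    t.used_in_team.compare_exchange_strong(expected, kLeaving);
  }

  // Primary thread re-adds a worker after the resize (0 -> 3); the worker
  // finishes the transition itself (3 -> 1) once it waits in the barrier.
  inline void request_join(ThreadSlotSketch &t) {
    unsigned expected = kUnused;
    t.used_in_team.compare_exchange_strong(expected, kJoining);
  }
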
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index a98a2a43b0d0..f287c27f29a5 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -1684,8 +1684,6 @@ static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value,
const char *var;
/* ---------- Barrier method control ------------ */
- static int dist_req = 0, non_dist_req = 0;
- static bool warn = 1;
for (int i = bs_plain_barrier; i < bs_last_barrier; i++) {
var = __kmp_barrier_pattern_env_name[i];
@@ -1697,11 +1695,6 @@ static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value,
for (j = bp_linear_bar; j < bp_last_bar; j++) {
if (__kmp_match_with_sentinel(__kmp_barrier_pattern_name[j], value, 1,
',')) {
- if (j == bp_dist_bar) {
- dist_req++;
- } else {
- non_dist_req++;
- }
__kmp_barrier_gather_pattern[i] = (kmp_bar_pat_e)j;
break;
}
@@ -1716,11 +1709,6 @@ static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value,
if (comma != NULL) {
for (j = bp_linear_bar; j < bp_last_bar; j++) {
if (__kmp_str_match(__kmp_barrier_pattern_name[j], 1, comma + 1)) {
- if (j == bp_dist_bar) {
- dist_req++;
- } else {
- non_dist_req++;
- }
__kmp_barrier_release_pattern[i] = (kmp_bar_pat_e)j;
break;
}
@@ -1735,28 +1723,6 @@ static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value,
}
}
}
- if ((dist_req == 0) && (non_dist_req != 0)) {
- // Something was set to a barrier other than dist; set all others to hyper
- for (int i = bs_plain_barrier; i < bs_last_barrier; i++) {
- if (__kmp_barrier_release_pattern[i] == bp_dist_bar)
- __kmp_barrier_release_pattern[i] = bp_hyper_bar;
- if (__kmp_barrier_gather_pattern[i] == bp_dist_bar)
- __kmp_barrier_gather_pattern[i] = bp_hyper_bar;
- }
- } else if (non_dist_req != 0) {
- // some requests for dist, plus requests for others; set all to dist
- if (non_dist_req > 0 && dist_req > 0 && warn) {
- KMP_INFORM(BarrierPatternOverride, name,
- __kmp_barrier_pattern_name[bp_dist_bar]);
- warn = 0;
- }
- for (int i = bs_plain_barrier; i < bs_last_barrier; i++) {
- if (__kmp_barrier_release_pattern[i] != bp_dist_bar)
- __kmp_barrier_release_pattern[i] = bp_dist_bar;
- if (__kmp_barrier_gather_pattern[i] != bp_dist_bar)
- __kmp_barrier_gather_pattern[i] = bp_dist_bar;
- }
- }
} // __kmp_stg_parse_barrier_pattern
static void __kmp_stg_print_barrier_pattern(kmp_str_buf_t *buffer,
@@ -1773,7 +1739,7 @@ static void __kmp_stg_print_barrier_pattern(kmp_str_buf_t *buffer,
__kmp_str_buf_print(buffer, " %s='",
__kmp_barrier_pattern_env_name[i]);
}
- KMP_DEBUG_ASSERT(j < bp_last_bar && k < bp_last_bar);
+ KMP_DEBUG_ASSERT(j < bs_last_barrier && k < bs_last_barrier);
__kmp_str_buf_print(buffer, "%s,%s'\n", __kmp_barrier_pattern_name[j],
__kmp_barrier_pattern_name[k]);
}
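
The settings code removed above enforced that the "dist" pattern cannot be mixed with other patterns across the KMP_*_BARRIER_PATTERN variables: if only non-dist patterns were requested, any leftover dist entries were downgraded to hyper; if dist was mixed with others, everything was forced to dist (and the BarrierPatternOverride message removed from en_US.txt was printed once). A compact sketch of that reconciliation rule (illustrative only, hypothetical names):

  #include <vector>

  // Illustrative only: "dist" is used for either every barrier or none.
  enum Pattern { kLinear, kTree, kHyper, kHierarchical, kDist };

  void reconcile_patterns(std::vector<Pattern> &gather,
                          std::vector<Pattern> &release,
                          int dist_req, int non_dist_req) {
    if (dist_req == 0 && non_dist_req != 0) {
      // Nobody asked for dist: downgrade stray dist entries to hyper.
      for (auto &p : gather)  if (p == kDist) p = kHyper;
      for (auto &p : release) if (p == kDist) p = kHyper;
    } else if (dist_req != 0 && non_dist_req != 0) {
      // Mixed request: dist wins everywhere, with a one-time warning.
      for (auto &p : gather)  p = kDist;
      for (auto &p : release) p = kDist;
    }
  }
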
diff --git a/openmp/runtime/src/kmp_stats.h b/openmp/runtime/src/kmp_stats.h
index 113221c066a3..4c5053df3fef 100644
--- a/openmp/runtime/src/kmp_stats.h
+++ b/openmp/runtime/src/kmp_stats.h
@@ -246,8 +246,6 @@ enum stats_state_e {
// KMP_tree_release -- time in __kmp_tree_barrier_release
// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather
// KMP_hyper_release -- time in __kmp_hyper_barrier_release
-// KMP_dist_gather -- time in __kmp_dist_barrier_gather
-// KMP_dist_release -- time in __kmp_dist_barrier_release
// clang-format off
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
macro(KMP_fork_call, 0, arg) \
@@ -257,8 +255,6 @@ enum stats_state_e {
macro(KMP_hier_release, 0, arg) \
macro(KMP_hyper_gather, 0, arg) \
macro(KMP_hyper_release, 0, arg) \
- macro(KMP_dist_gather, 0, arg) \
- macro(KMP_dist_release, 0, arg) \
macro(KMP_linear_gather, 0, arg) \
macro(KMP_linear_release, 0, arg) \
macro(KMP_tree_gather, 0, arg) \
diff --git a/openmp/runtime/src/kmp_str.cpp b/openmp/runtime/src/kmp_str.cpp
index e64f989fbc69..ffce2b88ab35 100644
--- a/openmp/runtime/src/kmp_str.cpp
+++ b/openmp/runtime/src/kmp_str.cpp
@@ -515,31 +515,6 @@ int __kmp_str_match(char const *target, int len, char const *data) {
return ((len > 0) ? i >= len : (!target[i] && (len || !data[i])));
} // __kmp_str_match
-// If data contains all of target, returns true, otherwise returns false.
-// len should be the length of target
-bool __kmp_str_contains(char const *target, int len, char const *data) {
- int i = 0, j = 0, start = 0;
- if (target == NULL || data == NULL) {
- return FALSE;
- }
- while (target[i]) {
- if (!data[j])
- return FALSE;
- if (TOLOWER(target[i]) != TOLOWER(data[j])) {
- j = start + 1;
- start = j;
- i = 0;
- } else {
- if (i == 0)
- start = j;
- j++;
- i++;
- }
- }
-
- return i == len;
-} // __kmp_str_contains
-
int __kmp_str_match_false(char const *data) {
int result =
__kmp_str_match("false", 1, data) || __kmp_str_match("off", 2, data) ||
diff --git a/openmp/runtime/src/kmp_str.h b/openmp/runtime/src/kmp_str.h
index 855b5df55d69..ff6179908ef1 100644
--- a/openmp/runtime/src/kmp_str.h
+++ b/openmp/runtime/src/kmp_str.h
@@ -106,7 +106,6 @@ int __kmp_str_eqf(char const *lhs, char const *rhs);
char *__kmp_str_format(char const *format, ...);
void __kmp_str_free(char **str);
int __kmp_str_match(char const *target, int len, char const *data);
-bool __kmp_str_contains(char const *target, int len, char const *data);
int __kmp_str_match_false(char const *data);
int __kmp_str_match_true(char const *data);
void __kmp_str_replace(char *str, char search_for, char replace_with);
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index b1a1fb1798be..62f0bdca4be9 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -2963,7 +2963,8 @@ static inline int __kmp_execute_tasks_template(
(TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
NULL)) {
asleep = 1;
- __kmp_null_resume_wrapper(other_thread);
+ __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread),
+ other_thread->th.th_sleep_loc);
// A sleeping thread should not have any tasks on it's queue.
// There is a slight possibility that it resumes, steals a task
// from another thread, which spawns more tasks, all in the time
@@ -3112,16 +3113,6 @@ int __kmp_execute_tasks_64(
thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
-template <bool C, bool S>
-int __kmp_atomic_execute_tasks_64(
- kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag,
- int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
- kmp_int32 is_constrained) {
- return __kmp_execute_tasks_template(
- thread, gtid, flag, final_spin,
- thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
-}
-
int __kmp_execute_tasks_oncore(
kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
@@ -3148,14 +3139,6 @@ template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
int *USE_ITT_BUILD_ARG(void *),
kmp_int32);
-template int __kmp_atomic_execute_tasks_64<false, true>(
- kmp_info_t *, kmp_int32, kmp_atomic_flag_64<false, true> *, int,
- int *USE_ITT_BUILD_ARG(void *), kmp_int32);
-
-template int __kmp_atomic_execute_tasks_64<true, false>(
- kmp_info_t *, kmp_int32, kmp_atomic_flag_64<true, false> *, int,
- int *USE_ITT_BUILD_ARG(void *), kmp_int32);
-
// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
// next barrier so they can assist in executing enqueued tasks.
// First thread in allocates the task team atomically.
@@ -3194,7 +3177,7 @@ static void __kmp_enable_tasking(kmp_task_team_t *task_team,
// tasks and execute them. In extra barrier mode, tasks do not sleep
// at the separate tasking barrier, so this isn't a problem.
for (i = 0; i < nthreads; i++) {
- void *sleep_loc;
+ volatile void *sleep_loc;
kmp_info_t *thread = threads_data[i].td.td_thr;
if (i == this_thr->th.th_info.ds.ds_tid) {
@@ -3211,7 +3194,7 @@ static void __kmp_enable_tasking(kmp_task_team_t *task_team,
KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
__kmp_gtid_from_thread(this_thr),
__kmp_gtid_from_thread(thread)));
- __kmp_null_resume_wrapper(thread);
+ __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
} else {
KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
__kmp_gtid_from_thread(this_thr),
@@ -3581,7 +3564,7 @@ void __kmp_wait_to_unref_task_teams(void) {
__kmp_gtid_from_thread(thread)));
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
- void *sleep_loc;
+ volatile void *sleep_loc;
// If the thread is sleeping, awaken it.
if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
NULL) {
@@ -3589,7 +3572,7 @@ void __kmp_wait_to_unref_task_teams(void) {
10,
("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
__kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
- __kmp_null_resume_wrapper(thread);
+ __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
}
}
}
diff --git a/openmp/runtime/src/kmp_wait_release.cpp b/openmp/runtime/src/kmp_wait_release.cpp
index d41ddf231e3f..cabb5722f4dc 100644
--- a/openmp/runtime/src/kmp_wait_release.cpp
+++ b/openmp/runtime/src/kmp_wait_release.cpp
@@ -33,10 +33,6 @@ template <bool C, bool S>
void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag) {
__kmp_mwait_template(th_gtid, flag);
}
-template <bool C, bool S>
-void __kmp_atomic_mwait_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag) {
- __kmp_mwait_template(th_gtid, flag);
-}
void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag) {
__kmp_mwait_template(th_gtid, flag);
}
@@ -44,8 +40,4 @@ void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag) {
template void __kmp_mwait_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_mwait_64<false, true>(int, kmp_flag_64<false, true> *);
template void __kmp_mwait_64<true, false>(int, kmp_flag_64<true, false> *);
-template void
-__kmp_atomic_mwait_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
-template void
-__kmp_atomic_mwait_64<true, false>(int, kmp_atomic_flag_64<true, false> *);
#endif
diff --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h
index 5c250d94886f..d528ce9f1801 100644
--- a/openmp/runtime/src/kmp_wait_release.h
+++ b/openmp/runtime/src/kmp_wait_release.h
@@ -33,285 +33,96 @@ higher level operations such as barriers and fork/join.
@{
*/
+/*!
+ * The flag_type describes the storage used for the flag.
+ */
+enum flag_type {
+ flag32, /**< 32 bit flags */
+ flag64, /**< 64 bit flags */
+ flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
+};
+
struct flag_properties {
unsigned int type : 16;
unsigned int reserved : 16;
};
-template <enum flag_type FlagType> struct flag_traits {};
-
-template <> struct flag_traits<flag32> {
- typedef kmp_uint32 flag_t;
- static const flag_type t = flag32;
- static inline flag_t tcr(flag_t f) { return TCR_4(f); }
- static inline flag_t test_then_add4(volatile flag_t *f) {
- return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
- }
- static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
- return KMP_TEST_THEN_OR32(f, v);
- }
- static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
- return KMP_TEST_THEN_AND32(f, v);
- }
-};
-
-template <> struct flag_traits<atomic_flag64> {
- typedef kmp_uint64 flag_t;
- static const flag_type t = atomic_flag64;
- static inline flag_t tcr(flag_t f) { return TCR_8(f); }
- static inline flag_t test_then_add4(volatile flag_t *f) {
- return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
- }
- static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
- return KMP_TEST_THEN_OR64(f, v);
- }
- static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
- return KMP_TEST_THEN_AND64(f, v);
- }
-};
-
-template <> struct flag_traits<flag64> {
- typedef kmp_uint64 flag_t;
- static const flag_type t = flag64;
- static inline flag_t tcr(flag_t f) { return TCR_8(f); }
- static inline flag_t test_then_add4(volatile flag_t *f) {
- return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
- }
- static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
- return KMP_TEST_THEN_OR64(f, v);
- }
- static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
- return KMP_TEST_THEN_AND64(f, v);
- }
-};
-
-template <> struct flag_traits<flag_oncore> {
- typedef kmp_uint64 flag_t;
- static const flag_type t = flag_oncore;
- static inline flag_t tcr(flag_t f) { return TCR_8(f); }
- static inline flag_t test_then_add4(volatile flag_t *f) {
- return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
- }
- static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
- return KMP_TEST_THEN_OR64(f, v);
- }
- static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
- return KMP_TEST_THEN_AND64(f, v);
- }
-};
-
-/*! Base class for all flags */
-template <flag_type FlagType> class kmp_flag {
-protected:
- flag_properties t; /**< "Type" of the flag in loc */
- kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this thread. */
- kmp_uint32 num_waiting_threads; /**< #threads sleeping on this thread. */
- std::atomic<bool> *sleepLoc;
+/*!
+ * Base class for wait/release volatile flag
+ */
+template <typename P> class kmp_flag_native {
+ volatile P *loc;
+ flag_properties t;
public:
- typedef flag_traits<FlagType> traits_type;
- kmp_flag() : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(nullptr) {}
- kmp_flag(int nwaiters)
- : t({FlagType, 0U}), num_waiting_threads(nwaiters), sleepLoc(nullptr) {}
- kmp_flag(std::atomic<bool> *sloc)
- : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(sloc) {}
- /*! @result the flag_type */
+ typedef P flag_t;
+ kmp_flag_native(volatile P *p, flag_type ft)
+ : loc(p), t({(short unsigned int)ft, 0U}) {}
+ volatile P *get() { return loc; }
+ void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
+ void set(volatile P *new_loc) { loc = new_loc; }
flag_type get_type() { return (flag_type)(t.type); }
-
- /*! param i in index into waiting_threads
- * @result the thread that is waiting at index i */
- kmp_info_t *get_waiter(kmp_uint32 i) {
- KMP_DEBUG_ASSERT(i < num_waiting_threads);
- return waiting_threads[i];
- }
- /*! @result num_waiting_threads */
- kmp_uint32 get_num_waiters() { return num_waiting_threads; }
- /*! @param thr in the thread which is now waiting
- * Insert a waiting thread at index 0. */
- void set_waiter(kmp_info_t *thr) {
- waiting_threads[0] = thr;
- num_waiting_threads = 1;
- }
- enum barrier_type get_bt() { return bs_last_barrier; }
+ P load() { return *loc; }
+ void store(P val) { *loc = val; }
};
-/*! Base class for wait/release volatile flag */
-template <typename PtrType, flag_type FlagType, bool Sleepable>
-class kmp_flag_native : public kmp_flag<FlagType> {
-protected:
- volatile PtrType *loc;
- PtrType checker; /**< When flag==checker, it has been released. */
- typedef flag_traits<FlagType> traits_type;
-
-public:
- typedef PtrType flag_t;
- kmp_flag_native(volatile PtrType *p) : kmp_flag<FlagType>(), loc(p) {}
- kmp_flag_native(volatile PtrType *p, kmp_info_t *thr)
- : kmp_flag<FlagType>(1), loc(p) {
- this->waiting_threads[0] = thr;
- }
- kmp_flag_native(volatile PtrType *p, PtrType c)
- : kmp_flag<FlagType>(), loc(p), checker(c) {}
- kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic<bool> *sloc)
- : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
- volatile PtrType *get() { return loc; }
- void *get_void_p() { return RCAST(void *, CCAST(PtrType *, loc)); }
- void set(volatile PtrType *new_loc) { loc = new_loc; }
- PtrType load() { return *loc; }
- void store(PtrType val) { *loc = val; }
- /*! @result true if the flag object has been released. */
- virtual bool done_check() {
- if (Sleepable && !(this->sleepLoc))
- return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
- checker;
- else
- return traits_type::tcr(*(this->get())) == checker;
- }
- /*! @param old_loc in old value of flag
- * @result true if the flag's old value indicates it was released. */
- virtual bool done_check_val(PtrType old_loc) { return old_loc == checker; }
- /*! @result true if the flag object is not yet released.
- * Used in __kmp_wait_template like:
- * @code
- * while (flag.notdone_check()) { pause(); }
- * @endcode */
- virtual bool notdone_check() {
- return traits_type::tcr(*(this->get())) != checker;
- }
- /*! @result Actual flag value before release was applied.
- * Trigger all waiting threads to run by modifying flag to release state. */
- void internal_release() {
- (void)traits_type::test_then_add4((volatile PtrType *)this->get());
- }
- /*! @result Actual flag value before sleep bit(s) set.
- * Notes that there is at least one thread sleeping on the flag by setting
- * sleep bit(s). */
- PtrType set_sleeping() {
- if (this->sleepLoc) {
- this->sleepLoc->store(true);
- return *(this->get());
- }
- return traits_type::test_then_or((volatile PtrType *)this->get(),
- KMP_BARRIER_SLEEP_STATE);
- }
- /*! @result Actual flag value before sleep bit(s) cleared.
- * Notes that there are no longer threads sleeping on the flag by clearing
- * sleep bit(s). */
- void unset_sleeping() {
- if (this->sleepLoc) {
- this->sleepLoc->store(false);
- return;
- }
- traits_type::test_then_and((volatile PtrType *)this->get(),
- ~KMP_BARRIER_SLEEP_STATE);
- }
- /*! @param old_loc in old value of flag
- * Test if there are threads sleeping on the flag's old value in old_loc. */
- bool is_sleeping_val(PtrType old_loc) {
- if (this->sleepLoc)
- return this->sleepLoc->load();
- return old_loc & KMP_BARRIER_SLEEP_STATE;
- }
- /*! Test whether there are threads sleeping on the flag. */
- bool is_sleeping() {
- if (this->sleepLoc)
- return this->sleepLoc->load();
- return is_sleeping_val(*(this->get()));
- }
- bool is_any_sleeping() {
- if (this->sleepLoc)
- return this->sleepLoc->load();
- return is_sleeping_val(*(this->get()));
- }
- kmp_uint8 *get_stolen() { return NULL; }
-};
-
-/*! Base class for wait/release atomic flag */
-template <typename PtrType, flag_type FlagType, bool Sleepable>
-class kmp_flag_atomic : public kmp_flag<FlagType> {
-protected:
- std::atomic<PtrType> *loc; /**< Pointer to flag location to wait on */
- PtrType checker; /**< Flag == checker means it has been released. */
+/*!
+ * Base class for wait/release atomic flag
+ */
+template <typename P> class kmp_flag {
+ std::atomic<P>
+ *loc; /**< Pointer to the flag storage that is modified by another thread
+ */
+ flag_properties t; /**< "Type" of the flag in loc */
public:
- typedef flag_traits<FlagType> traits_type;
- typedef PtrType flag_t;
- kmp_flag_atomic(std::atomic<PtrType> *p) : kmp_flag<FlagType>(), loc(p) {}
- kmp_flag_atomic(std::atomic<PtrType> *p, kmp_info_t *thr)
- : kmp_flag<FlagType>(1), loc(p) {
- this->waiting_threads[0] = thr;
- }
- kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c)
- : kmp_flag<FlagType>(), loc(p), checker(c) {}
- kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c, std::atomic<bool> *sloc)
- : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
- /*! @result the pointer to the actual flag */
- std::atomic<PtrType> *get() { return loc; }
- /*! @result void* pointer to the actual flag */
+ typedef P flag_t;
+ kmp_flag(std::atomic<P> *p, flag_type ft)
+ : loc(p), t({(short unsigned int)ft, 0U}) {}
+ /*!
+ * @result the pointer to the actual flag
+ */
+ std::atomic<P> *get() { return loc; }
+ /*!
+ * @result void* pointer to the actual flag
+ */
void *get_void_p() { return RCAST(void *, loc); }
- /*! @param new_loc in set loc to point at new_loc */
- void set(std::atomic<PtrType> *new_loc) { loc = new_loc; }
- /*! @result flag value */
- PtrType load() { return loc->load(std::memory_order_acquire); }
- /*! @param val the new flag value to be stored */
- void store(PtrType val) { loc->store(val, std::memory_order_release); }
- /*! @result true if the flag object has been released. */
- bool done_check() {
- if (Sleepable && !(this->sleepLoc))
- return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
- else
- return this->load() == checker;
- }
- /*! @param old_loc in old value of flag
- * @result true if the flag's old value indicates it was released. */
- bool done_check_val(PtrType old_loc) { return old_loc == checker; }
- /*! @result true if the flag object is not yet released.
- * Used in __kmp_wait_template like:
- * @code
- * while (flag.notdone_check()) { pause(); }
- * @endcode */
- bool notdone_check() { return this->load() != checker; }
- /*! @result Actual flag value before release was applied.
- * Trigger all waiting threads to run by modifying flag to release state. */
- void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
- /*! @result Actual flag value before sleep bit(s) set.
- * Notes that there is at least one thread sleeping on the flag by setting
- * sleep bit(s). */
- PtrType set_sleeping() {
- if (this->sleepLoc) {
- this->sleepLoc->store(true);
- return *(this->get());
- }
- return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
- }
- /*! @result Actual flag value before sleep bit(s) cleared.
- * Notes that there are no longer threads sleeping on the flag by clearing
- * sleep bit(s). */
- void unset_sleeping() {
- if (this->sleepLoc) {
- this->sleepLoc->store(false);
- return;
- }
- KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
- }
- /*! @param old_loc in old value of flag
- * Test whether there are threads sleeping on flag's old value in old_loc. */
- bool is_sleeping_val(PtrType old_loc) {
- if (this->sleepLoc)
- return this->sleepLoc->load();
- return old_loc & KMP_BARRIER_SLEEP_STATE;
- }
- /*! Test whether there are threads sleeping on the flag. */
- bool is_sleeping() {
- if (this->sleepLoc)
- return this->sleepLoc->load();
- return is_sleeping_val(this->load());
- }
- bool is_any_sleeping() {
- if (this->sleepLoc)
- return this->sleepLoc->load();
- return is_sleeping_val(this->load());
- }
- kmp_uint8 *get_stolen() { return NULL; }
+ /*!
+ * @param new_loc in set loc to point at new_loc
+ */
+ void set(std::atomic<P> *new_loc) { loc = new_loc; }
+ /*!
+ * @result the flag_type
+ */
+ flag_type get_type() { return (flag_type)(t.type); }
+ /*!
+ * @result flag value
+ */
+ P load() { return loc->load(std::memory_order_acquire); }
+ /*!
+ * @param val the new flag value to be stored
+ */
+ void store(P val) { loc->store(val, std::memory_order_release); }
+ // Derived classes must provide the following:
+ /*
+ kmp_info_t * get_waiter(kmp_uint32 i);
+ kmp_uint32 get_num_waiters();
+ bool done_check();
+ bool done_check_val(P old_loc);
+ bool notdone_check();
+ P internal_release();
+ void suspend(int th_gtid);
+ void mwait(int th_gtid);
+ void resume(int th_gtid);
+ P set_sleeping();
+ P unset_sleeping();
+ bool is_sleeping();
+ bool is_any_sleeping();
+ bool is_sleeping_val(P old_loc);
+ int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
+ int *thread_finished
+ USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
+ is_constrained);
+ */
};
#if OMPT_SUPPORT
@@ -453,9 +264,8 @@ final_spin=FALSE)
ompt_entry_state = this_thr->th.ompt_thread_info.state;
if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
- ompt_lw_taskteam_t *team = NULL;
- if (this_thr->th.th_team)
- team = this_thr->th.th_team->t.ompt_serialized_team_info;
+ ompt_lw_taskteam_t *team =
+ this_thr->th.th_team->t.ompt_serialized_team_info;
if (team) {
tId = &(team->ompt_task_info.task_data);
} else {
@@ -530,11 +340,11 @@ final_spin=FALSE)
disabled (KMP_TASKING=0). */
if (task_team != NULL) {
if (TCR_SYNC_4(task_team->tt.tt_active)) {
- if (KMP_TASKING_ENABLED(task_team)) {
+ if (KMP_TASKING_ENABLED(task_team))
flag->execute_tasks(
this_thr, th_gtid, final_spin,
&tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
- } else
+ else
this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
} else {
KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
@@ -747,7 +557,6 @@ static inline void __kmp_mwait_template(int th_gtid, C *flag) {
else {
// if flag changes here, wake-up happens immediately
TCW_PTR(th->th.th_sleep_loc, (void *)flag);
- th->th.th_sleep_loc_type = flag->get_type();
__kmp_unlock_suspend_mx(th);
KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
#if KMP_HAVE_UMWAIT
@@ -765,7 +574,6 @@ static inline void __kmp_mwait_template(int th_gtid, C *flag) {
if (flag->is_sleeping())
flag->unset_sleeping();
TCW_PTR(th->th.th_sleep_loc, NULL);
- th->th.th_sleep_loc_type = flag_unset;
}
// Mark thread as active again
th->th.th_active = TRUE;
@@ -816,15 +624,251 @@ template <class C> static inline void __kmp_release_template(C *flag) {
}
}
+template <typename FlagType> struct flag_traits {};
+
+template <> struct flag_traits<kmp_uint32> {
+ typedef kmp_uint32 flag_t;
+ static const flag_type t = flag32;
+ static inline flag_t tcr(flag_t f) { return TCR_4(f); }
+ static inline flag_t test_then_add4(volatile flag_t *f) {
+ return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
+ }
+ static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
+ return KMP_TEST_THEN_OR32(f, v);
+ }
+ static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
+ return KMP_TEST_THEN_AND32(f, v);
+ }
+};
+
+template <> struct flag_traits<kmp_uint64> {
+ typedef kmp_uint64 flag_t;
+ static const flag_type t = flag64;
+ static inline flag_t tcr(flag_t f) { return TCR_8(f); }
+ static inline flag_t test_then_add4(volatile flag_t *f) {
+ return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
+ }
+ static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
+ return KMP_TEST_THEN_OR64(f, v);
+ }
+ static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
+ return KMP_TEST_THEN_AND64(f, v);
+ }
+};
+
+// Basic flag that does not use C11 Atomics
+template <typename FlagType, bool Sleepable>
+class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
+ typedef flag_traits<FlagType> traits_type;
+ FlagType checker; /**< Value to compare flag to to check if flag has been
+ released. */
+ kmp_info_t
+ *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
+ kmp_uint32
+ num_waiting_threads; /**< Number of threads sleeping on this thread. */
+public:
+ kmp_basic_flag_native(volatile FlagType *p)
+ : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
+ kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
+ : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
+ waiting_threads[0] = thr;
+ }
+ kmp_basic_flag_native(volatile FlagType *p, FlagType c)
+ : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
+ num_waiting_threads(0) {}
+ /*!
+ * param i in index into waiting_threads
+ * @result the thread that is waiting at index i
+ */
+ kmp_info_t *get_waiter(kmp_uint32 i) {
+ KMP_DEBUG_ASSERT(i < num_waiting_threads);
+ return waiting_threads[i];
+ }
+ /*!
+ * @result num_waiting_threads
+ */
+ kmp_uint32 get_num_waiters() { return num_waiting_threads; }
+ /*!
+ * @param thr in the thread which is now waiting
+ *
+ * Insert a waiting thread at index 0.
+ */
+ void set_waiter(kmp_info_t *thr) {
+ waiting_threads[0] = thr;
+ num_waiting_threads = 1;
+ }
+ /*!
+ * @result true if the flag object has been released.
+ */
+ bool done_check() {
+ if (Sleepable)
+ return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
+ checker;
+ else
+ return traits_type::tcr(*(this->get())) == checker;
+ }
+ /*!
+ * @param old_loc in old value of flag
+ * @result true if the flag's old value indicates it was released.
+ */
+ bool done_check_val(FlagType old_loc) { return old_loc == checker; }
+ /*!
+ * @result true if the flag object is not yet released.
+ * Used in __kmp_wait_template like:
+ * @code
+ * while (flag.notdone_check()) { pause(); }
+ * @endcode
+ */
+ bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
+ /*!
+ * @result Actual flag value before release was applied.
+ * Trigger all waiting threads to run by modifying flag to release state.
+ */
+ void internal_release() {
+ (void)traits_type::test_then_add4((volatile FlagType *)this->get());
+ }
+ /*!
+ * @result Actual flag value before sleep bit(s) set.
+ * Notes that there is at least one thread sleeping on the flag by setting
+ * sleep bit(s).
+ */
+ FlagType set_sleeping() {
+ return traits_type::test_then_or((volatile FlagType *)this->get(),
+ KMP_BARRIER_SLEEP_STATE);
+ }
+ /*!
+ * @result Actual flag value before sleep bit(s) cleared.
+ * Notes that there are no longer threads sleeping on the flag by clearing
+ * sleep bit(s).
+ */
+ FlagType unset_sleeping() {
+ return traits_type::test_then_and((volatile FlagType *)this->get(),
+ ~KMP_BARRIER_SLEEP_STATE);
+ }
+ /*!
+ * @param old_loc in old value of flag
+ * Test whether there are threads sleeping on the flag's old value in old_loc.
+ */
+ bool is_sleeping_val(FlagType old_loc) {
+ return old_loc & KMP_BARRIER_SLEEP_STATE;
+ }
+ /*!
+ * Test whether there are threads sleeping on the flag.
+ */
+ bool is_sleeping() { return is_sleeping_val(*(this->get())); }
+ bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
+ kmp_uint8 *get_stolen() { return NULL; }
+ enum barrier_type get_bt() { return bs_last_barrier; }
+};
+
+template <typename FlagType, bool Sleepable>
+class kmp_basic_flag : public kmp_flag<FlagType> {
+ typedef flag_traits<FlagType> traits_type;
+ FlagType checker; /**< Value to compare flag to to check if flag has been
+ released. */
+ kmp_info_t
+ *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
+ kmp_uint32
+ num_waiting_threads; /**< Number of threads sleeping on this thread. */
+public:
+ kmp_basic_flag(std::atomic<FlagType> *p)
+ : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
+ kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
+ : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
+ waiting_threads[0] = thr;
+ }
+ kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
+ : kmp_flag<FlagType>(p, traits_type::t), checker(c),
+ num_waiting_threads(0) {}
+ /*!
+ * param i in index into waiting_threads
+ * @result the thread that is waiting at index i
+ */
+ kmp_info_t *get_waiter(kmp_uint32 i) {
+ KMP_DEBUG_ASSERT(i < num_waiting_threads);
+ return waiting_threads[i];
+ }
+ /*!
+ * @result num_waiting_threads
+ */
+ kmp_uint32 get_num_waiters() { return num_waiting_threads; }
+ /*!
+ * @param thr in the thread which is now waiting
+ *
+ * Insert a waiting thread at index 0.
+ */
+ void set_waiter(kmp_info_t *thr) {
+ waiting_threads[0] = thr;
+ num_waiting_threads = 1;
+ }
+ /*!
+ * @result true if the flag object has been released.
+ */
+ bool done_check() {
+ if (Sleepable)
+ return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
+ else
+ return this->load() == checker;
+ }
+ /*!
+ * @param old_loc in old value of flag
+ * @result true if the flag's old value indicates it was released.
+ */
+ bool done_check_val(FlagType old_loc) { return old_loc == checker; }
+ /*!
+ * @result true if the flag object is not yet released.
+ * Used in __kmp_wait_template like:
+ * @code
+ * while (flag.notdone_check()) { pause(); }
+ * @endcode
+ */
+ bool notdone_check() { return this->load() != checker; }
+ /*!
+ * @result Actual flag value before release was applied.
+ * Trigger all waiting threads to run by modifying flag to release state.
+ */
+ void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
+ /*!
+ * @result Actual flag value before sleep bit(s) set.
+ * Notes that there is at least one thread sleeping on the flag by setting
+ * sleep bit(s).
+ */
+ FlagType set_sleeping() {
+ return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
+ }
+ /*!
+ * @result Actual flag value before sleep bit(s) cleared.
+ * Notes that there are no longer threads sleeping on the flag by clearing
+ * sleep bit(s).
+ */
+ FlagType unset_sleeping() {
+ return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
+ }
+ /*!
+ * @param old_loc in old value of flag
+ * Test whether there are threads sleeping on the flag's old value in old_loc.
+ */
+ bool is_sleeping_val(FlagType old_loc) {
+ return old_loc & KMP_BARRIER_SLEEP_STATE;
+ }
+ /*!
+ * Test whether there are threads sleeping on the flag.
+ */
+ bool is_sleeping() { return is_sleeping_val(this->load()); }
+ bool is_any_sleeping() { return is_sleeping_val(this->load()); }
+ kmp_uint8 *get_stolen() { return NULL; }
+ enum barrier_type get_bt() { return bs_last_barrier; }
+};
+
template <bool Cancellable, bool Sleepable>
-class kmp_flag_32 : public kmp_flag_atomic<kmp_uint32, flag32, Sleepable> {
+class kmp_flag_32 : public kmp_basic_flag<kmp_uint32, Sleepable> {
public:
kmp_flag_32(std::atomic<kmp_uint32> *p)
- : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p) {}
+ : kmp_basic_flag<kmp_uint32, Sleepable>(p) {}
kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
- : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, thr) {}
+ : kmp_basic_flag<kmp_uint32, Sleepable>(p, thr) {}
kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
- : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, c) {}
+ : kmp_basic_flag<kmp_uint32, Sleepable>(p, c) {}
void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
@@ -851,16 +895,14 @@ class kmp_flag_32 : public kmp_flag_atomic<kmp_uint32, flag32, Sleepable> {
};
template <bool Cancellable, bool Sleepable>
-class kmp_flag_64 : public kmp_flag_native<kmp_uint64, flag64, Sleepable> {
+class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64, Sleepable> {
public:
kmp_flag_64(volatile kmp_uint64 *p)
- : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p) {}
+ : kmp_basic_flag_native<kmp_uint64, Sleepable>(p) {}
kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
- : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, thr) {}
+ : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, thr) {}
kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
- : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c) {}
- kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic<bool> *loc)
- : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c, loc) {}
+ : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, c) {}
void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
@@ -886,52 +928,20 @@ class kmp_flag_64 : public kmp_flag_native<kmp_uint64, flag64, Sleepable> {
flag_type get_ptr_type() { return flag64; }
};
-template <bool Cancellable, bool Sleepable>
-class kmp_atomic_flag_64
- : public kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable> {
-public:
- kmp_atomic_flag_64(std::atomic<kmp_uint64> *p)
- : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p) {}
- kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_info_t *thr)
- : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, thr) {}
- kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c)
- : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c) {}
- kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c,
- std::atomic<bool> *loc)
- : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c, loc) {}
- void suspend(int th_gtid) { __kmp_atomic_suspend_64(th_gtid, this); }
- void mwait(int th_gtid) { __kmp_atomic_mwait_64(th_gtid, this); }
- void resume(int th_gtid) { __kmp_atomic_resume_64(th_gtid, this); }
- int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
- int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
- kmp_int32 is_constrained) {
- return __kmp_atomic_execute_tasks_64(
- this_thr, gtid, this, final_spin,
- thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
- }
- bool wait(kmp_info_t *this_thr,
- int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
- if (final_spin)
- return __kmp_wait_template<kmp_atomic_flag_64, TRUE, Cancellable,
- Sleepable>(
- this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
- else
- return __kmp_wait_template<kmp_atomic_flag_64, FALSE, Cancellable,
- Sleepable>(
- this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
- }
- void release() { __kmp_release_template(this); }
- flag_type get_ptr_type() { return atomic_flag64; }
-};
-
// Hierarchical 64-bit on-core barrier instantiation
-class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
- kmp_uint32 offset; /**< Portion of flag of interest for an operation. */
+class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
+ kmp_uint64 checker;
+ kmp_info_t *waiting_threads[1];
+ kmp_uint32 num_waiting_threads;
+ kmp_uint32
+ offset; /**< Portion of flag that is of interest for an operation. */
bool flag_switch; /**< Indicates a switch in flag location. */
enum barrier_type bt; /**< Barrier type. */
- kmp_info_t *this_thr; /**< Thread to redirect to different flag location. */
+ kmp_info_t *this_thr; /**< Thread that may be redirected to different flag
+ location. */
#if USE_ITT_BUILD
- void *itt_sync_obj; /**< ITT object to pass to new flag location. */
+ void *
+ itt_sync_obj; /**< ITT object that must be passed to new flag location. */
#endif
unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
@@ -939,17 +949,26 @@ class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
public:
kmp_flag_oncore(volatile kmp_uint64 *p)
- : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), flag_switch(false) {
- }
+ : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
+ flag_switch(false) {}
kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
- : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), offset(idx),
- flag_switch(false), bt(bs_last_barrier), itt_sync_obj(nullptr) {}
+ : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
+ offset(idx), flag_switch(false) {}
kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
enum barrier_type bar_t,
kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
- : kmp_flag_native<kmp_uint64, flag_oncore, false>(p, c), offset(idx),
- flag_switch(false), bt(bar_t),
+ : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
+ num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
+ kmp_info_t *get_waiter(kmp_uint32 i) {
+ KMP_DEBUG_ASSERT(i < num_waiting_threads);
+ return waiting_threads[i];
+ }
+ kmp_uint32 get_num_waiters() { return num_waiting_threads; }
+ void set_waiter(kmp_info_t *thr) {
+ waiting_threads[0] = thr;
+ num_waiting_threads = 1;
+ }
bool done_check_val(kmp_uint64 old_loc) {
return byteref(&old_loc, offset) == checker;
}
@@ -978,6 +997,17 @@ class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
KMP_TEST_THEN_OR64(get(), mask);
}
}
+ kmp_uint64 set_sleeping() {
+ return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
+ }
+ kmp_uint64 unset_sleeping() {
+ return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
+ }
+ bool is_sleeping_val(kmp_uint64 old_loc) {
+ return old_loc & KMP_BARRIER_SLEEP_STATE;
+ }
+ bool is_sleeping() { return is_sleeping_val(*get()); }
+ bool is_any_sleeping() { return is_sleeping_val(*get()); }
void wait(kmp_info_t *this_thr, int final_spin) {
if (final_spin)
__kmp_wait_template<kmp_flag_oncore, TRUE>(
@@ -1008,39 +1038,27 @@ class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
#endif
}
+ kmp_uint8 *get_stolen() { return NULL; }
enum barrier_type get_bt() { return bt; }
flag_type get_ptr_type() { return flag_oncore; }
};
-static inline void __kmp_null_resume_wrapper(kmp_info_t *thr) {
- int gtid = __kmp_gtid_from_thread(thr);
- void *flag = CCAST(void *, thr->th.th_sleep_loc);
- flag_type type = thr->th.th_sleep_loc_type;
+// Used to wake up threads, volatile void* flag is usually the th_sleep_loc
+// associated with int gtid.
+static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
if (!flag)
return;
- // Attempt to wake up a thread: examine its type and call appropriate template
- switch (type) {
+
+ switch (RCAST(kmp_flag_64<> *, CCAST(void *, flag))->get_type()) {
case flag32:
- __kmp_resume_32(gtid, RCAST(kmp_flag_32<> *, flag));
+ __kmp_resume_32(gtid, (kmp_flag_32<> *)NULL);
break;
case flag64:
- __kmp_resume_64(gtid, RCAST(kmp_flag_64<> *, flag));
- break;
- case atomic_flag64:
- __kmp_atomic_resume_64(gtid, RCAST(kmp_atomic_flag_64<> *, flag));
+ __kmp_resume_64(gtid, (kmp_flag_64<> *)NULL);
break;
case flag_oncore:
- __kmp_resume_oncore(gtid, RCAST(kmp_flag_oncore *, flag));
- break;
-#ifdef KMP_DEBUG
- case flag_unset:
- KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d is unset\n", type));
+ __kmp_resume_oncore(gtid, (kmp_flag_oncore *)NULL);
break;
- default:
- KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d does not match any "
- "known flag type\n",
- type));
-#endif
}
}
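Note on the flag classes restored above: in the Sleepable case, done_check() masks the sleep bit out of the flag word before comparing against checker, while internal_release() advances the same word by 4, so the sleep bit and the barrier progress count share one location. The standalone snippet below illustrates that encoding; the concrete values (sleep bit in bit 0, bump of 4) are assumptions chosen to match the "+4" in internal_release(), not quotes from kmp.h.
#include <cstdint>
#include <cstdio>
int main() {
  const uint64_t SLEEP_STATE = 1; // assumed analogue of KMP_BARRIER_SLEEP_STATE
  const uint64_t BUMP = 4;        // matches the "+4" used by internal_release()
  uint64_t flag = 0, checker = 4; // waiter is released once flag reaches checker
  flag |= SLEEP_STATE;            // set_sleeping(): a waiter noted it is sleeping
  flag += BUMP;                   // internal_release(): owner bumps the progress count
  // Sleepable done_check(): ignore the sleep bit when comparing against checker.
  bool released = (flag & ~SLEEP_STATE) == checker;
  std::printf("flag=%llu released=%d\n", (unsigned long long)flag, released ? 1 : 0);
  return 0;
}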
diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
index 920de698ac43..bd50987a857a 100644
--- a/openmp/runtime/src/z_Linux_util.cpp
+++ b/openmp/runtime/src/z_Linux_util.cpp
@@ -1409,13 +1409,9 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
/* TODO: shouldn't this use release semantics to ensure that
__kmp_suspend_initialize_thread gets called first? */
old_spin = flag->set_sleeping();
- TCW_PTR(th->th.th_sleep_loc, (void *)flag);
- th->th.th_sleep_loc_type = flag->get_type();
if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
__kmp_pause_status != kmp_soft_paused) {
flag->unset_sleeping();
- TCW_PTR(th->th.th_sleep_loc, NULL);
- th->th.th_sleep_loc_type = flag_unset;
__kmp_unlock_suspend_mx(th);
return;
}
@@ -1423,10 +1419,8 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
" was %x\n",
th_gtid, flag->get(), flag->load(), old_spin));
- if (flag->done_check_val(old_spin) || flag->done_check()) {
- flag->unset_sleeping();
- TCW_PTR(th->th.th_sleep_loc, NULL);
- th->th.th_sleep_loc_type = flag_unset;
+ if (flag->done_check_val(old_spin)) {
+ old_spin = flag->unset_sleeping();
KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "
"for spin(%p)\n",
th_gtid, flag->get()));
@@ -1435,6 +1429,7 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
"with low probability" return when the condition variable has
not been signaled or broadcast */
int deactivated = FALSE;
+ TCW_PTR(th->th.th_sleep_loc, (void *)flag);
while (flag->is_sleeping()) {
#ifdef DEBUG_SUSPEND
@@ -1456,9 +1451,6 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
deactivated = TRUE;
}
- KMP_DEBUG_ASSERT(th->th.th_sleep_loc);
- KMP_DEBUG_ASSERT(flag->get_type() == th->th.th_sleep_loc_type);
-
#if USE_SUSPEND_TIMEOUT
struct timespec now;
struct timeval tval;
@@ -1488,18 +1480,6 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
if ((status != 0) && (status != EINTR) && (status != ETIMEDOUT)) {
KMP_SYSFAIL("pthread_cond_wait", status);
}
-
- KMP_DEBUG_ASSERT(flag->get_type() == flag->get_ptr_type());
-
- if (!flag->is_sleeping() &&
- ((status == EINTR) || (status == ETIMEDOUT))) {
- // if interrupt or timeout, and thread is no longer sleeping, we need to
- // make sure sleep_loc gets reset; however, this shouldn't be needed if
- // we woke up with resume
- flag->unset_sleeping();
- TCW_PTR(th->th.th_sleep_loc, NULL);
- th->th.th_sleep_loc_type = flag_unset;
- }
#ifdef KMP_DEBUG
if (status == ETIMEDOUT) {
if (flag->is_sleeping()) {
@@ -1509,8 +1489,6 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
KF_TRACE(2, ("__kmp_suspend_template: T#%d timeout wakeup, sleep bit "
"not set!\n",
th_gtid));
- TCW_PTR(th->th.th_sleep_loc, NULL);
- th->th.th_sleep_loc_type = flag_unset;
}
} else if (flag->is_sleeping()) {
KF_TRACE(100,
@@ -1528,13 +1506,6 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
}
}
}
- // We may have had the loop variable set before entering the loop body;
- // so we need to reset sleep_loc.
- TCW_PTR(th->th.th_sleep_loc, NULL);
- th->th.th_sleep_loc_type = flag_unset;
-
- KMP_DEBUG_ASSERT(!flag->is_sleeping());
- KMP_DEBUG_ASSERT(!th->th.th_sleep_loc);
#ifdef DEBUG_SUSPEND
{
char buffer[128];
@@ -1556,10 +1527,6 @@ template <bool C, bool S>
void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {
__kmp_suspend_template(th_gtid, flag);
}
-template <bool C, bool S>
-void __kmp_atomic_suspend_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag) {
- __kmp_suspend_template(th_gtid, flag);
-}
void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
__kmp_suspend_template(th_gtid, flag);
}
@@ -1567,10 +1534,6 @@ void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);
template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);
-template void
-__kmp_atomic_suspend_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
-template void
-__kmp_atomic_suspend_64<true, false>(int, kmp_atomic_flag_64<true, false> *);
/* This routine signals the thread specified by target_gtid to wake up
after setting the sleep bit indicated by the flag argument to FALSE.
@@ -1593,50 +1556,36 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
__kmp_lock_suspend_mx(th);
- if (!flag || flag != th->th.th_sleep_loc) {
- // coming from __kmp_null_resume_wrapper, or thread is now sleeping on a
- // different location; wake up at new location
+ if (!flag) { // coming from __kmp_null_resume_wrapper
flag = (C *)CCAST(void *, th->th.th_sleep_loc);
}
// First, check if the flag is null or its type has changed. If so, someone
// else woke it up.
- if (!flag) { // Thread doesn't appear to be sleeping on anything
+ if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type
+ // simply shows what flag was cast to
KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
"awake: flag(%p)\n",
- gtid, target_gtid, (void *)NULL));
- __kmp_unlock_suspend_mx(th);
- return;
- } else if (flag->get_type() != th->th.th_sleep_loc_type) {
- // Flag type does not appear to match this function template; possibly the
- // thread is sleeping on something else. Try null resume again.
- KF_TRACE(
- 5,
- ("__kmp_resume_template: T#%d retrying, thread T#%d Mismatch flag(%p), "
- "spin(%p) type=%d ptr_type=%d\n",
- gtid, target_gtid, flag, flag->get(), flag->get_type(),
- th->th.th_sleep_loc_type));
+ gtid, target_gtid, NULL));
__kmp_unlock_suspend_mx(th);
- __kmp_null_resume_wrapper(th);
return;
} else { // if multiple threads are sleeping, flag should be internally
// referring to a specific thread here
- if (!flag->is_sleeping()) {
+ typename C::flag_t old_spin = flag->unset_sleeping();
+ if (!flag->is_sleeping_val(old_spin)) {
KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
- "awake: flag(%p): %u\n",
- gtid, target_gtid, flag->get(), (unsigned int)flag->load()));
+ "awake: flag(%p): "
+ "%u => %u\n",
+ gtid, target_gtid, flag->get(), old_spin, flag->load()));
__kmp_unlock_suspend_mx(th);
return;
}
+ KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset "
+ "sleep bit for flag's loc(%p): "
+ "%u => %u\n",
+ gtid, target_gtid, flag->get(), old_spin, flag->load()));
}
- KMP_DEBUG_ASSERT(flag);
- flag->unset_sleeping();
TCW_PTR(th->th.th_sleep_loc, NULL);
- th->th.th_sleep_loc_type = flag_unset;
-
- KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset "
- "sleep bit for flag's loc(%p): %u\n",
- gtid, target_gtid, flag->get(), (unsigned int)flag->load()));
#ifdef DEBUG_SUSPEND
{
@@ -1662,19 +1611,12 @@ template <bool C, bool S>
void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {
__kmp_resume_template(target_gtid, flag);
}
-template <bool C, bool S>
-void __kmp_atomic_resume_64(int target_gtid, kmp_atomic_flag_64<C, S> *flag) {
- __kmp_resume_template(target_gtid, flag);
-}
void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
__kmp_resume_template(target_gtid, flag);
}
template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);
-template void __kmp_resume_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);
-template void
-__kmp_atomic_resume_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
#if KMP_USE_MONITOR
void __kmp_resume_monitor() {
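The z_Linux_util.cpp hunks above restore the older suspend/resume handshake: the waiter sets the sleep bit, re-checks for a false alarm, publishes th_sleep_loc, and blocks while the bit stays set; the resumer clears the bit and uses the returned old value to decide whether a wakeup is actually needed. Below is a self-contained analogue of that handshake, with std::condition_variable standing in for the suspend condition variable; names and constants are illustrative, not kmp's.
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>
constexpr unsigned SLEEP_BIT = 1; // assumed analogue of KMP_BARRIER_SLEEP_STATE
constexpr unsigned BUMP = 4;      // progress increment, as in internal_release()
std::atomic<unsigned> flag{0};
std::mutex mx;                    // plays the role of th_suspend_mx
std::condition_variable cv;       // plays the role of th_suspend_cv
void suspend(unsigned checker) {
  std::unique_lock<std::mutex> lk(mx);
  unsigned old_spin = flag.fetch_or(SLEEP_BIT);   // set_sleeping()
  if ((old_spin & ~SLEEP_BIT) == checker) {       // done_check_val(): false alarm
    flag.fetch_and(~SLEEP_BIT);                   // unset_sleeping()
    return;
  }
  while (flag.load() & SLEEP_BIT)                 // is_sleeping()
    cv.wait(lk);                                  // woken by resume()
}
void resume() {
  std::lock_guard<std::mutex> lk(mx);
  unsigned old_spin = flag.fetch_and(~SLEEP_BIT); // unset_sleeping()
  if (!(old_spin & SLEEP_BIT))                    // target was already awake
    return;
  cv.notify_one();
}
int main() {
  std::thread waiter([] { suspend(/*checker=*/BUMP); });
  std::this_thread::sleep_for(std::chrono::milliseconds(50));
  flag.fetch_add(BUMP);                           // release: bump the progress count
  resume();
  waiter.join();
  std::printf("waiter released, flag=%u\n", flag.load() & ~SLEEP_BIT);
  return 0;
}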
diff --git a/openmp/runtime/src/z_Windows_NT_util.cpp b/openmp/runtime/src/z_Windows_NT_util.cpp
index 0a0801c7ece2..320920283c9d 100644
--- a/openmp/runtime/src/z_Windows_NT_util.cpp
+++ b/openmp/runtime/src/z_Windows_NT_util.cpp
@@ -240,12 +240,13 @@ static void __kmp_win32_cond_wait(kmp_win32_cond_t *cv, kmp_win32_mutex_t *mx,
continue;
}
// condition fulfilled, exiting
- flag->unset_sleeping();
+ old_f = flag->unset_sleeping();
+ KMP_DEBUG_ASSERT(old_f & KMP_BARRIER_SLEEP_STATE);
TCW_PTR(th->th.th_sleep_loc, NULL);
- th->th.th_sleep_loc_type = flag_unset;
- KF_TRACE(50, ("__kmp_win32_cond_wait: exiting, condition "
- "fulfilled: flag's loc(%p): %u\n",
- flag->get(), (unsigned int)flag->load()));
+ KF_TRACE(50,
+ ("__kmp_win32_cond_wait: exiting, condition "
+ "fulfilled: flag's loc(%p): %u => %u\n",
+ flag->get(), (unsigned int)old_f, (unsigned int)flag->load()));
__kmp_win32_mutex_lock(&cv->waiters_count_lock_);
KMP_DEBUG_ASSERT(cv->waiters_count_ > 0);
@@ -375,13 +376,9 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
/* TODO: shouldn't this use release semantics to ensure that
__kmp_suspend_initialize_thread gets called first? */
old_spin = flag->set_sleeping();
- TCW_PTR(th->th.th_sleep_loc, (void *)flag);
- th->th.th_sleep_loc_type = flag->get_type();
if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
__kmp_pause_status != kmp_soft_paused) {
flag->unset_sleeping();
- TCW_PTR(th->th.th_sleep_loc, NULL);
- th->th.th_sleep_loc_type = flag_unset;
__kmp_unlock_suspend_mx(th);
return;
}
@@ -390,10 +387,8 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
" loc(%p)==%u\n",
th_gtid, flag->get(), (unsigned int)flag->load()));
- if (flag->done_check_val(old_spin) || flag->done_check()) {
- flag->unset_sleeping();
- TCW_PTR(th->th.th_sleep_loc, NULL);
- th->th.th_sleep_loc_type = flag_unset;
+ if (flag->done_check_val(old_spin)) {
+ old_spin = flag->unset_sleeping();
KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "
"for flag's loc(%p)\n",
th_gtid, flag->get()));
@@ -405,7 +400,7 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
low probability" return when the condition variable has not been signaled
or broadcast */
int deactivated = FALSE;
-
+ TCW_PTR(th->th.th_sleep_loc, (void *)flag);
while (flag->is_sleeping()) {
KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform "
"kmp_win32_cond_wait()\n",
@@ -420,14 +415,13 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
}
deactivated = TRUE;
+ __kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, th,
+ flag);
+ } else {
+ __kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, th,
+ flag);
}
- KMP_DEBUG_ASSERT(th->th.th_sleep_loc);
- KMP_DEBUG_ASSERT(th->th.th_sleep_loc_type == flag->get_type());
-
- __kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, th,
- flag);
-
#ifdef KMP_DEBUG
if (flag->is_sleeping()) {
KF_TRACE(100,
@@ -437,14 +431,6 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
} // while
- // We may have had the loop variable set before entering the loop body;
- // so we need to reset sleep_loc.
- TCW_PTR(th->th.th_sleep_loc, NULL);
- th->th.th_sleep_loc_type = flag_unset;
-
- KMP_DEBUG_ASSERT(!flag->is_sleeping());
- KMP_DEBUG_ASSERT(!th->th.th_sleep_loc);
-
// Mark the thread as active again (if it was previous marked as inactive)
if (deactivated) {
th->th.th_active = TRUE;
@@ -467,10 +453,6 @@ template <bool C, bool S>
void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {
__kmp_suspend_template(th_gtid, flag);
}
-template <bool C, bool S>
-void __kmp_atomic_suspend_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag) {
- __kmp_suspend_template(th_gtid, flag);
-}
void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
__kmp_suspend_template(th_gtid, flag);
}
@@ -478,10 +460,6 @@ void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);
template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);
-template void
-__kmp_atomic_suspend_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
-template void
-__kmp_atomic_suspend_64<true, false>(int, kmp_atomic_flag_64<true, false> *);
/* This routine signals the thread specified by target_gtid to wake up
after setting the sleep bit indicated by the flag argument to FALSE */
@@ -499,35 +477,32 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
__kmp_suspend_initialize_thread(th);
__kmp_lock_suspend_mx(th);
- if (!flag || flag != th->th.th_sleep_loc) {
- // coming from __kmp_null_resume_wrapper, or thread is now sleeping on a
- // different location; wake up at new location
+ if (!flag) { // coming from __kmp_null_resume_wrapper
flag = (C *)th->th.th_sleep_loc;
}
// First, check if the flag is null or its type has changed. If so, someone
// else woke it up.
- if (!flag || flag->get_type() != th->th.th_sleep_loc_type) {
- // simply shows what flag was cast to
+ if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type
+ // simply shows what
+ // flag was cast to
KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
"awake: flag's loc(%p)\n",
gtid, target_gtid, NULL));
__kmp_unlock_suspend_mx(th);
return;
} else {
- if (!flag->is_sleeping()) {
+ typename C::flag_t old_spin = flag->unset_sleeping();
+ if (!flag->is_sleeping_val(old_spin)) {
KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
- "awake: flag's loc(%p): %u\n",
- gtid, target_gtid, flag->get(), (unsigned int)flag->load()));
+ "awake: flag's loc(%p): %u => %u\n",
+ gtid, target_gtid, flag->get(), (unsigned int)old_spin,
+ (unsigned int)flag->load()));
__kmp_unlock_suspend_mx(th);
return;
}
}
- KMP_DEBUG_ASSERT(flag);
- flag->unset_sleeping();
TCW_PTR(th->th.th_sleep_loc, NULL);
- th->th.th_sleep_loc_type = flag_unset;
-
KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep "
"bit for flag's loc(%p)\n",
gtid, target_gtid, flag->get()));
@@ -548,19 +523,12 @@ template <bool C, bool S>
void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {
__kmp_resume_template(target_gtid, flag);
}
-template <bool C, bool S>
-void __kmp_atomic_resume_64(int target_gtid, kmp_atomic_flag_64<C, S> *flag) {
- __kmp_resume_template(target_gtid, flag);
-}
void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
__kmp_resume_template(target_gtid, flag);
}
template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);
-template void __kmp_resume_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);
-template void
-__kmp_atomic_resume_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
void __kmp_yield() { Sleep(0); }
diff --git a/openmp/runtime/test/barrier/omp_barrier.c b/openmp/runtime/test/barrier/omp_barrier.c
index 052784e0de39..3da70db92890 100644
--- a/openmp/runtime/test/barrier/omp_barrier.c
+++ b/openmp/runtime/test/barrier/omp_barrier.c
@@ -2,8 +2,6 @@
// RUN: %libomp-compile && env KMP_BLOCKTIME=infinite %libomp-run
// RUN: %libomp-compile && env KMP_PLAIN_BARRIER_PATTERN='hierarchical,hierarchical' KMP_FORKJOIN_BARRIER_PATTERN='hierarchical,hierarchical' %libomp-run
// RUN: %libomp-compile && env KMP_BLOCKTIME=infinite KMP_PLAIN_BARRIER_PATTERN='hierarchical,hierarchical' KMP_FORKJOIN_BARRIER_PATTERN='hierarchical,hierarchical' %libomp-run
-// RUN: %libomp-compile && env KMP_PLAIN_BARRIER_PATTERN='dist,dist' KMP_FORKJOIN_BARRIER_PATTERN='dist,dist' KMP_REDUCTION_BARRIER_PATTERN='dist,dist' %libomp-run
-// RUN: %libomp-compile && env KMP_BLOCKTIME=infinite KMP_PLAIN_BARRIER_PATTERN='dist,dist' KMP_FORKJOIN_BARRIER_PATTERN='dist,dist' KMP_REDUCTION_BARRIER_PATTERN='dist,dist' %libomp-run
#include <stdio.h>
#include "omp_testsuite.h"
#include "omp_my_sleep.h"