[Openmp-commits] [openmp] r318388 - Exclude untied tasks from the task scheduling constraint (TSC) check.
Andrey Churbanov via Openmp-commits
openmp-commits at lists.llvm.org
Thu Nov 16 02:45:07 PST 2017
Author: achurbanov
Date: Thu Nov 16 02:45:07 2017
New Revision: 318388
URL: http://llvm.org/viewvc/llvm-project?rev=318388&view=rev
Log:
Exclude untied tasks from the task scheduling constraint (TSC) check.
This can improve the performance of tests that use untied tasks.
Differential Revision: https://reviews.llvm.org/D39613
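For context, here is a minimal user-level sketch (illustrative only, not part of the patch) of the kind of program this change targets. An untied task is not bound to the thread that first runs it, so the runtime cannot prove TSC conformance by inspecting only the head of a victim's deque:

  #include <omp.h>

  int main(void) {
    #pragma omp parallel
    #pragma omp single
    {
      for (int i = 0; i < 1000; ++i) {
        #pragma omp task untied // untied: may be resumed by any thread
        { /* independent work item i */ }
      }
    }
    return 0;
  }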
Modified:
openmp/trunk/runtime/src/kmp.h
openmp/trunk/runtime/src/kmp_global.cpp
openmp/trunk/runtime/src/kmp_tasking.cpp
Modified: openmp/trunk/runtime/src/kmp.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp.h?rev=318388&r1=318387&r2=318388&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp.h (original)
+++ openmp/trunk/runtime/src/kmp.h Thu Nov 16 02:45:07 2017
@@ -2028,7 +2028,7 @@ typedef enum kmp_tasking_mode {
extern kmp_tasking_mode_t
__kmp_tasking_mode; /* determines how/when to execute tasks */
-extern kmp_int32 __kmp_task_stealing_constraint;
+extern int __kmp_task_stealing_constraint;
#if OMP_40_ENABLED
extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
// specified, defaults to 0 otherwise
@@ -2248,13 +2248,14 @@ struct kmp_taskdata { /* aligned during
*td_dephash; // Dependencies for children tasks are tracked from here
kmp_depnode_t
*td_depnode; // Pointer to graph node if this task has dependencies
-#endif
-#if OMPT_SUPPORT
- ompt_task_info_t ompt_task_info;
-#endif
+#endif // OMP_40_ENABLED
#if OMP_45_ENABLED
kmp_task_team_t *td_task_team;
kmp_int32 td_size_alloc; // The size of task structure, including shareds etc.
+#endif // OMP_45_ENABLED
+ kmp_taskdata_t *td_last_tied; // last deferred tied task, for the task scheduling constraint
+#if OMPT_SUPPORT
+ ompt_task_info_t ompt_task_info;
#endif
}; // struct kmp_taskdata
@@ -2312,6 +2313,7 @@ typedef struct kmp_base_task_team {
kmp_int32
tt_found_proxy_tasks; /* Have we found proxy tasks since last barrier */
#endif
+ kmp_int32 tt_untied_task_encountered;
KMP_ALIGN_CACHE
volatile kmp_int32 tt_unfinished_threads; /* #threads still active */
Modified: openmp/trunk/runtime/src/kmp_global.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_global.cpp?rev=318388&r1=318387&r2=318388&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_global.cpp (original)
+++ openmp/trunk/runtime/src/kmp_global.cpp Thu Nov 16 02:45:07 2017
@@ -305,8 +305,7 @@ kmp_uint64 __kmp_taskloop_min_tasks = 0;
be redefined to have exactly 32 bits. */
KMP_BUILD_ASSERT(sizeof(kmp_tasking_flags_t) == 4);
-kmp_int32 __kmp_task_stealing_constraint =
- 1; /* Constrain task stealing by default */
+int __kmp_task_stealing_constraint = 1; /* Constrain task stealing by default */
#ifdef DEBUG_SUSPEND
int __kmp_suspend_count = 0;
Modified: openmp/trunk/runtime/src/kmp_tasking.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_tasking.cpp?rev=318388&r1=318387&r2=318388&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_tasking.cpp (original)
+++ openmp/trunk/runtime/src/kmp_tasking.cpp Thu Nov 16 02:45:07 2017
@@ -806,13 +806,6 @@ static void __kmp_task_finish(kmp_int32
if (resumed_task == NULL) {
resumed_task = taskdata->td_parent; // In a serialized task, the resumed
// task is the parent
- } else
-#if OMP_45_ENABLED
- if (!(task_team && task_team->tt.tt_found_proxy_tasks))
-#endif
- {
- // verify resumed task passed in points to parent
- KMP_DEBUG_ASSERT(resumed_task == taskdata->td_parent);
}
} else {
KMP_DEBUG_ASSERT(resumed_task !=
@@ -949,6 +942,7 @@ void __kmp_init_implicit_task(ident_t *l
#if OMP_40_ENABLED
task->td_depnode = NULL;
#endif
+ task->td_last_tied = task;
if (set_curr_task) { // only do this init first time thread is created
task->td_incomplete_child_tasks = 0;
@@ -1040,6 +1034,12 @@ kmp_task_t *__kmp_task_alloc(ident_t *lo
}
flags->final = 1;
}
+ if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
+ // An encountered untied task causes the TSC algorithm to check the entire
+ // deque of the victim thread. If no untied task was encountered, checking
+ // the head of the deque is enough.
+ KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
+ }
#if OMP_45_ENABLED
if (flags->proxy == TASK_PROXY) {
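A note on the new flag: KMP_CHECK_UPDATE only writes when the value would change (it expands to roughly "if ((a) != (b)) (a) = (b)"), and tt_untied_task_encountered is monotonic within a phase: it only goes from 0 to 1 between barriers and is reset in __kmp_task_team_wait (see the final hunk), which is presumably why a plain flag suffices. A standalone mini-model of the protocol, with hypothetical names:

  // Mini-model of the tt_untied_task_encountered protocol (hypothetical
  // names; the real flag lives in kmp_base_task_team).
  #include <atomic>

  static std::atomic<int> untied_seen{0}; // one flag per task team

  // Task creation (__kmp_task_alloc): any producer of a deferred untied
  // task raises the flag once per phase.
  void on_untied_task_created() {
    if (untied_seen.load(std::memory_order_relaxed) == 0)
      untied_seen.store(1, std::memory_order_relaxed); // KMP_CHECK_UPDATE-like
  }

  // Stealing (__kmp_steal_task): if the head task fails the TSC and no
  // untied task was ever queued, the rest of the deque can be skipped.
  bool worth_scanning_rest_of_deque() {
    return untied_seen.load(std::memory_order_relaxed) != 0;
  }

  // Barrier (__kmp_task_team_wait): reset for the next phase.
  void on_barrier() { untied_seen.store(0, std::memory_order_relaxed); }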
@@ -1184,6 +1184,10 @@ kmp_task_t *__kmp_task_alloc(ident_t *lo
taskdata->td_dephash = NULL;
taskdata->td_depnode = NULL;
#endif
+ if (flags->tiedness == TASK_UNTIED)
+ taskdata->td_last_tied = NULL; // will be set when the task is scheduled
+ else
+ taskdata->td_last_tied = taskdata;
// Only need to keep track of child task counts if team parallel and tasking not
// serialized or if it is a proxy task
@@ -1349,6 +1353,10 @@ static void __kmp_invoke_task(kmp_int32
// Invoke the task routine and pass in relevant data.
// Thunks generated by gcc take a different argument list.
if (!discard) {
+ if (taskdata->td_flags.tiedness == TASK_UNTIED) {
+ taskdata->td_last_tied = current_task->td_last_tied;
+ KMP_DEBUG_ASSERT(taskdata->td_last_tied);
+ }
#if KMP_STATS_ENABLED
KMP_COUNT_BLOCK(TASK_executed);
switch (KMP_GET_THREAD_STATE()) {
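Taken together, the two hunks above define td_last_tied's lifecycle: a tied task points at itself from allocation, while an untied task leaves the field NULL until it is first invoked, at which point it inherits td_last_tied from the task that schedules it. A condensed standalone model (hypothetical types; the real fields live in kmp_taskdata in kmp.h):

  // Standalone model of how td_last_tied is established.
  enum Tiedness { TIED, UNTIED };

  struct Task {
    Tiedness tiedness;
    Task *last_tied; // innermost deferred tied task on the current chain
  };

  // __kmp_task_alloc: tied tasks are their own "last tied" task.
  void init_last_tied(Task *t) {
    t->last_tied = (t->tiedness == UNTIED) ? nullptr // set when scheduled
                                           : t;
  }

  // __kmp_invoke_task: an untied task inherits from the scheduling task,
  // so the chain always ends at a tied task (implicit tasks are tied).
  void inherit_last_tied(Task *t, const Task *current) {
    if (t->tiedness == UNTIED)
      t->last_tied = current->last_tied;
  }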
@@ -2029,6 +2037,10 @@ void __kmpc_end_taskgroup(ident_t *loc,
KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
if (__kmp_tasking_mode != tskm_immediate_exec) {
+ // mark the task as waiting at a taskgroup (not at a barrier)
+ taskdata->td_taskwait_counter += 1;
+ taskdata->td_taskwait_ident = loc;
+ taskdata->td_taskwait_thread = gtid + 1;
#if USE_ITT_BUILD
// For ITT the taskgroup wait is similar to taskwait until we need to
// distinguish them
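The sign convention recorded here is what the new steal-time check below relies on: td_taskwait_thread is set to gtid + 1 while the task is blocked in a taskwait or taskgroup and negated when the wait ends, so a value > 0 distinguishes "paused at taskwait/taskgroup" (whose descendants may still be scheduled under the TSC) from "suspended at a barrier", where the check must reject the candidate. A one-line reading of the encoding (hypothetical helper):

  // Hypothetical helper spelling out the encoding used above:
  //   td_taskwait_thread ==  gtid + 1   while blocked in taskwait/taskgroup
  //   td_taskwait_thread == -(gtid + 1) after that wait has finished
  static bool paused_at_taskwait(int td_taskwait_thread) {
    return td_taskwait_thread > 0; // <= 0: not waiting here (e.g. at a barrier)
  }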
@@ -2060,6 +2072,7 @@ void __kmpc_end_taskgroup(ident_t *loc,
__kmp_task_stealing_constraint);
}
}
+ taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; // end waiting
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
@@ -2144,26 +2157,31 @@ static kmp_task_t *__kmp_remove_my_task(
taskdata = thread_data->td.td_deque[tail];
if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
- // we need to check if the candidate obeys task scheduling constraint:
- // only child of current task can be scheduled
- kmp_taskdata_t *current = thread->th.th_current_task;
- kmp_int32 level = current->td_level;
- kmp_taskdata_t *parent = taskdata->td_parent;
- while (parent != current && parent->td_level > level) {
- parent = parent->td_parent; // check generation up to the level of the
- // current task
- KMP_DEBUG_ASSERT(parent != NULL);
- }
- if (parent != current) {
- // If the tail task is not a child, then no other child can appear in the
- // deque.
- __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
- KA_TRACE(10,
- ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
- "ntasks=%d head=%u tail=%u\n",
- gtid, thread_data->td.td_deque_ntasks,
- thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
- return NULL;
+ // we need to check if the candidate obeys the task scheduling constraint
+ // (TSC): only a descendant of all deferred tied tasks can be scheduled;
+ // checking the last one suffices, as it is a descendant of all the others
+ kmp_taskdata_t *current = thread->th.th_current_task->td_last_tied;
+ KMP_DEBUG_ASSERT(current != NULL);
+ // check that the last tied task is not suspended at a barrier
+ if (current->td_flags.tasktype == TASK_EXPLICIT ||
+ current->td_taskwait_thread > 0) { // <= 0 on barrier
+ kmp_int32 level = current->td_level;
+ kmp_taskdata_t *parent = taskdata->td_parent;
+ while (parent != current && parent->td_level > level) {
+ parent = parent->td_parent; // check generation up to the level of the
+ // current task
+ KMP_DEBUG_ASSERT(parent != NULL);
+ }
+ if (parent != current) {
+ // The TSC does not allow stealing the victim task
+ __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
+ KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
+ "ntasks=%d head=%u tail=%u\n",
+ gtid, thread_data->td.td_deque_ntasks,
+ thread_data->td.td_deque_head,
+ thread_data->td.td_deque_tail));
+ return NULL;
+ }
}
}
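The ancestry walk above is the heart of the TSC check and appears inlined twice in this patch. Distilled into a standalone predicate (hypothetical names; td_level bounds the walk exactly as in the patch, so the loop cost is the nesting-depth difference, not the deque length):

  // TSC predicate: is 'candidate' a descendant of 'last_tied'?
  struct TaskNode {
    TaskNode *parent; // td_parent
    int level;        // td_level: nesting depth, smallest at the root
  };

  static bool tsc_allows(const TaskNode *candidate, const TaskNode *last_tied) {
    const TaskNode *p = candidate->parent;
    // climb the ancestor chain, but never above last_tied's generation
    while (p != last_tied && p->level > last_tied->level)
      p = p->parent;
    return p == last_tied;
  }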
@@ -2184,14 +2202,16 @@ static kmp_task_t *__kmp_remove_my_task(
// __kmp_steal_task: remove a task from another thread's deque
// Assume that calling thread has already checked existence of
// task_team thread_data before calling this routine.
-static kmp_task_t *__kmp_steal_task(kmp_info_t *victim, kmp_int32 gtid,
+static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
kmp_task_team_t *task_team,
volatile kmp_int32 *unfinished_threads,
int *thread_finished,
kmp_int32 is_constrained) {
kmp_task_t *task;
kmp_taskdata_t *taskdata;
+ kmp_taskdata_t *current;
kmp_thread_data_t *victim_td, *threads_data;
+ kmp_int32 level, target;
kmp_int32 victim_tid;
KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
@@ -2199,25 +2219,19 @@ static kmp_task_t *__kmp_steal_task(kmp_
threads_data = task_team->tt.tt_threads_data;
KMP_DEBUG_ASSERT(threads_data != NULL); // Caller should check this condition
- victim_tid = victim->th.th_info.ds.ds_tid;
+ victim_tid = victim_thr->th.th_info.ds.ds_tid;
victim_td = &threads_data[victim_tid];
KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
- "task_team=%p ntasks=%d "
- "head=%u tail=%u\n",
- gtid, __kmp_gtid_from_thread(victim), task_team,
+ "task_team=%p ntasks=%d head=%u tail=%u\n",
+ gtid, __kmp_gtid_from_thread(victim_thr), task_team,
victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
victim_td->td.td_deque_tail));
- if ((TCR_4(victim_td->td.td_deque_ntasks) ==
- 0) || // Caller should not check this condition
- (TCR_PTR(victim->th.th_task_team) !=
- task_team)) // GEH: why would this happen?
- {
+ if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
- "task_team=%p "
- "ntasks=%d head=%u tail=%u\n",
- gtid, __kmp_gtid_from_thread(victim), task_team,
+ "task_team=%p ntasks=%d head=%u tail=%u\n",
+ gtid, __kmp_gtid_from_thread(victim_thr), task_team,
victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
victim_td->td.td_deque_tail));
return NULL;
@@ -2225,53 +2239,103 @@ static kmp_task_t *__kmp_steal_task(kmp_
__kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
+ int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
// Check again after we acquire the lock
- if ((TCR_4(victim_td->td.td_deque_ntasks) == 0) ||
- (TCR_PTR(victim->th.th_task_team) !=
- task_team)) // GEH: why would this happen?
- {
+ if (ntasks == 0) {
__kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
- "task_team=%p "
- "ntasks=%d head=%u tail=%u\n",
- gtid, __kmp_gtid_from_thread(victim), task_team,
- victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
- victim_td->td.td_deque_tail));
+ "task_team=%p ntasks=%d head=%u tail=%u\n",
+ gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
+ victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
return NULL;
}
KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
- if (is_constrained) {
- // we need to check if the candidate obeys task scheduling constraint:
- // only descendant of current task can be scheduled
- kmp_taskdata_t *current = __kmp_threads[gtid]->th.th_current_task;
- kmp_int32 level = current->td_level;
- kmp_taskdata_t *parent = taskdata->td_parent;
- while (parent != current && parent->td_level > level) {
- parent = parent->td_parent; // check generation up to the level of the
- // current task
- KMP_DEBUG_ASSERT(parent != NULL);
- }
- if (parent != current) {
- // If the head task is not a descendant of the current task then do not
- // steal it. No other task in victim's deque can be a descendant of the
- // current task.
+ if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
+ // we need to check if the candidate obeys the task scheduling constraint
+ // (TSC): only a descendant of all deferred tied tasks can be scheduled;
+ // checking the last one suffices, as it is a descendant of all the others
+ current = __kmp_threads[gtid]->th.th_current_task->td_last_tied;
+ KMP_DEBUG_ASSERT(current != NULL);
+ // check that the last tied task is not suspended at a barrier
+ if (current->td_flags.tasktype == TASK_EXPLICIT ||
+ current->td_taskwait_thread > 0) { // <= 0 on barrier
+ level = current->td_level;
+ kmp_taskdata_t *parent = taskdata->td_parent;
+ while (parent != current && parent->td_level > level) {
+ parent = parent->td_parent; // check generation up to the level of the
+ // current task
+ KMP_DEBUG_ASSERT(parent != NULL);
+ }
+ if (parent != current) {
+ if (!task_team->tt.tt_untied_task_encountered) {
+ // The TSC does not allow stealing the victim task
+ __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
+ KA_TRACE(10,
+ ("__kmp_steal_task(exit #3): T#%d could not steal from "
+ "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
+ gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
+ victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
+ return NULL;
+ }
+ taskdata = NULL; // will check other tasks in victim's deque
+ }
+ }
+ }
+ if (taskdata != NULL) {
+ // Bump head pointer and Wrap.
+ victim_td->td.td_deque_head =
+ (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
+ } else {
+ int i;
+ // walk through victim's deque trying to steal any task
+ target = victim_td->td.td_deque_head;
+ for (i = 1; i < ntasks; ++i) {
+ target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
+ taskdata = victim_td->td.td_deque[target];
+ if (taskdata->td_flags.tiedness == TASK_TIED) {
+ // check if the candidate obeys the TSC
+ kmp_taskdata_t *parent = taskdata->td_parent;
+ // check generation up to the level of the current task
+ while (parent != current && parent->td_level > level) {
+ parent = parent->td_parent;
+ KMP_DEBUG_ASSERT(parent != NULL);
+ }
+ if (parent != current) {
+ // The TSC does not allow stealing the candidate
+ taskdata = NULL;
+ continue;
+ } else {
+ // found a tied task that obeys the TSC
+ break;
+ }
+ } else {
+ // found an untied task, exempt from the TSC
+ break;
+ }
+ }
+ if (taskdata == NULL) {
+ // No suitable candidate to steal was found
__kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
- KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from "
- "T#%d: task_team=%p "
- "ntasks=%d head=%u tail=%u\n",
- gtid,
- __kmp_gtid_from_thread(threads_data[victim_tid].td.td_thr),
- task_team, victim_td->td.td_deque_ntasks,
+ KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d could not steal from "
+ "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
+ gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
return NULL;
}
+ int prev = target;
+ for (i = i + 1; i < ntasks; ++i) {
+ // shift remaining tasks in the deque left by 1
+ target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
+ victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
+ prev = target;
+ }
+ KMP_DEBUG_ASSERT(victim_td->td.td_deque_tail ==
+ ((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
+ victim_td->td.td_deque_tail = target; // tail -= 1 (wrapped)
}
- // Bump head pointer and Wrap.
- victim_td->td.td_deque_head =
- (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
if (*thread_finished) {
// We need to un-mark this victim as a finished victim. This must be done
// before releasing the lock, or else other threads (starting with the
@@ -2287,19 +2351,16 @@ static kmp_task_t *__kmp_steal_task(kmp_
*thread_finished = FALSE;
}
- TCW_4(victim_td->td.td_deque_ntasks,
- TCR_4(victim_td->td.td_deque_ntasks) - 1);
+ TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);
__kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
KMP_COUNT_BLOCK(TASK_stolen);
- KA_TRACE(
- 10,
- ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
- "ntasks=%d head=%u tail=%u\n",
- gtid, taskdata, __kmp_gtid_from_thread(victim), task_team,
- victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
- victim_td->td.td_deque_tail));
+ KA_TRACE(10,
+ ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
+ "task_team=%p ntasks=%d head=%u tail=%u\n",
+ gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
+ ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
task = KMP_TASKDATA_TO_TASK(taskdata);
return task;
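When the head task fails the TSC but untied tasks exist, the new code scans the rest of the circular deque for the first legal candidate and, having stolen from the middle, closes the hole by shifting the entries behind the stolen slot one position toward the head and pulling the tail back. A self-contained model of that compaction (simplified: ints stand in for task pointers, and the capacity is a power of two as with TASK_DEQUE_MASK):

  #include <cassert>

  constexpr unsigned kMask = 7; // capacity 8; mask works like TASK_DEQUE_MASK

  // Steal the entry 'steal_idx' slots past 'head' (steal_idx >= 1, since the
  // head itself was already rejected), then compact the deque.
  int steal_and_compact(int *deque, unsigned head, unsigned *tail,
                        unsigned ntasks, unsigned steal_idx) {
    unsigned target = (head + steal_idx) & kMask;
    int stolen = deque[target];
    unsigned prev = target;
    for (unsigned i = steal_idx + 1; i < ntasks; ++i) {
      target = (target + 1) & kMask;
      deque[prev] = deque[target]; // shift one slot toward the head (wrapped)
      prev = target;
    }
    assert(*tail == ((target + 1) & kMask)); // old tail sat one past the end
    *tail = target;                          // tail -= 1 (wrapped)
    return stolen;
  }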
@@ -2325,7 +2386,7 @@ static inline int __kmp_execute_tasks_te
kmp_info_t *other_thread;
kmp_taskdata_t *current_task = thread->th.th_current_task;
volatile kmp_int32 *unfinished_threads;
- kmp_int32 nthreads, victim = -2, use_own_tasks = 1, new_victim = 0,
+ kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
tid = thread->th.th_info.ds.ds_tid;
KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
@@ -2362,13 +2423,13 @@ static inline int __kmp_execute_tasks_te
int asleep = 1;
use_own_tasks = 0;
// Try to steal from the last place I stole from successfully.
- if (victim == -2) { // haven't stolen anything yet
- victim = threads_data[tid].td.td_deque_last_stolen;
- if (victim !=
+ if (victim_tid == -2) { // haven't stolen anything yet
+ victim_tid = threads_data[tid].td.td_deque_last_stolen;
+ if (victim_tid !=
-1) // if we have a last stolen from victim, get the thread
- other_thread = threads_data[victim].td.td_thr;
+ other_thread = threads_data[victim_tid].td.td_thr;
}
- if (victim != -1) { // found last victim
+ if (victim_tid != -1) { // found last victim
asleep = 0;
} else if (!new_victim) { // no recent steals and we haven't already
// used a new victim; select a random thread
@@ -2376,12 +2437,12 @@ static inline int __kmp_execute_tasks_te
// Pick a random thread. Initial plan was to cycle through all the
// threads, and only return if we tried to steal from every thread,
// and failed. Arch says that's not such a great idea.
- victim = __kmp_get_random(thread) % (nthreads - 1);
- if (victim >= tid) {
- ++victim; // Adjusts random distribution to exclude self
+ victim_tid = __kmp_get_random(thread) % (nthreads - 1);
+ if (victim_tid >= tid) {
+ ++victim_tid; // Adjusts random distribution to exclude self
}
// Found a potential victim
- other_thread = threads_data[victim].td.td_thr;
+ other_thread = threads_data[victim_tid].td.td_thr;
// There is a slight chance that __kmp_enable_tasking() did not wake
// up all threads waiting at the barrier. If victim is sleeping,
// then wake it up. Since we were going to pay the cache miss
@@ -2415,8 +2476,8 @@ static inline int __kmp_execute_tasks_te
is_constrained);
}
if (task != NULL) { // set last stolen to victim
- if (threads_data[tid].td.td_deque_last_stolen != victim) {
- threads_data[tid].td.td_deque_last_stolen = victim;
+ if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
+ threads_data[tid].td.td_deque_last_stolen = victim_tid;
// The pre-refactored code did not try more than 1 successful new
// victim, unless the last one generated more local tasks;
// new_victim keeps track of this
@@ -2424,7 +2485,7 @@ static inline int __kmp_execute_tasks_te
}
} else { // No tasks found; unset last_stolen
KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
- victim = -2; // no successful victim found
+ victim_tid = -2; // no successful victim found
}
}
@@ -3157,6 +3218,7 @@ void __kmp_task_team_wait(
#else
KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1);
#endif
+ KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
TCW_SYNC_4(task_team->tt.tt_active, FALSE);
KMP_MB();