[Openmp-commits] [openmp] r272286 - Refactor __kmp_execute_tasks_template function
Jonathan Peyton via Openmp-commits
openmp-commits at lists.llvm.org
Thu Jun 9 11:27:03 PDT 2016
Author: jlpeyton
Date: Thu Jun 9 13:27:03 2016
New Revision: 272286
URL: http://llvm.org/viewvc/llvm-project?rev=272286&view=rev
Log:
Refactor __kmp_execute_tasks_template function
Refactored __kmp_execute_tasks_template to shorten and remove code redundancy.
The original code for __kmp_execute_tasks_template was very redundant with
large sections of repeated code that needed to be kept consistent, and goto
statements that made the control flow difficult to discern. This refactoring
removes all gotos and redundancy.
Patch by Terry Wilmarth
Differential Revision: http://reviews.llvm.org/D20879
Modified:
openmp/trunk/runtime/src/kmp_tasking.c
openmp/trunk/runtime/test/lit.cfg
Modified: openmp/trunk/runtime/src/kmp_tasking.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_tasking.c?rev=272286&r1=272285&r2=272286&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_tasking.c (original)
+++ openmp/trunk/runtime/src/kmp_tasking.c Thu Jun 9 13:27:03 2016
@@ -1806,17 +1806,17 @@ static inline int __kmp_execute_tasks_te
int *thread_finished
USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
{
- kmp_task_team_t * task_team;
+ kmp_task_team_t * task_team = thread->th.th_task_team;
kmp_thread_data_t * threads_data;
kmp_task_t * task;
+ kmp_info_t * other_thread;
kmp_taskdata_t * current_task = thread -> th.th_current_task;
volatile kmp_uint32 * unfinished_threads;
- kmp_int32 nthreads, last_stolen, k, tid;
+ kmp_int32 nthreads, victim=-2, use_own_tasks=1, new_victim=0, tid=thread->th.th_info.ds.ds_tid;
KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
- task_team = thread -> th.th_task_team;
if (task_team == NULL) return FALSE;
KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
@@ -1834,277 +1834,154 @@ static inline int __kmp_execute_tasks_te
#endif
KMP_DEBUG_ASSERT( (int)(TCR_4(*unfinished_threads)) >= 0 );
- // Choose tasks from our own work queue.
- start:
- while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
- if ( itt_sync_obj == NULL ) {
- // we are at fork barrier where we could not get the object reliably
- itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
- }
- __kmp_itt_task_starting( itt_sync_obj );
- }
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
- __kmp_invoke_task( gtid, task, current_task );
-#if USE_ITT_BUILD
- if ( itt_sync_obj != NULL )
- __kmp_itt_task_finished( itt_sync_obj );
-#endif /* USE_ITT_BUILD */
-
- // If this thread is only partway through the barrier and the condition
- // is met, then return now, so that the barrier gather/release pattern can proceed.
- // If this thread is in the last spin loop in the barrier, waiting to be
- // released, we know that the termination condition will not be satisified,
- // so don't waste any cycles checking it.
- if (flag == NULL || (!final_spin && flag->done_check())) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
- return TRUE;
- }
- if (thread->th.th_task_team == NULL) break;
- KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
- }
-
- // This thread's work queue is empty. If we are in the final spin loop
- // of the barrier, check and see if the termination condition is satisfied.
-#if OMP_41_ENABLED
- // The work queue may be empty but there might be proxy tasks still executing
- if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
-#else
- if (final_spin)
-#endif
- {
- // First, decrement the #unfinished threads, if that has not already
- // been done. This decrement might be to the spin location, and
- // result in the termination condition being satisfied.
- if (! *thread_finished) {
- kmp_uint32 count;
-
- count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
- KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
- gtid, count, task_team) );
- *thread_finished = TRUE;
- }
-
- // It is now unsafe to reference thread->th.th_team !!!
- // Decrementing task_team->tt.tt_unfinished_threads can allow the master
- // thread to pass through the barrier, where it might reset each thread's
- // th.th_team field for the next parallel region.
- // If we can steal more work, we know that this has not happened yet.
- if (flag != NULL && flag->done_check()) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
- return TRUE;
- }
- }
-
- if (thread->th.th_task_team == NULL) return FALSE;
-#if OMP_41_ENABLED
- // check if there are other threads to steal from, otherwise go back
- if ( nthreads == 1 )
- goto start;
-#endif
+ while (1) { // Outer loop keeps trying to find tasks in case of single thread getting tasks from target constructs
+ while (1) { // Inner loop to find a task and execute it
+ task = NULL;
+ if (use_own_tasks) { // check on own queue first
+ task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained );
+ }
+ if ((task == NULL) && (nthreads > 1)) { // Steal a task
+ int asleep = 1;
+ use_own_tasks = 0;
+ // Try to steal from the last place I stole from successfully.
+ if (victim == -2) { // haven't stolen anything yet
+ victim = threads_data[tid].td.td_deque_last_stolen;
+ if (victim != -1) // if we have a last stolen from victim, get the thread
+ other_thread = threads_data[victim].td.td_thr;
+ }
+ if (victim != -1) { // found last victim
+ asleep = 0;
+ }
+ else if (!new_victim) { // no recent steals and we haven't already used a new victim; select a random thread
+ do { // Find a different thread to steal work from.
+ // Pick a random thread. Initial plan was to cycle through all the threads, and only return if
+ // we tried to steal from every thread, and failed. Arch says that's not such a great idea.
+ victim = __kmp_get_random(thread) % (nthreads - 1);
+ if (victim >= tid) {
+ ++victim; // Adjusts random distribution to exclude self
+ }
+ // Found a potential victim
+ other_thread = threads_data[victim].td.td_thr;
+ // There is a slight chance that __kmp_enable_tasking() did not wake up all threads
+ // waiting at the barrier. If victim is sleeping, then wake it up. Since we were going to
+ // pay the cache miss penalty for referencing another thread's kmp_info_t struct anyway,
+ // the check shouldn't cost too much performance at this point. In extra barrier mode, tasks
+ // do not sleep at the separate tasking barrier, so this isn't a problem.
+ asleep = 0;
+ if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
+ (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
+ (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) {
+ asleep = 1;
+ __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
+ // A sleeping thread should not have any tasks on it's queue. There is a slight
+ // possibility that it resumes, steals a task from another thread, which spawns more
+ // tasks, all in the time that it takes this thread to check => don't write an assertion
+ // that the victim's queue is empty. Try stealing from a different thread.
+ }
+ } while (asleep);
+ }
- // Try to steal from the last place I stole from successfully.
- tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
- last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
+ if (!asleep) {
+ // We have a victim to try to steal from
+ task = __kmp_steal_task(other_thread, gtid, task_team, unfinished_threads, thread_finished, is_constrained);
+ }
+ if (task != NULL) { // set last stolen to victim
+ if (threads_data[tid].td.td_deque_last_stolen != victim) {
+ threads_data[tid].td.td_deque_last_stolen = victim;
+ // The pre-refactored code did not try more than 1 successful new vicitm,
+ // unless the last one generated more local tasks; new_victim keeps track of this
+ new_victim = 1;
+ }
+ }
+ else { // No tasks found; unset last_stolen
+ KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
+ victim = -2; // no successful victim found
+ }
+ }
- if (last_stolen != -1) {
- kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
+ if (task == NULL) // break out of tasking loop
+ break;
- while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
- thread_finished, is_constrained )) != NULL)
- {
+ // Found a task; execute it
#if USE_ITT_BUILD && USE_ITT_NOTIFY
if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
- if ( itt_sync_obj == NULL ) {
- // we are at fork barrier where we could not get the object reliably
- itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
+ if ( itt_sync_obj == NULL ) { // we are at fork barrier where we could not get the object reliably
+ itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
}
__kmp_itt_task_starting( itt_sync_obj );
}
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
__kmp_invoke_task( gtid, task, current_task );
#if USE_ITT_BUILD
- if ( itt_sync_obj != NULL )
- __kmp_itt_task_finished( itt_sync_obj );
+ if ( itt_sync_obj != NULL ) __kmp_itt_task_finished( itt_sync_obj );
#endif /* USE_ITT_BUILD */
-
- // Check to see if this thread can proceed.
+ // If this thread is only partway through the barrier and the condition is met, then return now,
+ // so that the barrier gather/release pattern can proceed. If this thread is in the last spin loop
+ // in the barrier, waiting to be released, we know that the termination condition will not be
+ // satisified, so don't waste any cycles checking it.
if (flag == NULL || (!final_spin && flag->done_check())) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
- gtid) );
+ KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
return TRUE;
}
-
- if (thread->th.th_task_team == NULL) break;
+ if (thread->th.th_task_team == NULL) {
+ break;
+ }
KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
- // If the execution of the stolen task resulted in more tasks being
- // placed on our run queue, then restart the whole process.
- if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
- KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
- gtid) );
- goto start;
+ // If execution of a stolen task results in more tasks being placed on our run queue, reset use_own_tasks
+ if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
+ KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n", gtid));
+ use_own_tasks = 1;
+ new_victim = 0;
}
}
- // Don't give priority to stealing from this thread anymore.
- threads_data[ tid ].td.td_deque_last_stolen = -1;
-
- // The victims's work queue is empty. If we are in the final spin loop
- // of the barrier, check and see if the termination condition is satisfied.
+ // The task source has been exhausted. If in final spin loop of barrier, check if termination condition is satisfied.
#if OMP_41_ENABLED
// The work queue may be empty but there might be proxy tasks still executing
- if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
+ if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0)
#else
if (final_spin)
#endif
{
- // First, decrement the #unfinished threads, if that has not already
- // been done. This decrement might be to the spin location, and
- // result in the termination condition being satisfied.
+ // First, decrement the #unfinished threads, if that has not already been done. This decrement
+ // might be to the spin location, and result in the termination condition being satisfied.
if (! *thread_finished) {
kmp_uint32 count;
count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
- KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
- "task_team=%p\n", gtid, count, task_team) );
+ KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec unfinished_threads to %d task_team=%p\n",
+ gtid, count, task_team) );
*thread_finished = TRUE;
}
- // If __kmp_tasking_mode != tskm_immediate_exec
- // then it is now unsafe to reference thread->th.th_team !!!
- // Decrementing task_team->tt.tt_unfinished_threads can allow the master
- // thread to pass through the barrier, where it might reset each thread's
- // th.th_team field for the next parallel region.
+ // It is now unsafe to reference thread->th.th_team !!!
+ // Decrementing task_team->tt.tt_unfinished_threads can allow the master thread to pass through
+ // the barrier, where it might reset each thread's th.th_team field for the next parallel region.
// If we can steal more work, we know that this has not happened yet.
if (flag != NULL && flag->done_check()) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
- gtid) );
+ KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
return TRUE;
}
}
- if (thread->th.th_task_team == NULL) return FALSE;
- }
- // Find a different thread to steal work from. Pick a random thread.
- // My initial plan was to cycle through all the threads, and only return
- // if we tried to steal from every thread, and failed. Arch says that's
- // not such a great idea.
- // GEH - need yield code in this loop for throughput library mode?
- new_victim:
- k = __kmp_get_random( thread ) % (nthreads - 1);
- if ( k >= thread -> th.th_info.ds.ds_tid ) {
- ++k; // Adjusts random distribution to exclude self
- }
- {
- kmp_info_t *other_thread = threads_data[k].td.td_thr;
- int first;
-
- // There is a slight chance that __kmp_enable_tasking() did not wake up
- // all threads waiting at the barrier. If this thread is sleeping, then
- // wake it up. Since we were going to pay the cache miss penalty
- // for referencing another thread's kmp_info_t struct anyway, the check
- // shouldn't cost too much performance at this point.
- // In extra barrier mode, tasks do not sleep at the separate tasking
- // barrier, so this isn't a problem.
- if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
- (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
- (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
- {
- __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
- // A sleeping thread should not have any tasks on it's queue.
- // There is a slight possibility that it resumes, steals a task from
- // another thread, which spawns more tasks, all in the time that it takes
- // this thread to check => don't write an assertion that the victim's
- // queue is empty. Try stealing from a different thread.
- goto new_victim;
+ // If this thread's task team is NULL, master has recognized that there are no more tasks; bail out
+ if (thread->th.th_task_team == NULL) {
+ KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid) );
+ return FALSE;
}
- // Now try to steal work from the selected thread
- first = TRUE;
- while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
- thread_finished, is_constrained )) != NULL)
- {
-#if USE_ITT_BUILD && USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
- if ( itt_sync_obj == NULL ) {
- // we are at fork barrier where we could not get the object reliably
- itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
- }
- __kmp_itt_task_starting( itt_sync_obj );
- }
-#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
- __kmp_invoke_task( gtid, task, current_task );
-#if USE_ITT_BUILD
- if ( itt_sync_obj != NULL )
- __kmp_itt_task_finished( itt_sync_obj );
-#endif /* USE_ITT_BUILD */
-
- // Try stealing from this victim again, in the future.
- if (first) {
- threads_data[ tid ].td.td_deque_last_stolen = k;
- first = FALSE;
- }
-
- // Check to see if this thread can proceed.
- if (flag == NULL || (!final_spin && flag->done_check())) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
- gtid) );
- return TRUE;
- }
- if (thread->th.th_task_team == NULL) break;
- KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
-
- // If the execution of the stolen task resulted in more tasks being
- // placed on our run queue, then restart the whole process.
- if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
- KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
- gtid) );
- goto start;
- }
- }
-
- // The victims's work queue is empty. If we are in the final spin loop
- // of the barrier, check and see if the termination condition is satisfied.
- // Going on and finding a new victim to steal from is expensive, as it
- // involves a lot of cache misses, so we definitely want to re-check the
- // termination condition before doing that.
#if OMP_41_ENABLED
- // The work queue may be empty but there might be proxy tasks still executing
- if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
-#else
- if (final_spin)
+ // We could be getting tasks from target constructs; if this is the only thread, keep trying to execute
+ // tasks from own queue
+ if (nthreads == 1)
+ use_own_tasks = 1;
+ else
#endif
{
- // First, decrement the #unfinished threads, if that has not already
- // been done. This decrement might be to the spin location, and
- // result in the termination condition being satisfied.
- if (! *thread_finished) {
- kmp_uint32 count;
-
- count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
- KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
- "task_team=%p\n",
- gtid, count, task_team) );
- *thread_finished = TRUE;
- }
-
- // If __kmp_tasking_mode != tskm_immediate_exec,
- // then it is now unsafe to reference thread->th.th_team !!!
- // Decrementing task_team->tt.tt_unfinished_threads can allow the master
- // thread to pass through the barrier, where it might reset each thread's
- // th.th_team field for the next parallel region.
- // If we can steal more work, we know that this has not happened yet.
- if (flag != NULL && flag->done_check()) {
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
- return TRUE;
- }
+ KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid) );
+ return FALSE;
}
- if (thread->th.th_task_team == NULL) return FALSE;
}
-
- KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
- return FALSE;
}
int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
Modified: openmp/trunk/runtime/test/lit.cfg
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/test/lit.cfg?rev=272286&r1=272285&r2=272286&view=diff
==============================================================================
--- openmp/trunk/runtime/test/lit.cfg (original)
+++ openmp/trunk/runtime/test/lit.cfg Thu Jun 9 13:27:03 2016
@@ -26,6 +26,9 @@ def append_dynamic_library_path(path):
else:
config.environment[name] = path
+for name,value in os.environ.items():
+ config.environment[name] = value
+
# name: The name of this test suite.
config.name = 'libomp'
More information about the Openmp-commits
mailing list