[Openmp-commits] [openmp] r228718 - The usage of tt_state flag is replaced by an array of two task_team pointers.
Andrey Churbanov
Andrey.Churbanov at intel.com
Tue Feb 10 10:37:44 PST 2015
Author: achurbanov
Date: Tue Feb 10 12:37:43 2015
New Revision: 228718
URL: http://llvm.org/viewvc/llvm-project?rev=228718&view=rev
Log:
The usage of tt_state flag is replaced by an array of two task_team pointers.
Modified:
openmp/trunk/runtime/src/kmp.h
openmp/trunk/runtime/src/kmp_barrier.cpp
openmp/trunk/runtime/src/kmp_csupport.c
openmp/trunk/runtime/src/kmp_omp.h
openmp/trunk/runtime/src/kmp_runtime.c
openmp/trunk/runtime/src/kmp_tasking.c
openmp/trunk/runtime/src/kmp_wait_release.h
Modified: openmp/trunk/runtime/src/kmp.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp.h?rev=228718&r1=228717&r2=228718&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp.h (original)
+++ openmp/trunk/runtime/src/kmp.h Tue Feb 10 12:37:43 2015
@@ -1852,10 +1852,8 @@ extern kmp_int32 __kmp_task_stealing_con
// The tt_found_tasks flag is a signal to all threads in the team that tasks were spawned and
// queued since the previous barrier release.
-// State is used to alternate task teams for successive barriers
-#define KMP_TASKING_ENABLED(task_team,state) \
- ((TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE) && \
- (TCR_4((task_team)->tt.tt_state) == (state)))
+#define KMP_TASKING_ENABLED(task_team) \
+ (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE)
/*!
@ingroup BASIC_TYPES
@{
@@ -2071,8 +2069,6 @@ typedef struct kmp_base_task_team {
volatile kmp_uint32 tt_ref_ct; /* #threads accessing struct */
/* (not incl. master) */
- kmp_int32 tt_state; /* alternating 0/1 for task team identification */
- /* Note: VERY sensitive to padding! */
} kmp_base_task_team_t;
union KMP_ALIGN_CACHE kmp_task_team {
@@ -2195,6 +2191,9 @@ typedef struct KMP_ALIGN_CACHE kmp_base_
kmp_task_team_t * th_task_team; // Task team struct
kmp_taskdata_t * th_current_task; // Innermost Task being executed
kmp_uint8 th_task_state; // alternating 0/1 for task team identification
+ kmp_uint8 * th_task_state_memo_stack; // Stack holding memos of th_task_state at nested levels
+ kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
+ kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
/*
* More stuff for keeping track of active/sleeping threads
@@ -2294,7 +2293,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_
kmp_team_p *t_parent; // parent team
kmp_team_p *t_next_pool; // next free team in the team pool
kmp_disp_t *t_dispatch; // thread's dispatch data
- kmp_task_team_t *t_task_team; // Task team struct
+ kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
#if OMP_40_ENABLED
kmp_proc_bind_t t_proc_bind; // bind type for par region
#endif // OMP_40_ENABLED
@@ -3100,7 +3099,7 @@ int __kmp_execute_tasks_oncore(kmp_info_
extern void __kmp_reap_task_teams( void );
extern void __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread );
extern void __kmp_wait_to_unref_task_teams( void );
-extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team );
+extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int both );
extern void __kmp_task_team_sync ( kmp_info_t *this_thr, kmp_team_t *team );
extern void __kmp_task_team_wait ( kmp_info_t *this_thr, kmp_team_t *team
#if USE_ITT_BUILD
Modified: openmp/trunk/runtime/src/kmp_barrier.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_barrier.cpp?rev=228718&r1=228717&r2=228718&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_barrier.cpp (original)
+++ openmp/trunk/runtime/src/kmp_barrier.cpp Tue Feb 10 12:37:43 2015
@@ -1101,7 +1101,7 @@ __kmp_barrier(enum barrier_type bt, int
if (__kmp_tasking_mode != tskm_immediate_exec) {
__kmp_task_team_wait(this_thr, team
USE_ITT_BUILD_ARG(itt_sync_obj) );
- __kmp_task_team_setup(this_thr, team);
+ __kmp_task_team_setup(this_thr, team, 0); // use 0 to only setup the current team
}
@@ -1189,7 +1189,7 @@ __kmp_barrier(enum barrier_type bt, int
status = 0;
if (__kmp_tasking_mode != tskm_immediate_exec) {
// The task team should be NULL for serialized code (tasks will be executed immediately)
- KMP_DEBUG_ASSERT(team->t.t_task_team == NULL);
+ KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL);
KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL);
}
}
@@ -1293,9 +1293,9 @@ __kmp_join_barrier(int gtid)
# ifdef KMP_DEBUG
if (__kmp_tasking_mode != tskm_immediate_exec) {
KA_TRACE(20, ( "__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n",
- __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team,
+ __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state],
this_thr->th.th_task_team));
- KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team);
+ KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
}
# endif /* KMP_DEBUG */
@@ -1448,7 +1448,7 @@ __kmp_fork_barrier(int gtid, int tid)
#endif
if (__kmp_tasking_mode != tskm_immediate_exec) {
- __kmp_task_team_setup(this_thr, team);
+ __kmp_task_team_setup(this_thr, team, 1); // 1 indicates setup both task teams
}
/* The master thread may have changed its blocktime between the join barrier and the
Modified: openmp/trunk/runtime/src/kmp_csupport.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_csupport.c?rev=228718&r1=228717&r2=228718&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_csupport.c (original)
+++ openmp/trunk/runtime/src/kmp_csupport.c Tue Feb 10 12:37:43 2015
@@ -494,13 +494,8 @@ __kmpc_end_serialized_parallel(ident_t *
this_thr -> th.th_current_task -> td_flags.executing = 1;
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- //
- // Copy the task team from the new child / old parent team
- // to the thread. If non-NULL, copy the state flag also.
- //
- if ( ( this_thr -> th.th_task_team = this_thr -> th.th_team -> t.t_task_team ) != NULL ) {
- this_thr -> th.th_task_state = this_thr -> th.th_task_team -> tt.tt_state;
- }
+ // Copy the task team from the new child / old parent team to the thread.
+ this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
}
@@ -1821,11 +1816,10 @@ __kmpc_reduce_nowait(
teams_swapped = 1;
th->th.th_info.ds.ds_tid = team->t.t_master_tid;
th->th.th_team = team->t.t_parent;
- th->th.th_task_team = th->th.th_team->t.t_task_team;
th->th.th_team_nproc = th->th.th_team->t.t_nproc;
+ th->th.th_task_team = th->th.th_team->t.t_task_team[0];
task_state = th->th.th_task_state;
- if( th->th.th_task_team )
- th->th.th_task_state = th->th.th_task_team->tt.tt_state;
+ th->th.th_task_state = 0;
}
}
#endif // OMP_40_ENABLED
@@ -1899,8 +1893,8 @@ __kmpc_reduce_nowait(
// Restore thread structure
th->th.th_info.ds.ds_tid = 0;
th->th.th_team = team;
- th->th.th_task_team = team->t.t_task_team;
th->th.th_team_nproc = team->t.t_nproc;
+ th->th.th_task_team = team->t.t_task_team[task_state];
th->th.th_task_state = task_state;
}
#endif
Modified: openmp/trunk/runtime/src/kmp_omp.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_omp.h?rev=228718&r1=228717&r2=228718&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_omp.h (original)
+++ openmp/trunk/runtime/src/kmp_omp.h Tue Feb 10 12:37:43 2015
@@ -162,7 +162,6 @@ typedef struct {
offset_and_size_t tt_nproc;
offset_and_size_t tt_unfinished_threads;
offset_and_size_t tt_active;
- offset_and_size_t tt_state;
/* kmp_taskdata_t */
kmp_int32 td_sizeof_struct;
Modified: openmp/trunk/runtime/src/kmp_runtime.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_runtime.c?rev=228718&r1=228717&r2=228718&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_runtime.c (original)
+++ openmp/trunk/runtime/src/kmp_runtime.c Tue Feb 10 12:37:43 2015
@@ -1229,8 +1229,8 @@ __kmp_serialized_parallel(ident_t *loc,
KMP_MB();
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- KMP_DEBUG_ASSERT( this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team );
- KMP_DEBUG_ASSERT( serial_team->t.t_task_team == NULL );
+ KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
+ KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
this_thr->th.th_task_team = NULL;
@@ -1565,7 +1565,7 @@ __kmp_fork_call(
#if KMP_DEBUG
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );
+ KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
}
#endif
@@ -1797,11 +1797,31 @@ __kmp_fork_call(
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
// Set master's task team to team's task team. Unless this is hot team, it should be NULL.
- KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );
+ KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
__kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
- parent_team, team->t.t_task_team, team ) );
- master_th->th.th_task_team = team->t.t_task_team;
+ parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );
+ if (level) {
+ // Take a memo of master's task_state
+ KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
+ if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
+ kmp_uint8 *old_stack, *new_stack = (kmp_uint8 *) __kmp_allocate( 2*master_th->th.th_task_state_stack_sz );
+ kmp_uint32 i;
+ for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
+ new_stack[i] = master_th->th.th_task_state_memo_stack[i];
+ }
+ old_stack = master_th->th.th_task_state_memo_stack;
+ master_th->th.th_task_state_memo_stack = new_stack;
+ master_th->th.th_task_state_stack_sz *= 2;
+ __kmp_free(old_stack);
+ }
+ // Store master's task_state on stack
+ master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
+ master_th->th.th_task_state_top++;
+ master_th->th.th_task_state = 0;
+ }
+ master_th->th.th_task_team = team->t.t_task_team[master_th->th.th_task_state];
+
#if !KMP_NESTED_HOT_TEAMS
KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
#endif
@@ -1955,8 +1975,8 @@ __kmp_join_call(ident_t *loc, int gtid
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
__kmp_gtid_from_thread( master_th ), team,
- team->t.t_task_team, master_th->th.th_task_team) );
- KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team );
+ team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
+ KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
}
#endif
@@ -1991,6 +2011,10 @@ __kmp_join_call(ident_t *loc, int gtid
// But there is barrier for external team (league).
__kmp_internal_join( loc, gtid, team );
}
+ else {
+ master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel)
+ }
+
KMP_MB();
#if USE_ITT_BUILD
@@ -2062,8 +2086,10 @@ __kmp_join_call(ident_t *loc, int gtid
balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
}
- // Synchronize thread's task state
- other_threads[i]->th.th_task_state = master_th->th.th_task_state;
+ if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+ // Synchronize thread's task state
+ other_threads[i]->th.th_task_state = master_th->th.th_task_state;
+ }
}
}
return;
@@ -2112,9 +2138,9 @@ __kmp_join_call(ident_t *loc, int gtid
__kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads
/* this race was fun to find. make sure the following is in the critical
- * region otherwise assertions may fail occasiounally since the old team
+ * region otherwise assertions may fail occasionally since the old team
* may be reallocated and the hierarchy appears inconsistent. it is
- * actually safe to run and won't cause any bugs, but will cause thoose
+ * actually safe to run and won't cause any bugs, but will cause those
* assertion failures. it's only one deref&assign so might as well put this
* in the critical region */
master_th->th.th_team = parent_team;
@@ -2131,13 +2157,15 @@ __kmp_join_call(ident_t *loc, int gtid
}
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- //
- // Copy the task team from the new child / old parent team
- // to the thread. If non-NULL, copy the state flag also.
- //
- if ( ( master_th->th.th_task_team = parent_team->t.t_task_team ) != NULL ) {
- master_th->th.th_task_state = master_th->th.th_task_team->tt.tt_state;
+ // Restore task state from memo stack
+ KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
+ if (master_th->th.th_task_state_top > 0) {
+ --master_th->th.th_task_state_top; // pop
+ master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
}
+ // Copy the first task team from the new child / old parent team to the thread and reset state flag.
+ master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
+
KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
__kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
parent_team ) );
@@ -2229,24 +2257,22 @@ __kmp_set_num_threads( int new_nth, int
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- kmp_task_team_t *task_team = hot_team->t.t_task_team;
- if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
- //
- // Signal the worker threads (esp. the extra ones) to stop
- // looking for tasks while spin waiting. The task teams
- // are reference counted and will be deallocated by the
- // last worker thread.
- //
- KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 );
- TCW_SYNC_4( task_team->tt.tt_active, FALSE );
- KMP_MB();
-
- KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n",
- &hot_team->t.t_task_team ) );
- hot_team->t.t_task_team = NULL;
- }
- else {
- KMP_DEBUG_ASSERT( task_team == NULL );
+ int tt_idx;
+ for (tt_idx=0; tt_idx<2; ++tt_idx) {
+ kmp_task_team_t *task_team = hot_team->t.t_task_team[tt_idx];
+ if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
+ // Signal worker threads (esp. the extra ones) to stop looking for tasks while spin waiting.
+ // The task teams are reference counted and will be deallocated by the last worker thread.
+ KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 );
+ TCW_SYNC_4( task_team->tt.tt_active, FALSE );
+ KMP_MB();
+ KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n",
+ &hot_team->t.t_task_team[tt_idx] ) );
+ hot_team->t.t_task_team[tt_idx] = NULL;
+ }
+ else {
+ KMP_DEBUG_ASSERT( task_team == NULL );
+ }
}
}
@@ -3617,7 +3643,7 @@ void __kmp_task_info() {
#endif // KMP_DEBUG
/* TODO optimize with one big memclr, take out what isn't needed,
- * split responsility to workers as much as possible, and delay
+ * split responsibility to workers as much as possible, and delay
* initialization of features as much as possible */
static void
__kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
@@ -3723,6 +3749,12 @@ __kmp_initialize_info( kmp_info_t *this_
this_thr->th.th_next_pool = NULL;
+ if (!this_thr->th.th_task_state_memo_stack) {
+ this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
+ this_thr->th.th_task_state_top = 0;
+ this_thr->th.th_task_state_stack_sz = 4;
+ }
+
KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
@@ -3778,6 +3810,9 @@ __kmp_allocate_thread( kmp_root_t *root,
TCW_4(__kmp_nth, __kmp_nth + 1);
+ new_thr->th.th_task_state_top = 0;
+ new_thr->th.th_task_state_stack_sz = 4;
+
#ifdef KMP_ADJUST_BLOCKTIME
/* Adjust blocktime back to zero if necessary */
/* Middle initialization might not have occurred yet */
@@ -4367,6 +4402,7 @@ __kmp_allocate_team( kmp_root_t *root, i
char *ptr;
size_t size;
int use_hot_team = ! root->r.r_active;
+ int level = 0;
KA_TRACE( 20, ("__kmp_allocate_team: called\n"));
KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
@@ -4374,7 +4410,6 @@ __kmp_allocate_team( kmp_root_t *root, i
KMP_MB();
#if KMP_NESTED_HOT_TEAMS
- int level;
kmp_hot_team_ptr_t *hot_teams;
if( master ) {
team = master->th.th_team;
@@ -4405,8 +4440,8 @@ __kmp_allocate_team( kmp_root_t *root, i
#endif
#if KMP_DEBUG
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team = %p before reinit\n",
- team->t.t_task_team ));
+ KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
+ team->t.t_task_team[0], team->t.t_task_team[1] ));
}
#endif
@@ -4449,30 +4484,35 @@ __kmp_allocate_team( kmp_root_t *root, i
}
# endif /* KMP_AFFINITY_SUPPORTED */
#endif /* OMP_40_ENABLED */
+
+ if (level) {
+ for(f = 0; f < new_nproc; ++f) {
+ team->t.t_threads[f]->th.th_task_state = 0;
+ }
+ }
}
else if( team->t.t_nproc > new_nproc ) {
KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
team->t.t_size_changed = 1;
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- kmp_task_team_t *task_team = team->t.t_task_team;
- if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
- //
- // Signal the worker threads (esp. the extra ones) to stop
- // looking for tasks while spin waiting. The task teams
- // are reference counted and will be deallocated by the
- // last worker thread.
- //
- KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
- TCW_SYNC_4( task_team->tt.tt_active, FALSE );
- KMP_MB();
-
- KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
- &team->t.t_task_team ) );
- team->t.t_task_team = NULL;
- }
- else {
- KMP_DEBUG_ASSERT( task_team == NULL );
+ // Signal the worker threads (esp. extra ones) to stop looking for tasks while spin waiting.
+ // The task teams are reference counted and will be deallocated by the last worker thread.
+ int tt_idx;
+ for (tt_idx=0; tt_idx<2; ++tt_idx) {
+ // We don't know which of the two task teams workers are waiting on, so deactivate both.
+ kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
+ if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
+ KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
+ TCW_SYNC_4( task_team->tt.tt_active, FALSE );
+ KMP_MB();
+ KA_TRACE(20, ("__kmp_allocate_team: setting task_team %p to NULL\n",
+ &team->t.t_task_team[tt_idx]));
+ team->t.t_task_team[tt_idx] = NULL;
+ }
+ else {
+ KMP_DEBUG_ASSERT( task_team == NULL );
+ }
}
}
#if KMP_NESTED_HOT_TEAMS
@@ -4497,20 +4537,31 @@ __kmp_allocate_team( kmp_root_t *root, i
__kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- kmp_task_team_t *task_team = team->t.t_task_team;
- if ( task_team != NULL ) {
- KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
- task_team->tt.tt_nproc = new_nproc;
- task_team->tt.tt_unfinished_threads = new_nproc;
- task_team->tt.tt_ref_ct = new_nproc - 1;
+ // Init both task teams
+ int tt_idx;
+ for (tt_idx=0; tt_idx<2; ++tt_idx) {
+ kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
+ if ( task_team != NULL ) {
+ KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
+ task_team->tt.tt_nproc = new_nproc;
+ task_team->tt.tt_unfinished_threads = new_nproc;
+ task_team->tt.tt_ref_ct = new_nproc - 1;
+ }
}
}
/* update the remaining threads */
- for(f = 0; f < new_nproc; ++f) {
- team->t.t_threads[f]->th.th_team_nproc = new_nproc;
+ if (level) {
+ for(f = 0; f < new_nproc; ++f) {
+ team->t.t_threads[f]->th.th_team_nproc = new_nproc;
+ team->t.t_threads[f]->th.th_task_state = 0;
+ }
+ }
+ else {
+ for(f = 0; f < new_nproc; ++f) {
+ team->t.t_threads[f]->th.th_team_nproc = new_nproc;
+ }
}
-
// restore the current task state of the master thread: should be the implicit task
KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
0, team->t.t_threads[0], team ) );
@@ -4621,19 +4672,33 @@ __kmp_allocate_team( kmp_root_t *root, i
__kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- kmp_task_team_t *task_team = team->t.t_task_team;
- if ( task_team != NULL ) {
- KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
- task_team->tt.tt_nproc = new_nproc;
- task_team->tt.tt_unfinished_threads = new_nproc;
- task_team->tt.tt_ref_ct = new_nproc - 1;
+ int tt_idx;
+ for (tt_idx=0; tt_idx<2; ++tt_idx) {
+ kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
+ if ( task_team != NULL ) {
+ KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
+ task_team->tt.tt_nproc = new_nproc;
+ task_team->tt.tt_unfinished_threads = new_nproc;
+ task_team->tt.tt_ref_ct = new_nproc - 1;
+ }
}
}
/* reinitialize the old threads */
- for( f = 0 ; f < team->t.t_nproc ; f++ )
- __kmp_initialize_info( team->t.t_threads[ f ], team, f,
- __kmp_gtid_from_tid( f, team ) );
+ if (level) {
+ for( f = 0 ; f < team->t.t_nproc ; f++ ) {
+ __kmp_initialize_info( team->t.t_threads[ f ], team, f,
+ __kmp_gtid_from_tid( f, team ) );
+ }
+ }
+ else {
+ int old_state = team->t.t_threads[0]->th.th_task_state;
+ for (f=0; f < team->t.t_nproc; ++f) {
+ __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
+ team->t.t_threads[f]->th.th_task_state = old_state;
+ }
+ }
+
#ifdef KMP_DEBUG
for ( f = 0; f < team->t.t_nproc; ++ f ) {
KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
@@ -4666,7 +4731,7 @@ __kmp_allocate_team( kmp_root_t *root, i
// Sync task (TODO: and barrier?) state for nested hot teams, not needed for outermost hot team.
for( f = 1; f < new_nproc; ++f ) {
kmp_info_t *thr = team->t.t_threads[f];
- thr->th.th_task_state = master->th.th_task_state;
+ thr->th.th_task_state = 0;
int b;
kmp_balign_t * balign = thr->th.th_bar;
for( b = 0; b < bs_last_barrier; ++ b ) {
@@ -4689,8 +4754,8 @@ __kmp_allocate_team( kmp_root_t *root, i
#if KMP_DEBUG
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team = %p after reinit\n",
- team->t.t_task_team ));
+ KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
+ team->t.t_task_team[0], team->t.t_task_team[1] ));
}
#endif
@@ -4711,9 +4776,10 @@ __kmp_allocate_team( kmp_root_t *root, i
/* setup the team for fresh use */
__kmp_initialize_team( team, new_nproc, new_icvs, NULL );
- KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
- &team->t.t_task_team ) );
- team->t.t_task_team = NULL;
+ KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
+ &team->t.t_task_team[0], &team->t.t_task_team[1]) );
+ team->t.t_task_team[0] = NULL;
+ team->t.t_task_team[1] = NULL;
/* reallocate space for arguments if necessary */
__kmp_alloc_argv_entries( argc, team, TRUE );
@@ -4759,9 +4825,10 @@ __kmp_allocate_team( kmp_root_t *root, i
KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
__kmp_initialize_team( team, new_nproc, new_icvs, NULL );
- KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
- &team->t.t_task_team ) );
- team->t.t_task_team = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
+ KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
+ &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
+ team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
+ team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
if ( __kmp_storage_map ) {
__kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
@@ -4838,22 +4905,21 @@ __kmp_free_team( kmp_root_t *root, kmp_t
/* if we are non-hot team, release our threads */
if( ! use_hot_team ) {
-
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- kmp_task_team_t *task_team = team->t.t_task_team;
- if ( task_team != NULL ) {
- //
- // Signal the worker threads to stop looking for tasks while
- // spin waiting. The task teams are reference counted and will
- // be deallocated by the last worker thread via the thread's
- // pointer to the task team.
- //
- KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n",
- task_team ) );
- KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
- TCW_SYNC_4( task_team->tt.tt_active, FALSE );
- KMP_MB();
- team->t.t_task_team = NULL;
+ int tt_idx;
+ for (tt_idx=0; tt_idx<2; ++tt_idx) {
+ // We don't know which of the two task teams workers are waiting on, so deactivate both.
+ kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
+ if ( task_team != NULL ) {
+ // Signal the worker threads to stop looking for tasks while spin waiting. The task
+ // teams are reference counted and will be deallocated by the last worker thread via the
+ // thread's pointer to the task team.
+ KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n", task_team ) );
+ KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
+ TCW_SYNC_4( task_team->tt.tt_active, FALSE );
+ KMP_MB();
+ team->t.t_task_team[tt_idx] = NULL;
+ }
}
}
@@ -5263,6 +5329,11 @@ __kmp_reap_thread(
thread->th.th_pri_common = NULL;
}; // if
+ if (thread->th.th_task_state_memo_stack != NULL) {
+ __kmp_free(thread->th.th_task_state_memo_stack);
+ thread->th.th_task_state_memo_stack = NULL;
+ }
+
#if KMP_USE_BGET
if ( thread->th.th_local.bget_data != NULL ) {
__kmp_finalize_bget( thread );
Modified: openmp/trunk/runtime/src/kmp_tasking.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_tasking.c?rev=228718&r1=228717&r2=228718&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_tasking.c (original)
+++ openmp/trunk/runtime/src/kmp_tasking.c Tue Feb 10 12:37:43 2015
@@ -284,7 +284,7 @@ __kmp_push_task(kmp_int32 gtid, kmp_task
// Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
- if ( ! KMP_TASKING_ENABLED( task_team, thread->th.th_task_state ) ) {
+ if ( ! KMP_TASKING_ENABLED(task_team) ) {
__kmp_enable_tasking( task_team, thread );
}
KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
@@ -1180,7 +1180,7 @@ __kmpc_omp_taskyield( ident_t *loc_ref,
if ( ! taskdata->td_flags.team_serial ) {
kmp_task_team_t * task_team = thread->th.th_task_team;
if (task_team != NULL) {
- if (KMP_TASKING_ENABLED(task_team, thread->th.th_task_state)) {
+ if (KMP_TASKING_ENABLED(task_team)) {
__kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
}
@@ -2101,7 +2101,6 @@ __kmp_allocate_task_team( kmp_info_t *th
TCW_4(task_team -> tt.tt_found_tasks, FALSE);
task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
- task_team -> tt.tt_state = 0;
TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
TCW_4( task_team -> tt.tt_active, TRUE );
TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
@@ -2270,13 +2269,12 @@ __kmp_wait_to_unref_task_teams(void)
// __kmp_task_team_setup: Create a task_team for the current team, but use
// an already created, unused one if it already exists.
// This may be called by any thread, but only for teams with # threads >1.
-
void
-__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team )
+__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both )
{
KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
- if ( ( team->t.t_task_team == NULL ) && ( team->t.t_nproc > 1 ) ) {
+ if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( team->t.t_nproc > 1 ) ) {
// Allocate a new task team, which will be propagated to
// all of the worker threads after the barrier. As they
// spin in the barrier release phase, they will continue
@@ -2284,22 +2282,24 @@ __kmp_task_team_setup( kmp_info_t *this_
// the signal to stop checking for tasks (they can't safely
// reference the kmp_team_t struct, which could be reallocated
// by the master thread).
- team->t.t_task_team = __kmp_allocate_task_team( this_thr, team );
- KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new "
- "task_team %p for team %d\n",
- __kmp_gtid_from_thread( this_thr ), team->t.t_task_team,
+ team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
+ KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
+ __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
((team != NULL) ? team->t.t_id : -1)) );
}
- else {
+ //else
// All threads have reported in, and no tasks were spawned
// for this release->gather region. Leave the old task
// team struct in place for the upcoming region. No task
// teams are formed for serialized teams.
+ if (both) {
+ int other_team = 1 - this_thr->th.th_task_state;
+ if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) { // setup other team as well
+ team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
+ KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
+ __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
+ ((team != NULL) ? team->t.t_id : -1)) );
}
- if ( team->t.t_task_team != NULL ) {
- // Toggle the state flag so that we can tell which side of
- // the barrier we are on.
- team->t.t_task_team->tt.tt_state = 1 - this_thr->th.th_task_state;
}
}
@@ -2314,35 +2314,20 @@ __kmp_task_team_sync( kmp_info_t *this_t
{
KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
- // On the rare chance that this thread never saw that the task
- // team was no longer active, then unref/deallocate it now.
+ // In case this thread never saw that the task team was no longer active, unref/deallocate it now.
if ( this_thr->th.th_task_team != NULL ) {
if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
__kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
- } else {
- //
- // We are re-using a task team that was never enabled.
- //
- KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team );
+ } else { // We are re-using a task team that was never enabled.
+ KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
}
}
- //
- // It is now safe to propagate the task team pointer from the
- // team struct to the current thread.
- //
- TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team);
- if ( this_thr->th.th_task_team != NULL ) {
- //
- // Toggle the th_task_state field, instead of reading it from
- // the task team. Reading the tt_state field at this point
- // causes a 30% regression on EPCC parallel - toggling it
- // is much cheaper.
- //
+ // Toggle the th_task_state field, to switch which task_team this thread refers to
this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
- KMP_DEBUG_ASSERT( this_thr->th.th_task_state == TCR_4(team->t.t_task_team->tt.tt_state) );
- }
+ // It is now safe to propagate the task team pointer from the team struct to the current thread.
+ TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
__kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team,
this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
@@ -2350,41 +2335,31 @@ __kmp_task_team_sync( kmp_info_t *this_t
//------------------------------------------------------------------------------
-// __kmp_task_team_wait: Master thread waits for outstanding tasks after
-// the barrier gather phase. Only called by master thread if #threads
-// in team > 1 !
-
+// __kmp_task_team_wait: Master thread waits for outstanding tasks after the
+// barrier gather phase. Only called by master thread if #threads in team > 1 !
void
-__kmp_task_team_wait( kmp_info_t *this_thr,
- kmp_team_t *team
+__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
USE_ITT_BUILD_ARG(void * itt_sync_obj)
)
{
- kmp_task_team_t *task_team = team->t.t_task_team;
+ kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
- if ( ( task_team != NULL ) && KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
+ if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
__kmp_gtid_from_thread( this_thr ), task_team ) );
- //
- // All worker threads might have dropped through to the
- // release phase, but could still be executing tasks.
- // Wait here for all tasks to complete. To avoid memory
- // contention, only the master thread checks for the
- // termination condition.
- //
+ // All worker threads might have dropped through to the release phase, but could still
+ // be executing tasks. Wait here for all tasks to complete. To avoid memory contention,
+ // only the master thread checks for the termination condition.
kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
flag.wait(this_thr, TRUE
USE_ITT_BUILD_ARG(itt_sync_obj));
- //
- // Kill the old task team, so that the worker threads will
- // stop referencing it while spinning. They will
- // deallocate it when the reference count reaches zero.
+ // Kill the old task team, so that the worker threads will stop referencing it while spinning.
+ // They will deallocate it when the reference count reaches zero.
// The master thread is not included in the ref count.
- //
KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
__kmp_gtid_from_thread( this_thr ), task_team ) );
KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
@@ -2392,7 +2367,7 @@ __kmp_task_team_wait( kmp_info_t *this_t
KMP_MB();
TCW_PTR(this_thr->th.th_task_team, NULL);
- team->t.t_task_team = NULL;
+ team->t.t_task_team[this_thr->th.th_task_state] = NULL;
}
}
@@ -2408,7 +2383,7 @@ __kmp_task_team_wait( kmp_info_t *this_t
void
__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
{
- volatile kmp_uint32 *spin = &team->t.t_task_team->tt.tt_unfinished_threads;
+ volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
int flag = FALSE;
KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
Modified: openmp/trunk/runtime/src/kmp_wait_release.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_wait_release.h?rev=228718&r1=228717&r2=228718&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_wait_release.h (original)
+++ openmp/trunk/runtime/src/kmp_wait_release.h Tue Feb 10 12:37:43 2015
@@ -143,7 +143,7 @@ static inline void __kmp_wait_template(k
if (!TCR_SYNC_4(task_team->tt.tt_active)) {
KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
__kmp_unref_task_team(task_team, this_thr);
- } else if (KMP_TASKING_ENABLED(task_team, this_thr->th.th_task_state)) {
+ } else if (KMP_TASKING_ENABLED(task_team)) {
flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
USE_ITT_BUILD_ARG(itt_sync_obj), 0);
}
More information about the Openmp-commits
mailing list