[Openmp-commits] [openmp] r252082 - Refactor of task_team code.

Jonathan Peyton via Openmp-commits openmp-commits at lists.llvm.org
Wed Nov 4 13:37:48 PST 2015

Author: jlpeyton
Date: Wed Nov  4 15:37:48 2015
New Revision: 252082

URL: http://llvm.org/viewvc/llvm-project?rev=252082&view=rev
Refactor of task_team code.

This is a refactoring of the task_team code that more elegantly handles the
two-task_team case. Two task_teams per team are kept in use for the lifetime of
the team, so no reference counting is needed.

Differential Revision: http://reviews.llvm.org/D13993


Modified: openmp/trunk/runtime/src/kmp.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp.h?rev=252082&r1=252081&r2=252082&view=diff
--- openmp/trunk/runtime/src/kmp.h (original)
+++ openmp/trunk/runtime/src/kmp.h Wed Nov  4 15:37:48 2015
@@ -2100,14 +2100,6 @@ typedef struct kmp_base_task_team {
     volatile kmp_uint32     tt_active;             /* is the team still actively executing tasks */
-    kmp_int32               tt_padme[INTERNODE_CACHE_LINE/sizeof(kmp_int32)];
-    volatile kmp_uint32     tt_ref_ct;             /* #threads accessing struct  */
-                                                   /* (not incl. master)         */
 } kmp_base_task_team_t;
 union KMP_ALIGN_CACHE kmp_task_team {
@@ -3172,15 +3164,16 @@ int __kmp_execute_tasks_oncore(kmp_info_
 #endif /* USE_ITT_BUILD */
                                kmp_int32 is_constrained);
+extern void __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team );
 extern void __kmp_reap_task_teams( void );
-extern void __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread );
 extern void __kmp_wait_to_unref_task_teams( void );
-extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int both, int always );
+extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int always );
 extern void __kmp_task_team_sync  ( kmp_info_t *this_thr, kmp_team_t *team );
 extern void __kmp_task_team_wait  ( kmp_info_t *this_thr, kmp_team_t *team
                                     , void * itt_sync_obj
 #endif /* USE_ITT_BUILD */
+                                    , int wait=1
 extern void __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid );

Modified: openmp/trunk/runtime/src/kmp_barrier.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_barrier.cpp?rev=252082&r1=252081&r2=252082&view=diff
--- openmp/trunk/runtime/src/kmp_barrier.cpp (original)
+++ openmp/trunk/runtime/src/kmp_barrier.cpp Wed Nov  4 15:37:48 2015
@@ -1153,7 +1153,7 @@ __kmp_barrier(enum barrier_type bt, int
             if (__kmp_tasking_mode != tskm_immediate_exec) {
                 __kmp_task_team_wait(this_thr, team
                                      USE_ITT_BUILD_ARG(itt_sync_obj) );
-                __kmp_task_team_setup(this_thr, team, 0, 0); // use 0,0 to only setup the current team if nthreads > 1
+                __kmp_task_team_setup(this_thr, team, 0); // use 0 to only setup the current team if nthreads > 1
             // Let the debugger know: All threads are arrived and starting leaving the barrier.
@@ -1261,7 +1261,7 @@ __kmp_barrier(enum barrier_type bt, int
                 KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE);
                 __kmp_task_team_wait(this_thr, team
-                __kmp_task_team_setup(this_thr, team, 0, 0);
+                __kmp_task_team_setup(this_thr, team, 0);
                 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
@@ -1575,7 +1575,7 @@ __kmp_fork_barrier(int gtid, int tid)
         if (__kmp_tasking_mode != tskm_immediate_exec) {
-            __kmp_task_team_setup(this_thr, team, 1, 0);  // 1,0 indicates setup both task teams if nthreads > 1
+            __kmp_task_team_setup(this_thr, team, 0);  // 0 indicates setup current task team if nthreads > 1
         /* The master thread may have changed its blocktime between the join barrier and the
@@ -1614,14 +1614,7 @@ __kmp_fork_barrier(int gtid, int tid)
     // Early exit for reaping threads releasing forkjoin barrier
     if (TCR_4(__kmp_global.g.g_done)) {
-        if (this_thr->th.th_task_team != NULL) {
-            if (KMP_MASTER_TID(tid)) {
-                TCW_PTR(this_thr->th.th_task_team, NULL);
-            }
-            else {
-                __kmp_unref_task_team(this_thr->th.th_task_team, this_thr);
-            }
-        }
+        this_thr->th.th_task_team = NULL;
         if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {

Modified: openmp/trunk/runtime/src/kmp_runtime.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_runtime.c?rev=252082&r1=252081&r2=252082&view=diff
--- openmp/trunk/runtime/src/kmp_runtime.c (original)
+++ openmp/trunk/runtime/src/kmp_runtime.c Wed Nov  4 15:37:48 2015
@@ -2104,23 +2104,31 @@ __kmp_fork_call(
             // Take a memo of master's task_state
             if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
-                kmp_uint8 *old_stack, *new_stack = (kmp_uint8 *) __kmp_allocate( 2*master_th->th.th_task_state_stack_sz );
+                kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
+                kmp_uint8 *old_stack, *new_stack;
                 kmp_uint32 i;
+                new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
                 for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
                     new_stack[i] = master_th->th.th_task_state_memo_stack[i];
+                for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack
+                    new_stack[i] = 0;
+                }
                 old_stack = master_th->th.th_task_state_memo_stack;
                 master_th->th.th_task_state_memo_stack = new_stack;
-                master_th->th.th_task_state_stack_sz *= 2;
+                master_th->th.th_task_state_stack_sz = new_size;
             // Store master's task_state on stack
             master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
-            master_th->th.th_task_state = 0;
+            if (team == master_th->th.th_hot_teams[level].hot_team) { // Restore master's nested state if nested hot team
+                master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
+            }
+            else {
+                master_th->th.th_task_state = 0;
+            }
-        master_th->th.th_task_team = team->t.t_task_team[master_th->th.th_task_state];
         KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
@@ -2410,12 +2418,7 @@ __kmp_join_call(ident_t *loc, int gtid
             int old_num = master_th->th.th_team_nproc;
             int new_num = master_th->th.th_teams_size.nth;
             kmp_info_t **other_threads = team->t.t_threads;
-            kmp_task_team_t * task_team = master_th->th.th_task_team;
             team->t.t_nproc = new_num;
-            if ( task_team ) { // task team might have lesser value of counters
-                task_team->tt.tt_ref_ct = new_num - 1;
-                task_team->tt.tt_unfinished_threads = new_num;
-            }
             for ( i = 0; i < old_num; ++i ) {
                 other_threads[i]->th.th_team_nproc = new_num;
@@ -2509,18 +2512,18 @@ __kmp_join_call(ident_t *loc, int gtid
     if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-        // Restore task state from memo stack
-        KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
-        if (master_th->th.th_task_state_top > 0) {
+        if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack
+            KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
+            // Remember master's state if we re-use this nested hot team
+            master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
             --master_th->th.th_task_state_top; // pop
+            // Now restore state at this level
             master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
-        // Copy the first task team from the new child / old parent team to the thread and reset state flag.
+        // Copy the task team from the parent team to the master thread
         master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
         KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
-                        __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
-                        parent_team ) );
+                        __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
      // TODO: GEH - cannot do this assertion because root thread not set up as executing
@@ -2615,31 +2618,13 @@ __kmp_set_num_threads( int new_nth, int
         __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
-        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-            int tt_idx;
-            for (tt_idx=0; tt_idx<2; ++tt_idx) {
-                kmp_task_team_t *task_team = hot_team->t.t_task_team[tt_idx];
-                if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
-                    // Signal worker threads (esp. the extra ones) to stop looking for tasks while spin waiting.
-                    // The task teams are reference counted and will be deallocated by the last worker thread.
-                    KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 );
-                    TCW_SYNC_4( task_team->tt.tt_active, FALSE );
-                    KMP_MB();
-                    KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n",
-                                    &hot_team->t.t_task_team[tt_idx] ) );
-                    hot_team->t.t_task_team[tt_idx] = NULL;
-                }
-                else {
-                    KMP_DEBUG_ASSERT( task_team == NULL );
-                }
-            }
-        }
-        //
         // Release the extra threads we don't need any more.
-        //
         for ( f = new_nth;  f < hot_team->t.t_nproc; f++ ) {
             KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
+            if ( __kmp_tasking_mode != tskm_immediate_exec) {
+                // When decreasing team size, threads no longer in the team should unref task team.
+                hot_team->t.t_threads[f]->th.th_task_team = NULL;
+            }
             __kmp_free_thread( hot_team->t.t_threads[f] );
             hot_team->t.t_threads[f] =  NULL;
@@ -4081,7 +4066,6 @@ __kmp_initialize_info( kmp_info_t *this_
     TCW_PTR(this_thr->th.th_sleep_loc, NULL);
     KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
-    this_thr->th.th_task_state = 0;
     KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
                     tid, gtid, this_thr, this_thr->th.th_current_task ) );
@@ -4151,9 +4135,12 @@ __kmp_initialize_info( kmp_info_t *this_
     this_thr->th.th_next_pool = NULL;
     if (!this_thr->th.th_task_state_memo_stack) {
+        size_t i;
         this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
         this_thr->th.th_task_state_top = 0;
         this_thr->th.th_task_state_stack_sz = 4;
+        for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack
+            this_thr->th.th_task_state_memo_stack[i] = 0;
     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
@@ -4211,6 +4198,7 @@ __kmp_allocate_thread( kmp_root_t *root,
         TCW_4(__kmp_nth, __kmp_nth + 1);
+        new_thr->th.th_task_state = 0;
         new_thr->th.th_task_state_top = 0;
         new_thr->th.th_task_state_stack_sz = 4;
@@ -4896,26 +4884,6 @@ __kmp_allocate_team( kmp_root_t *root, i
             KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
             team->t.t_size_changed = 1;
-            if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-                // Signal the worker threads (esp. extra ones) to stop looking for tasks while spin waiting.
-                // The task teams are reference counted and will be deallocated by the last worker thread.
-                int tt_idx;
-                for (tt_idx=0; tt_idx<2; ++tt_idx) {
-                    // We don't know which of the two task teams workers are waiting on, so deactivate both.
-                    kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
-                    if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
-                        KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
-                        TCW_SYNC_4( task_team->tt.tt_active, FALSE );
-                        KMP_MB();
-                        KA_TRACE(20, ("__kmp_allocate_team: setting task_team %p to NULL\n",
-                                      &team->t.t_task_team[tt_idx]));
-                        team->t.t_task_team[tt_idx] = NULL;
-                    }
-                    else {
-                        KMP_DEBUG_ASSERT( task_team == NULL );
-                    }
-                }
-            }
             if( __kmp_hot_teams_mode == 0 ) {
                 // AC: saved number of threads should correspond to team's value in this mode,
@@ -4926,6 +4894,10 @@ __kmp_allocate_team( kmp_root_t *root, i
                 /* release the extra threads we don't need any more */
                 for( f = new_nproc  ;  f < team->t.t_nproc  ;  f++ ) {
                     KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
+                    if ( __kmp_tasking_mode != tskm_immediate_exec) {
+                        // When decreasing team size, threads no longer in the team should unref task team.
+                        team->t.t_threads[f]->th.th_task_team = NULL;
+                    }
                     __kmp_free_thread( team->t.t_threads[ f ] );
                     team->t.t_threads[ f ] = NULL;
@@ -4937,32 +4909,9 @@ __kmp_allocate_team( kmp_root_t *root, i
             team->t.t_sched =  new_icvs->sched;
             __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
-            if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-                // Init both task teams
-                int tt_idx;
-                for (tt_idx=0; tt_idx<2; ++tt_idx) {
-                    kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
-                    if ( task_team != NULL ) {
-                        KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
-                        task_team->tt.tt_nproc = new_nproc;
-                        task_team->tt.tt_unfinished_threads = new_nproc;
-                        task_team->tt.tt_ref_ct = new_nproc - 1;
-                    }
-                }
-            }
             /* update the remaining threads */
-            if (level) {
-                team->t.t_threads[0]->th.th_team_nproc = new_nproc;
-                for(f = 1; f < new_nproc; ++f) {
-                    team->t.t_threads[f]->th.th_team_nproc = new_nproc;
-                    team->t.t_threads[f]->th.th_task_state = 0;
-                }
-            }
-            else {
-                for(f = 0; f < new_nproc; ++f) {
-                    team->t.t_threads[f]->th.th_team_nproc = new_nproc;
-                }
+            for(f = 0; f < new_nproc; ++f) {
+                team->t.t_threads[f]->th.th_team_nproc = new_nproc;
             // restore the current task state of the master thread: should be the implicit task
             KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
@@ -5076,39 +5025,24 @@ __kmp_allocate_team( kmp_root_t *root, i
             } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
             /* make sure everyone is syncronized */
+            int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below
             __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
-            if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-                // Signal the worker threads to stop looking for tasks while spin waiting.
-                // The task teams are reference counted and will be deallocated by the last worker thread.
-                int tt_idx;
-                for (tt_idx=0; tt_idx<2; ++tt_idx) {
-                    // We don't know which of the two task teams workers are waiting on, so deactivate both.
-                    kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
-                    if ( (task_team != NULL) && TCR_SYNC_4(task_team->tt.tt_active) ) {
-                        TCW_SYNC_4( task_team->tt.tt_active, FALSE );
-                        team->t.t_task_team[tt_idx] = NULL;
-                    }
-                }
-            }
             /* reinitialize the threads */
             KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
-            if (level) {
-                int old_state = team->t.t_threads[0]->th.th_task_state;
-                for (f=0;  f < team->t.t_nproc; ++f)
-                    __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
-                // th_task_state for master thread will be put in stack of states in __kmp_fork_call()
-                // before zeroing, for workers it was just zeroed in __kmp_initialize_info()
-                team->t.t_threads[0]->th.th_task_state = old_state;
-            }
-            else {
-                int old_state = team->t.t_threads[0]->th.th_task_state;
-                for (f=0;  f<team->t.t_nproc; ++f) {
-                    __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
+            for (f=0;  f < team->t.t_nproc; ++f)
+                __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
+            if (level) { // set th_task_state for new threads in nested hot team
+                // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the
+                // th_task_state for the new threads. th_task_state for master thread will not be accurate until 
+                // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value.
+                for (f=old_nproc; f < team->t.t_nproc; ++f)
+                    team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
+            }
+            else { // set th_task_state for new threads in non-nested hot team
+                int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state
+                for (f=old_nproc; f < team->t.t_nproc; ++f)
                     team->t.t_threads[f]->th.th_task_state = old_state;
-                    team->t.t_threads[f]->th.th_task_team = team->t.t_task_team[old_state];
-                }
 #ifdef KMP_DEBUG
@@ -5342,18 +5276,17 @@ __kmp_free_team( kmp_root_t *root, kmp_t
     /* if we are non-hot team, release our threads */
     if( ! use_hot_team ) {
         if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+            // Delete task teams
             int tt_idx;
             for (tt_idx=0; tt_idx<2; ++tt_idx) {
-                // We don't know which of the two task teams workers are waiting on, so deactivate both.
                 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
                 if ( task_team != NULL ) {
-                    // Signal the worker threads to stop looking for tasks while spin waiting.  The task
-                    // teams are reference counted and will be deallocated by the last worker thread via the
-                    // thread's pointer to the task team.
-                    KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n", task_team ) );
+                    for (f=0; f<team->t.t_nproc; ++f) { // Have all threads unref task teams
+                        team->t.t_threads[f]->th.th_task_team = NULL;
+                    }
+                    KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
                     KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
-                    TCW_SYNC_4( task_team->tt.tt_active, FALSE );
-                    KMP_MB();
+                    __kmp_free_task_team( master, task_team );
                     team->t.t_task_team[tt_idx] = NULL;
@@ -5452,6 +5385,7 @@ __kmp_free_thread( kmp_info_t *this_th )
             balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
         balign[b].bb.team = NULL;
+    this_th->th.th_task_state = 0;
     /* put thread back on the free pool */
@@ -5622,9 +5556,7 @@ __kmp_launch_thread( kmp_info_t *this_th
-    if ( TCR_PTR( this_thr->th.th_task_team ) != NULL ) {
-        __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
-    }
+    this_thr->th.th_task_team = NULL;
     /* run the destructors for the threadprivate data for this thread */
     __kmp_common_destroy_gtid( gtid );
@@ -6120,10 +6052,7 @@ __kmp_internal_end_thread( int gtid_req
             KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
             if ( gtid >= 0 ) {
-                kmp_info_t *this_thr = __kmp_threads[ gtid ];
-                if (TCR_PTR(this_thr->th.th_task_team) != NULL) {
-                    __kmp_unref_task_team(this_thr->th.th_task_team, this_thr);
-                }
+                __kmp_threads[gtid]->th.th_task_team = NULL;
             KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));

Modified: openmp/trunk/runtime/src/kmp_tasking.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_tasking.c?rev=252082&r1=252081&r2=252082&view=diff
--- openmp/trunk/runtime/src/kmp_tasking.c (original)
+++ openmp/trunk/runtime/src/kmp_tasking.c Wed Nov  4 15:37:48 2015
@@ -895,7 +895,7 @@ __kmp_task_alloc( ident_t *loc_ref, kmp_
             KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
-            __kmp_task_team_setup(thread,team,0,1); // 0,1 indicates only setup the current team regardless of nthreads
+            __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
             thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
         kmp_task_team_t * task_team = thread->th.th_task_team;
@@ -1297,8 +1297,7 @@ __kmpc_omp_taskwait( ident_t *loc_ref, k
     kmp_info_t * thread;
     int thread_finished = FALSE;
-    KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
-                  gtid, loc_ref) );
+    KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
     if ( __kmp_tasking_mode != tskm_immediate_exec ) {
         // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
@@ -1688,7 +1687,7 @@ static inline int __kmp_execute_tasks_te
     KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
     task_team = thread -> th.th_task_team;
-    KMP_DEBUG_ASSERT( task_team != NULL );
+    if (task_team == NULL) return FALSE;
     KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
                   gtid, final_spin, *thread_finished) );
@@ -1732,6 +1731,7 @@ static inline int __kmp_execute_tasks_te
             KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
             return TRUE;
+        if (thread->th.th_task_team == NULL) break;
         KMP_YIELD( __kmp_library == library_throughput );   // Yield before executing next task
@@ -1767,6 +1767,7 @@ static inline int __kmp_execute_tasks_te
+    if (thread->th.th_task_team == NULL) return FALSE;
     // check if there are other threads to steal from, otherwise go back
     if ( nthreads  == 1 )
@@ -1805,6 +1806,7 @@ static inline int __kmp_execute_tasks_te
                 return TRUE;
+            if (thread->th.th_task_team == NULL) break;
             KMP_YIELD( __kmp_library == library_throughput );   // Yield before executing next task
             // If the execution of the stolen task resulted in more tasks being
             // placed on our run queue, then restart the whole process.
@@ -1851,6 +1853,7 @@ static inline int __kmp_execute_tasks_te
                 return TRUE;
+        if (thread->th.th_task_team == NULL) return FALSE;
     // Find a different thread to steal work from.  Pick a random thread.
@@ -1919,6 +1922,7 @@ static inline int __kmp_execute_tasks_te
                               gtid) );
                 return TRUE;
+            if (thread->th.th_task_team == NULL) break;
             KMP_YIELD( __kmp_library == library_throughput );   // Yield before executing next task
             // If the execution of the stolen task resulted in more tasks being
@@ -1966,6 +1970,7 @@ static inline int __kmp_execute_tasks_te
                 return TRUE;
+        if (thread->th.th_task_team == NULL) return FALSE;
     KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
@@ -2350,10 +2355,9 @@ __kmp_allocate_task_team( kmp_info_t *th
     TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
     TCW_4( task_team -> tt.tt_active, TRUE );
-    TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
-    KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
-                    (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
+    KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
+                    (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) );
     return task_team;
@@ -2362,16 +2366,13 @@ __kmp_allocate_task_team( kmp_info_t *th
 // __kmp_free_task_team:
 // Frees the task team associated with a specific thread, and adds it
 // to the global task team free list.
-static void
 __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
     KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
                     thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
-    KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
     // Put task team back on free list
     __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
@@ -2412,32 +2413,6 @@ __kmp_reap_task_teams( void )
-// __kmp_unref_task_teams:
-// Remove one thread from referencing the task team structure by
-// decreasing the reference count and deallocate task team if no more
-// references to it.
-__kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
-    kmp_uint ref_ct;
-    ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
-    KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
-                    __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
-    if ( ref_ct == 0 ) {
-        __kmp_free_task_team( thread, task_team );
-    }
-    TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
 // __kmp_wait_to_unref_task_teams:
 // Some threads could still be in the fork barrier release code, possibly
@@ -2475,9 +2450,7 @@ __kmp_wait_to_unref_task_teams(void)
             // TODO: GEH - add this check for Linux* OS / OS X* as well?
             if (!__kmp_is_thread_alive(thread, &exit_val)) {
-                if (TCR_PTR(thread->th.th_task_team) != NULL) {
-                    __kmp_unref_task_team( thread->th.th_task_team, thread );
-                }
+                thread->th.th_task_team = NULL;
@@ -2517,34 +2490,46 @@ __kmp_wait_to_unref_task_teams(void)
 // an already created, unused one if it already exists.
 // This may be called by any thread, but only for teams with # threads >1.
-__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both, int always )
+__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
     KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
-    if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( always || team->t.t_nproc > 1 ) ) {
-        // Allocate a new task team, which will be propagated to
-        // all of the worker threads after the barrier.  As they
-        // spin in the barrier release phase, then will continue
-        // to use the previous task team struct, until they receive
-        // the signal to stop checking for tasks (they can't safely
-        // reference the kmp_team_t struct, which could be reallocated
-        // by the master thread).
+    // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next.
+    // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use.
+    if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) { 
         team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
         KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
                       __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
                       ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
-    // else: Either all threads have reported in, and no tasks were spawned for this release->gather region
-    // Leave the old task team struct in place for the upcoming region.
-    // No task teams are formed for serialized teams.
-    if (both) {
-        int other_team = 1 - this_thr->th.th_task_state;
-        if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) { // setup other team as well
-            team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
-            KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
-                          __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
-                          ((team != NULL) ? team->t.t_id : -1), other_team ));
+    // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is 
+    // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the
+    // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely
+    // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for 
+    // serialized teams.
+    int other_team = 1 - this_thr->th.th_task_state;
+    if (team->t.t_task_team[other_team] == NULL && team->t.t_nproc > 1) { // setup other team as well
+        team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
+        KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
+                      __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
+                      ((team != NULL) ? team->t.t_id : -1), other_team ));
+    }
+    else { // Leave the old task team struct in place for the upcoming region; adjust as needed
+        kmp_task_team_t *task_team = team->t.t_task_team[other_team];
+        if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
+            TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
+            TCW_4(task_team->tt.tt_found_tasks, FALSE);
+            TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
+            TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
+            TCW_4(task_team->tt.tt_active, TRUE );
+        // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary
+        KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
+                      __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
+                      ((team != NULL) ? team->t.t_id : -1), other_team ));
@@ -2559,26 +2544,11 @@ __kmp_task_team_sync( kmp_info_t *this_t
     KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
-    // In case this thread never saw that the task team was no longer active, unref/deallocate it now.
-    if ( this_thr->th.th_task_team != NULL ) {
-        if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
-            KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
-            KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team (%p)is not active, unrefing\n",
-                          __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team));
-            __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
-        } 
-#if KMP_DEBUG       
-        else {  // We are re-using a task team that was never enabled.
-            KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
-        }
-    }
     // Toggle the th_task_state field, to switch which task_team this thread refers to
     this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
     // It is now safe to propagate the task team pointer from the team struct to the current thread.
     TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
-    KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to %p from Team #%d task team (parity=%d)\n",
+    KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
                   __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
                   ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
@@ -2586,11 +2556,14 @@ __kmp_task_team_sync( kmp_info_t *this_t
 // __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather
-// phase.  Only called by master thread if #threads in team > 1 or if proxy tasks were created
+// phase.  Only called by master thread if #threads in team > 1 or if proxy tasks were created.
+// The wait flag defaults to 1 (see kmp.h); waiting can be turned off by optionally passing 0
+// as the last argument. When wait is zero, the master thread does not wait for
+// unfinished_threads to reach 0.
 __kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
                       USE_ITT_BUILD_ARG(void * itt_sync_obj)
-                      )
+                      , int wait)
     kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
@@ -2598,18 +2571,18 @@ __kmp_task_team_wait( kmp_info_t *this_t
     KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
     if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
-        KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
-                      __kmp_gtid_from_thread(this_thr), task_team));
-        // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
-        // here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
-        kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
-        flag.wait(this_thr, TRUE
-                  USE_ITT_BUILD_ARG(itt_sync_obj));
-        // Kill the old task team, so that the worker threads will stop referencing it while spinning.
-        // They will deallocate it when the reference count reaches zero.
-        // The master thread is not included in the ref count.
-        KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: setting active to false, setting local and team's pointer to NULL\n",
+        if (wait) {
+            KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
+                          __kmp_gtid_from_thread(this_thr), task_team));
+            // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
+            // here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
+            kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
+            flag.wait(this_thr, TRUE
+                      USE_ITT_BUILD_ARG(itt_sync_obj));
+        }
+        // Deactivate the old task team, so that the worker threads will stop referencing it while spinning.
+        KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
+                      "setting active to false, setting local and team's pointer to NULL\n",
                       __kmp_gtid_from_thread(this_thr), task_team));
         KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
@@ -2621,7 +2594,6 @@ __kmp_task_team_wait( kmp_info_t *this_t
         TCW_PTR(this_thr->th.th_task_team, NULL);
-        team->t.t_task_team[this_thr->th.th_task_state] = NULL;

Modified: openmp/trunk/runtime/src/kmp_wait_release.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_wait_release.h?rev=252082&r1=252081&r2=252082&view=diff
--- openmp/trunk/runtime/src/kmp_wait_release.h (original)
+++ openmp/trunk/runtime/src/kmp_wait_release.h Wed Nov  4 15:37:48 2015
@@ -178,12 +178,14 @@ static inline void __kmp_wait_template(k
         if (__kmp_tasking_mode != tskm_immediate_exec) {
             task_team = this_thr->th.th_task_team;
             if (task_team != NULL) {
-                if (!TCR_SYNC_4(task_team->tt.tt_active)) {
+                if (TCR_SYNC_4(task_team->tt.tt_active)) {
+                    if (KMP_TASKING_ENABLED(task_team))
+                        flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
+                                            USE_ITT_BUILD_ARG(itt_sync_obj), 0);
+                }
+                else {
-                    __kmp_unref_task_team(task_team, this_thr);
-                } else if (KMP_TASKING_ENABLED(task_team)) {
-                    flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
-                                        USE_ITT_BUILD_ARG(itt_sync_obj), 0);
+                    this_thr->th.th_task_team = NULL;
             } // if
         } // if

More information about the Openmp-commits mailing list