[Openmp-commits] [openmp] r228718 - The tt_state flag is replaced by an array of two task_team pointers.

Andrey Churbanov Andrey.Churbanov at intel.com
Tue Feb 10 10:37:44 PST 2015


Author: achurbanov
Date: Tue Feb 10 12:37:43 2015
New Revision: 228718

URL: http://llvm.org/viewvc/llvm-project?rev=228718&view=rev
Log:
The tt_state flag is replaced by an array of two task_team pointers.
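
In short: each kmp_team now carries two task-team slots, and each thread carries a one-bit
index (th_task_state) that selects the slot for the current barrier epoch and is simply
toggled at every barrier. Below is a minimal, self-contained sketch of that double-buffering
pattern, with simplified stand-in types (task_team_t, team_t, thread_t are illustrations,
not the runtime's real kmp_* structures); later sketches in this mail reuse these stand-ins.

    #include <stdio.h>

    /* Hypothetical stand-ins for the runtime's structures. */
    typedef struct task_team { int found_tasks; } task_team_t;

    typedef struct team {
        task_team_t *task_team[2];  /* two slots; barriers alternate between them */
    } team_t;

    typedef struct thread {
        unsigned char task_state;   /* 0 or 1: slot index for the current epoch */
        task_team_t  *cur;          /* cached pointer, like th_task_team */
    } thread_t;

    /* After this commit the "tasking enabled" check needs only the found-tasks
       flag; the old tt_state comparison is gone because the slot index already
       tells the two barrier epochs apart. */
    static int tasking_enabled(const task_team_t *tt) { return tt->found_tasks; }

    /* The essence of __kmp_task_team_sync: toggle, then re-read the slot. */
    static void barrier_sync(thread_t *th, team_t *tm) {
        th->task_state = (unsigned char)(1 - th->task_state); /* cheap toggle */
        th->cur = tm->task_team[th->task_state];
    }

    int main(void) {
        task_team_t a = { 1 }, b = { 0 };
        team_t   tm = { { &a, &b } };
        thread_t th = { 0, &a };
        barrier_sync(&th, &tm);  /* thread now refers to slot 1 */
        printf("slot=%d enabled=%d\n", th.task_state, tasking_enabled(th.cur));
        return 0;
    }

The payoff is visible in the kmp_tasking.c hunks below: the pre-existing comment removed
there noted that reading tt_state in the sync path cost about 30% on EPCC parallel, whereas
the toggle is trivially cheap.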

Modified:
    openmp/trunk/runtime/src/kmp.h
    openmp/trunk/runtime/src/kmp_barrier.cpp
    openmp/trunk/runtime/src/kmp_csupport.c
    openmp/trunk/runtime/src/kmp_omp.h
    openmp/trunk/runtime/src/kmp_runtime.c
    openmp/trunk/runtime/src/kmp_tasking.c
    openmp/trunk/runtime/src/kmp_wait_release.h

Modified: openmp/trunk/runtime/src/kmp.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp.h?rev=228718&r1=228717&r2=228718&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp.h (original)
+++ openmp/trunk/runtime/src/kmp.h Tue Feb 10 12:37:43 2015
@@ -1852,10 +1852,8 @@ extern kmp_int32 __kmp_task_stealing_con
 
 // The tt_found_tasks flag is a signal to all threads in the team that tasks were spawned and
 // queued since the previous barrier release.
-// State is used to alternate task teams for successive barriers
-#define KMP_TASKING_ENABLED(task_team,state) \
-    ((TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE) && \
-     (TCR_4((task_team)->tt.tt_state)       == (state)))
+#define KMP_TASKING_ENABLED(task_team) \
+    (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE)
 /*!
 @ingroup BASIC_TYPES
 @{
@@ -2071,8 +2069,6 @@ typedef struct kmp_base_task_team {
 
     volatile kmp_uint32     tt_ref_ct;             /* #threads accessing struct  */
                                                    /* (not incl. master)         */
-    kmp_int32               tt_state;              /* alternating 0/1 for task team identification */
-                                                   /* Note: VERY sensitive to padding! */
 } kmp_base_task_team_t;
 
 union KMP_ALIGN_CACHE kmp_task_team {
@@ -2195,6 +2191,9 @@ typedef struct KMP_ALIGN_CACHE kmp_base_
     kmp_task_team_t    * th_task_team;           // Task team struct
     kmp_taskdata_t     * th_current_task;        // Innermost Task being executed
     kmp_uint8            th_task_state;          // alternating 0/1 for task team identification
+    kmp_uint8          * th_task_state_memo_stack;  // Stack holding memos of th_task_state at nested levels
+    kmp_uint32           th_task_state_top;         // Top element of th_task_state_memo_stack
+    kmp_uint32           th_task_state_stack_sz;    // Size of th_task_state_memo_stack
 
     /*
      * More stuff for keeping track of active/sleeping threads
@@ -2294,7 +2293,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_
     kmp_team_p              *t_parent;       // parent team
     kmp_team_p              *t_next_pool;    // next free team in the team pool
     kmp_disp_t              *t_dispatch;     // thread's dispatch data
-    kmp_task_team_t         *t_task_team;    // Task team struct
+    kmp_task_team_t         *t_task_team[2]; // Task team struct; switch between 2
 #if OMP_40_ENABLED
     kmp_proc_bind_t          t_proc_bind;    // bind type for par region
 #endif // OMP_40_ENABLED
@@ -3100,7 +3099,7 @@ int __kmp_execute_tasks_oncore(kmp_info_
 extern void __kmp_reap_task_teams( void );
 extern void __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread );
 extern void __kmp_wait_to_unref_task_teams( void );
-extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team );
+extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int both );
 extern void __kmp_task_team_sync  ( kmp_info_t *this_thr, kmp_team_t *team );
 extern void __kmp_task_team_wait  ( kmp_info_t *this_thr, kmp_team_t *team
 #if USE_ITT_BUILD

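The new th_task_state_memo_stack / th_task_state_top / th_task_state_stack_sz fields form a
small growable stack: __kmp_fork_call pushes the master's current 0/1 state before a nested
region (then resets it to 0), and __kmp_join_call pops it. A hedged sketch of that
discipline, with invented names (state_stack_t and its helpers are illustrations only):

    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical mirror of the new per-thread fields. */
    typedef struct state_stack {
        unsigned char *memo;   /* th_task_state_memo_stack */
        unsigned       top;    /* th_task_state_top        */
        unsigned       sz;     /* th_task_state_stack_sz   */
    } state_stack_t;

    static void stack_init(state_stack_t *s) {
        s->sz   = 4;           /* the runtime also starts at 4 entries */
        s->top  = 0;
        s->memo = malloc(s->sz);
    }

    /* Push at fork: save the state; the caller then resets its state to 0. */
    static void stack_push(state_stack_t *s, unsigned char state) {
        if (s->top >= s->sz) { /* double on overflow, as in __kmp_fork_call */
            unsigned char *bigger = malloc(2 * s->sz);
            memcpy(bigger, s->memo, s->sz);
            free(s->memo);
            s->memo = bigger;
            s->sz  *= 2;
        }
        s->memo[s->top++] = state;
    }

    /* Pop at join: restore the saved state if anything was pushed. */
    static unsigned char stack_pop(state_stack_t *s, unsigned char cur) {
        return (s->top > 0) ? s->memo[--s->top] : cur;
    }

As the kmp_runtime.c hunks below show, the stack is allocated lazily in
__kmp_initialize_info and freed in __kmp_reap_thread.
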
Modified: openmp/trunk/runtime/src/kmp_barrier.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_barrier.cpp?rev=228718&r1=228717&r2=228718&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_barrier.cpp (original)
+++ openmp/trunk/runtime/src/kmp_barrier.cpp Tue Feb 10 12:37:43 2015
@@ -1101,7 +1101,7 @@ __kmp_barrier(enum barrier_type bt, int
             if (__kmp_tasking_mode != tskm_immediate_exec) {
                 __kmp_task_team_wait(this_thr, team
                                      USE_ITT_BUILD_ARG(itt_sync_obj) );
-                __kmp_task_team_setup(this_thr, team);
+                __kmp_task_team_setup(this_thr, team, 0); // use 0 to only setup the current team
             }
 
 
@@ -1189,7 +1189,7 @@ __kmp_barrier(enum barrier_type bt, int
         status = 0;
         if (__kmp_tasking_mode != tskm_immediate_exec) {
             // The task team should be NULL for serialized code (tasks will be executed immediately)
-            KMP_DEBUG_ASSERT(team->t.t_task_team == NULL);
+            KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL);
             KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL);
         }
     }
@@ -1293,9 +1293,9 @@ __kmp_join_barrier(int gtid)
 # ifdef KMP_DEBUG
     if (__kmp_tasking_mode != tskm_immediate_exec) {
         KA_TRACE(20, ( "__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n",
-                       __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team,
+                       __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state],
                        this_thr->th.th_task_team));
-        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team);
+        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
     }
 # endif /* KMP_DEBUG */
 
@@ -1448,7 +1448,7 @@ __kmp_fork_barrier(int gtid, int tid)
 #endif
 
         if (__kmp_tasking_mode != tskm_immediate_exec) {
-            __kmp_task_team_setup(this_thr, team);
+            __kmp_task_team_setup(this_thr, team, 1);  // 1 indicates setup both task teams
         }
 
         /* The master thread may have changed its blocktime between the join barrier and the

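The new third argument distinguishes the two call sites above: at a regular barrier (0) only
the slot for the epoch being entered needs a task team, while the fork barrier (1) primes
both slots so that workers toggling in __kmp_task_team_sync never read a NULL pointer. A
simplified sketch of the shape of that logic, reusing the hypothetical stand-in types from
the first sketch (alloc_task_team is a stub, not a real runtime call):

    #include <stdlib.h>

    static task_team_t *alloc_task_team(void) {       /* stub allocator */
        return (task_team_t *)calloc(1, sizeof(task_team_t));
    }

    static void task_team_setup(thread_t *th, team_t *tm, int both, int nproc) {
        unsigned char s = th->task_state;
        if (tm->task_team[s] == NULL && nproc > 1)
            tm->task_team[s] = alloc_task_team();     /* slot for this epoch */
        if (both) {
            unsigned char o = (unsigned char)(1 - s);
            if (tm->task_team[o] == NULL && nproc > 1)
                tm->task_team[o] = alloc_task_team(); /* prime the other slot */
        }
    }
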
Modified: openmp/trunk/runtime/src/kmp_csupport.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_csupport.c?rev=228718&r1=228717&r2=228718&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_csupport.c (original)
+++ openmp/trunk/runtime/src/kmp_csupport.c Tue Feb 10 12:37:43 2015
@@ -494,13 +494,8 @@ __kmpc_end_serialized_parallel(ident_t *
         this_thr -> th.th_current_task -> td_flags.executing = 1;
 
         if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-            //
-            // Copy the task team from the new child / old parent team
-            // to the thread.  If non-NULL, copy the state flag also.
-            //
-            if ( ( this_thr -> th.th_task_team = this_thr -> th.th_team -> t.t_task_team ) != NULL ) {
-                this_thr -> th.th_task_state = this_thr -> th.th_task_team -> tt.tt_state;
-            }
+            // Copy the task team from the new child / old parent team to the thread.
+            this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
             KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
                             global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
         }
@@ -1821,11 +1816,10 @@ __kmpc_reduce_nowait(
             teams_swapped = 1;
             th->th.th_info.ds.ds_tid = team->t.t_master_tid;
             th->th.th_team = team->t.t_parent;
-            th->th.th_task_team = th->th.th_team->t.t_task_team;
             th->th.th_team_nproc = th->th.th_team->t.t_nproc;
+            th->th.th_task_team = th->th.th_team->t.t_task_team[0];
             task_state = th->th.th_task_state;
-            if( th->th.th_task_team )
-                th->th.th_task_state = th->th.th_task_team->tt.tt_state;
+            th->th.th_task_state = 0;
         }
     }
 #endif // OMP_40_ENABLED
@@ -1899,8 +1893,8 @@ __kmpc_reduce_nowait(
         // Restore thread structure
         th->th.th_info.ds.ds_tid = 0;
         th->th.th_team = team;
-        th->th.th_task_team = team->t.t_task_team;
         th->th.th_team_nproc = team->t.t_nproc;
+        th->th.th_task_team = team->t.t_task_team[task_state];
         th->th.th_task_state = task_state;
     }
 #endif

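The __kmpc_reduce_nowait hunks swap the thread onto the parent (league) team, and the
interesting detail is the state handling: the current th_task_state is stashed in a local,
forced to 0 while on the parent team (no tasking happens at teams level, so slot 0 is the
only one that matters), and restored afterwards. A sketch of that save/swap/restore, again
with the hypothetical stand-ins (swap_to_parent/swap_back are invented helper names; the
real code manipulates kmp_info_t fields inline):

    static void swap_to_parent(thread_t *th, team_t *parent,
                               unsigned char *saved_state) {
        *saved_state   = th->task_state;
        th->task_state = 0;                 /* teams level: slot 0 only */
        th->cur        = parent->task_team[0];
    }

    static void swap_back(thread_t *th, team_t *team, unsigned char saved_state) {
        th->task_state = saved_state;       /* restore original epoch */
        th->cur        = team->task_team[saved_state];
    }
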
Modified: openmp/trunk/runtime/src/kmp_omp.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_omp.h?rev=228718&r1=228717&r2=228718&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_omp.h (original)
+++ openmp/trunk/runtime/src/kmp_omp.h Tue Feb 10 12:37:43 2015
@@ -162,7 +162,6 @@ typedef struct {
     offset_and_size_t  tt_nproc;
     offset_and_size_t  tt_unfinished_threads;
     offset_and_size_t  tt_active;
-    offset_and_size_t  tt_state;
 
     /* kmp_taskdata_t */
     kmp_int32          td_sizeof_struct;

Modified: openmp/trunk/runtime/src/kmp_runtime.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_runtime.c?rev=228718&r1=228717&r2=228718&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_runtime.c (original)
+++ openmp/trunk/runtime/src/kmp_runtime.c Tue Feb 10 12:37:43 2015
@@ -1229,8 +1229,8 @@ __kmp_serialized_parallel(ident_t *loc,
     KMP_MB();
 
     if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-        KMP_DEBUG_ASSERT( this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team );
-        KMP_DEBUG_ASSERT( serial_team->t.t_task_team == NULL );
+        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
+        KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
         KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
                         global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
         this_thr->th.th_task_team = NULL;
@@ -1565,7 +1565,7 @@ __kmp_fork_call(
 
 #if KMP_DEBUG
     if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-        KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );
+        KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
     }
 #endif
 
@@ -1797,11 +1797,31 @@ __kmp_fork_call(
 
     if ( __kmp_tasking_mode != tskm_immediate_exec ) {
         // Set master's task team to team's task team. Unless this is hot team, it should be NULL.
-        KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );
+        KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
         KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
                       __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
-                      parent_team, team->t.t_task_team, team ) );
-        master_th->th.th_task_team = team->t.t_task_team;
+                      parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );
+        if (level) {
+            // Take a memo of master's task_state
+            KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
+            if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
+                kmp_uint8 *old_stack, *new_stack = (kmp_uint8 *) __kmp_allocate( 2*master_th->th.th_task_state_stack_sz );
+                kmp_uint32 i;
+                for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
+                    new_stack[i] = master_th->th.th_task_state_memo_stack[i];
+                }
+                old_stack = master_th->th.th_task_state_memo_stack;
+                master_th->th.th_task_state_memo_stack = new_stack;
+                master_th->th.th_task_state_stack_sz *= 2;
+                __kmp_free(old_stack);
+            }
+            // Store master's task_state on stack
+            master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
+            master_th->th.th_task_state_top++;
+            master_th->th.th_task_state = 0;
+        }
+        master_th->th.th_task_team = team->t.t_task_team[master_th->th.th_task_state];
+
 #if !KMP_NESTED_HOT_TEAMS
         KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
 #endif
@@ -1955,8 +1975,8 @@ __kmp_join_call(ident_t *loc, int gtid
     if ( __kmp_tasking_mode != tskm_immediate_exec ) {
         KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
                          __kmp_gtid_from_thread( master_th ), team,
-                         team->t.t_task_team, master_th->th.th_task_team) );
-        KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team );
+                         team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
+        KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
     }
 #endif
 
@@ -1991,6 +2011,10 @@ __kmp_join_call(ident_t *loc, int gtid
         //     But there is barrier for external team (league).
         __kmp_internal_join( loc, gtid, team );
     }
+    else {
+        master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel)
+    }
+
     KMP_MB();
 
 #if USE_ITT_BUILD
@@ -2062,8 +2086,10 @@ __kmp_join_call(ident_t *loc, int gtid
                     balign[ b ].bb.b_arrived        = team->t.t_bar[ b ].b_arrived;
                     KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
                 }
-                // Synchronize thread's task state
-                other_threads[i]->th.th_task_state = master_th->th.th_task_state;
+                if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+                    // Synchronize thread's task state
+                    other_threads[i]->th.th_task_state = master_th->th.th_task_state;
+                }
             }
         }
         return;
@@ -2112,9 +2138,9 @@ __kmp_join_call(ident_t *loc, int gtid
     __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads
 
     /* this race was fun to find.  make sure the following is in the critical
-     * region otherwise assertions may fail occasiounally since the old team
+     * region otherwise assertions may fail occasionally since the old team
      * may be reallocated and the hierarchy appears inconsistent.  it is
-     * actually safe to run and won't cause any bugs, but will cause thoose
+     * actually safe to run and won't cause any bugs, but will cause those
      * assertion failures.  it's only one deref&assign so might as well put this
      * in the critical region */
     master_th->th.th_team        =   parent_team;
@@ -2131,13 +2157,15 @@ __kmp_join_call(ident_t *loc, int gtid
     }
 
     if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-        //
-        // Copy the task team from the new child / old parent team
-        // to the thread.  If non-NULL, copy the state flag also.
-        //
-        if ( ( master_th->th.th_task_team = parent_team->t.t_task_team ) != NULL ) {
-            master_th->th.th_task_state = master_th->th.th_task_team->tt.tt_state;
+        // Restore task state from memo stack
+        KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
+        if (master_th->th.th_task_state_top > 0) {
+            --master_th->th.th_task_state_top; // pop
+            master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
         }
+        // Copy the first task team from the new child / old parent team to the thread and reset state flag.
+        master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
+
         KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
                         __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
                         parent_team ) );
@@ -2229,24 +2257,22 @@ __kmp_set_num_threads( int new_nth, int
 
 
         if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-            kmp_task_team_t *task_team = hot_team->t.t_task_team;
-            if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
-                //
-                // Signal the worker threads (esp. the extra ones) to stop
-                // looking for tasks while spin waiting.  The task teams
-                // are reference counted and will be deallocated by the
-                // last worker thread.
-                //
-                KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 );
-                TCW_SYNC_4( task_team->tt.tt_active, FALSE );
-                KMP_MB();
-
-                KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n",
-                  &hot_team->t.t_task_team ) );
-                  hot_team->t.t_task_team = NULL;
-            }
-            else {
-                KMP_DEBUG_ASSERT( task_team == NULL );
+            int tt_idx;
+            for (tt_idx=0; tt_idx<2; ++tt_idx) {
+                kmp_task_team_t *task_team = hot_team->t.t_task_team[tt_idx];
+                if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
+                    // Signal worker threads (esp. the extra ones) to stop looking for tasks while spin waiting.
+                    // The task teams are reference counted and will be deallocated by the last worker thread.
+                    KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 );
+                    TCW_SYNC_4( task_team->tt.tt_active, FALSE );
+                    KMP_MB();
+                    KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n",
+                                    &hot_team->t.t_task_team[tt_idx] ) );
+                    hot_team->t.t_task_team[tt_idx] = NULL;
+                }
+                else {
+                    KMP_DEBUG_ASSERT( task_team == NULL );
+                }
             }
         }
 
@@ -3617,7 +3643,7 @@ void __kmp_task_info() {
 #endif // KMP_DEBUG
 
 /* TODO optimize with one big memclr, take out what isn't needed,
- * split responsility to workers as much as possible, and delay
+ * split responsibility to workers as much as possible, and delay
  * initialization of features as much as possible  */
 static void
 __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
@@ -3723,6 +3749,12 @@ __kmp_initialize_info( kmp_info_t *this_
 
     this_thr->th.th_next_pool = NULL;
 
+    if (!this_thr->th.th_task_state_memo_stack) {
+        this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
+        this_thr->th.th_task_state_top = 0;
+        this_thr->th.th_task_state_stack_sz = 4;
+    }
+
     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
     KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
 
@@ -3778,6 +3810,9 @@ __kmp_allocate_thread( kmp_root_t *root,
 
         TCW_4(__kmp_nth, __kmp_nth + 1);
 
+        new_thr->th.th_task_state_top = 0;
+        new_thr->th.th_task_state_stack_sz = 4;
+
 #ifdef KMP_ADJUST_BLOCKTIME
         /* Adjust blocktime back to zero if necessary */
         /* Middle initialization might not have occurred yet */
@@ -4367,6 +4402,7 @@ __kmp_allocate_team( kmp_root_t *root, i
     char *ptr;
     size_t size;
     int use_hot_team = ! root->r.r_active;
+    int level = 0;
 
     KA_TRACE( 20, ("__kmp_allocate_team: called\n"));
     KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
@@ -4374,7 +4410,6 @@ __kmp_allocate_team( kmp_root_t *root, i
     KMP_MB();
 
 #if KMP_NESTED_HOT_TEAMS
-    int level;
     kmp_hot_team_ptr_t *hot_teams;
     if( master ) {
         team = master->th.th_team;
@@ -4405,8 +4440,8 @@ __kmp_allocate_team( kmp_root_t *root, i
 #endif
 #if KMP_DEBUG
         if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-            KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team = %p before reinit\n",
-                           team->t.t_task_team ));
+            KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
+                           team->t.t_task_team[0], team->t.t_task_team[1] ));
         }
 #endif
 
@@ -4449,30 +4484,35 @@ __kmp_allocate_team( kmp_root_t *root, i
             }
 # endif /* KMP_AFFINITY_SUPPORTED */
 #endif /* OMP_40_ENABLED */
+
+            if (level) {
+                for(f = 0; f < new_nproc; ++f) {
+                    team->t.t_threads[f]->th.th_task_state = 0;
+                }
+            }
         }
         else if( team->t.t_nproc > new_nproc ) {
             KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
 
             team->t.t_size_changed = 1;
             if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-                kmp_task_team_t *task_team = team->t.t_task_team;
-                if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
-                    //
-                    // Signal the worker threads (esp. the extra ones) to stop
-                    // looking for tasks while spin waiting.  The task teams
-                    // are reference counted and will be deallocated by the
-                    // last worker thread.
-                    //
-                    KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
-                    TCW_SYNC_4( task_team->tt.tt_active, FALSE );
-                    KMP_MB();
-
-                    KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
-                      &team->t.t_task_team ) );
-                      team->t.t_task_team = NULL;
-                }
-                else {
-                    KMP_DEBUG_ASSERT( task_team == NULL );
+                // Signal the worker threads (esp. extra ones) to stop looking for tasks while spin waiting.
+                // The task teams are reference counted and will be deallocated by the last worker thread.
+                int tt_idx;
+                for (tt_idx=0; tt_idx<2; ++tt_idx) {
+                    // We don't know which of the two task teams workers are waiting on, so deactivate both.
+                    kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
+                    if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
+                        KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
+                        TCW_SYNC_4( task_team->tt.tt_active, FALSE );
+                        KMP_MB();
+                        KA_TRACE(20, ("__kmp_allocate_team: setting task_team %p to NULL\n",
+                                      &team->t.t_task_team[tt_idx]));
+                        team->t.t_task_team[tt_idx] = NULL;
+                    }
+                    else {
+                        KMP_DEBUG_ASSERT( task_team == NULL );
+                    }
                 }
             }
 #if KMP_NESTED_HOT_TEAMS
@@ -4497,20 +4537,31 @@ __kmp_allocate_team( kmp_root_t *root, i
             __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
 
             if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-                kmp_task_team_t *task_team = team->t.t_task_team;
-                if ( task_team != NULL ) {
-                    KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
-                    task_team->tt.tt_nproc = new_nproc;
-                    task_team->tt.tt_unfinished_threads = new_nproc;
-                    task_team->tt.tt_ref_ct = new_nproc - 1;
+                // Init both task teams
+                int tt_idx;
+                for (tt_idx=0; tt_idx<2; ++tt_idx) {
+                    kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
+                    if ( task_team != NULL ) {
+                        KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
+                        task_team->tt.tt_nproc = new_nproc;
+                        task_team->tt.tt_unfinished_threads = new_nproc;
+                        task_team->tt.tt_ref_ct = new_nproc - 1;
+                    }
                 }
             }
 
             /* update the remaining threads */
-            for(f = 0; f < new_nproc; ++f) {
-                team->t.t_threads[f]->th.th_team_nproc = new_nproc;
+            if (level) {
+                for(f = 0; f < new_nproc; ++f) {
+                    team->t.t_threads[f]->th.th_team_nproc = new_nproc;
+                    team->t.t_threads[f]->th.th_task_state = 0;
+                }
+            }
+            else {
+                for(f = 0; f < new_nproc; ++f) {
+                    team->t.t_threads[f]->th.th_team_nproc = new_nproc;
+                }
             }
-
             // restore the current task state of the master thread: should be the implicit task
             KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
                        0, team->t.t_threads[0], team ) );
@@ -4621,19 +4672,33 @@ __kmp_allocate_team( kmp_root_t *root, i
             __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
 
             if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-                kmp_task_team_t *task_team = team->t.t_task_team;
-                if ( task_team != NULL ) {
-                    KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
-                    task_team->tt.tt_nproc = new_nproc;
-                    task_team->tt.tt_unfinished_threads = new_nproc;
-                    task_team->tt.tt_ref_ct = new_nproc - 1;
+                int tt_idx;
+                for (tt_idx=0; tt_idx<2; ++tt_idx) {
+                    kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
+                    if ( task_team != NULL ) {
+                        KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
+                        task_team->tt.tt_nproc = new_nproc;
+                        task_team->tt.tt_unfinished_threads = new_nproc;
+                        task_team->tt.tt_ref_ct = new_nproc - 1;
+                    }
                 }
             }
 
             /* reinitialize the old threads */
-            for( f = 0  ;  f < team->t.t_nproc  ;  f++ )
-                __kmp_initialize_info( team->t.t_threads[ f ], team, f,
-                                       __kmp_gtid_from_tid( f, team ) );
+            if (level) {
+                for( f = 0  ;  f < team->t.t_nproc  ;  f++ ) {
+                    __kmp_initialize_info( team->t.t_threads[ f ], team, f,
+                                           __kmp_gtid_from_tid( f, team ) );
+                }
+            }
+            else {
+                int old_state = team->t.t_threads[0]->th.th_task_state;
+                for (f=0;  f < team->t.t_nproc; ++f) {
+                    __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
+                    team->t.t_threads[f]->th.th_task_state = old_state;
+                }
+            }
+
 #ifdef KMP_DEBUG
             for ( f = 0; f < team->t.t_nproc; ++ f ) {
                 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
@@ -4666,7 +4731,7 @@ __kmp_allocate_team( kmp_root_t *root, i
             // Sync task (TODO: and barrier?) state for nested hot teams, not needed for outermost hot team.
             for( f = 1; f < new_nproc; ++f ) {
                 kmp_info_t *thr = team->t.t_threads[f];
-                thr->th.th_task_state = master->th.th_task_state;
+                thr->th.th_task_state = 0;
                 int b;
                 kmp_balign_t * balign = thr->th.th_bar;
                 for( b = 0; b < bs_last_barrier; ++ b ) {
@@ -4689,8 +4754,8 @@ __kmp_allocate_team( kmp_root_t *root, i
 
 #if KMP_DEBUG
         if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-            KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team = %p after reinit\n",
-              team->t.t_task_team ));
+            KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
+                           team->t.t_task_team[0], team->t.t_task_team[1] ));
         }
 #endif
 
@@ -4711,9 +4776,10 @@ __kmp_allocate_team( kmp_root_t *root, i
             /* setup the team for fresh use */
             __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
 
-            KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
-                            &team->t.t_task_team ) );
-            team->t.t_task_team = NULL;
+            KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
+                            &team->t.t_task_team[0], &team->t.t_task_team[1]) );
+            team->t.t_task_team[0] = NULL;
+            team->t.t_task_team[1] = NULL;
 
             /* reallocate space for arguments if necessary */
             __kmp_alloc_argv_entries( argc, team, TRUE );
@@ -4759,9 +4825,10 @@ __kmp_allocate_team( kmp_root_t *root, i
     KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
     __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
 
-    KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
-                    &team->t.t_task_team ) );
-    team->t.t_task_team = NULL;    // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
+    KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
+                    &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
+    team->t.t_task_team[0] = NULL;    // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
+    team->t.t_task_team[1] = NULL;    // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
 
     if ( __kmp_storage_map ) {
         __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
@@ -4838,22 +4905,21 @@ __kmp_free_team( kmp_root_t *root, kmp_t
 
     /* if we are non-hot team, release our threads */
     if( ! use_hot_team ) {
-
         if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-            kmp_task_team_t *task_team = team->t.t_task_team;
-            if ( task_team != NULL ) {
-                //
-                // Signal the worker threads to stop looking for tasks while
-                // spin waiting.  The task teams are reference counted and will
-                // be deallocated by the last worker thread via the thread's
-                // pointer to the task team.
-                //
-                KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n",
-                                task_team ) );
-                KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
-                TCW_SYNC_4( task_team->tt.tt_active, FALSE );
-                KMP_MB();
-                team->t.t_task_team = NULL;
+            int tt_idx;
+            for (tt_idx=0; tt_idx<2; ++tt_idx) {
+                // We don't know which of the two task teams workers are waiting on, so deactivate both.
+                kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
+                if ( task_team != NULL ) {
+                    // Signal the worker threads to stop looking for tasks while spin waiting.  The task
+                    // teams are reference counted and will be deallocated by the last worker thread via the
+                    // thread's pointer to the task team.
+                    KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n", task_team ) );
+                    KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
+                    TCW_SYNC_4( task_team->tt.tt_active, FALSE );
+                    KMP_MB();
+                    team->t.t_task_team[tt_idx] = NULL;
+                }
             }
         }
 
@@ -5263,6 +5329,11 @@ __kmp_reap_thread(
         thread->th.th_pri_common = NULL;
     }; // if
 
+    if (thread->th.th_task_state_memo_stack != NULL) {
+        __kmp_free(thread->th.th_task_state_memo_stack);
+        thread->th.th_task_state_memo_stack = NULL;
+    }
+
     #if KMP_USE_BGET
         if ( thread->th.th_local.bget_data != NULL ) {
             __kmp_finalize_bget( thread );

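A pattern repeated three times in this file (shrinking the hot team, __kmp_set_num_threads,
__kmp_free_team) is worth calling out: since the master cannot know which of the two slots
workers are currently spinning on, it deactivates both. A simplified sketch (plain stores
stand in for the real TCW_SYNC_4/KMP_MB ordering, and the active flag mirrors tt_active on
a local stand-in type, for illustration only):

    /* Local stand-in with an active flag (hypothetical, mirrors tt_active). */
    typedef struct task_team2 { int active; } task_team2_t;
    typedef struct team2 { task_team2_t *task_team[2]; } team2_t;

    static void deactivate_task_teams(team2_t *tm) {
        int tt_idx;
        for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
            task_team2_t *tt = tm->task_team[tt_idx];
            if (tt != NULL) {
                tt->active = 0;               /* spinners stop looking for tasks */
                tm->task_team[tt_idx] = NULL; /* last worker frees via refcount  */
            }
        }
    }
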
Modified: openmp/trunk/runtime/src/kmp_tasking.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_tasking.c?rev=228718&r1=228717&r2=228718&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_tasking.c (original)
+++ openmp/trunk/runtime/src/kmp_tasking.c Tue Feb 10 12:37:43 2015
@@ -284,7 +284,7 @@ __kmp_push_task(kmp_int32 gtid, kmp_task
 
     // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
     KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
-    if ( ! KMP_TASKING_ENABLED( task_team, thread->th.th_task_state ) ) {
+    if ( ! KMP_TASKING_ENABLED(task_team) ) {
          __kmp_enable_tasking( task_team, thread );
     }
     KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
@@ -1180,7 +1180,7 @@ __kmpc_omp_taskyield( ident_t *loc_ref,
         if ( ! taskdata->td_flags.team_serial ) {
             kmp_task_team_t * task_team = thread->th.th_task_team;
             if (task_team != NULL) {
-                if (KMP_TASKING_ENABLED(task_team, thread->th.th_task_state)) {
+                if (KMP_TASKING_ENABLED(task_team)) {
                     __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
                                             USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
                 }
@@ -2101,7 +2101,6 @@ __kmp_allocate_task_team( kmp_info_t *th
     TCW_4(task_team -> tt.tt_found_tasks, FALSE);
     task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
 
-    task_team -> tt.tt_state = 0;
     TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
     TCW_4( task_team -> tt.tt_active, TRUE );
     TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
@@ -2270,13 +2269,12 @@ __kmp_wait_to_unref_task_teams(void)
 // __kmp_task_team_setup:  Create a task_team for the current team, but use
 // an already created, unused one if it already exists.
 // This may be called by any thread, but only for teams with # threads >1.
-
 void
-__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team )
+__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both )
 {
     KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
 
-    if ( ( team->t.t_task_team == NULL ) && ( team->t.t_nproc > 1 ) ) {
+    if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( team->t.t_nproc > 1 ) ) {
         // Allocate a new task team, which will be propagated to
         // all of the worker threads after the barrier.  As they
         // spin in the barrier release phase, then will continue
@@ -2284,22 +2282,24 @@ __kmp_task_team_setup( kmp_info_t *this_
         // the signal to stop checking for tasks (they can't safely
         // reference the kmp_team_t struct, which could be reallocated
         // by the master thread).
-        team->t.t_task_team = __kmp_allocate_task_team( this_thr, team );
-        KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new "
-                        "task_team %p for team %d\n",
-                        __kmp_gtid_from_thread( this_thr ), team->t.t_task_team,
+        team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
+        KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
+                      __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
                         ((team != NULL) ? team->t.t_id : -1)) );
     }
-    else {
+    //else
         // All threads have reported in, and no tasks were spawned
         // for this release->gather region.  Leave the old task
         // team struct in place for the upcoming region.  No task
         // teams are formed for serialized teams.
+    if (both) {
+        int other_team = 1 - this_thr->th.th_task_state;
+        if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) { // setup other team as well
+            team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
+            KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
+                            __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
+                            ((team != NULL) ? team->t.t_id : -1)) );
     }
-    if ( team->t.t_task_team != NULL ) {
-        // Toggle the state flag so that we can tell which side of
-        // the barrier we are on.
-        team->t.t_task_team->tt.tt_state = 1 - this_thr->th.th_task_state;
     }
 }
 
@@ -2314,35 +2314,20 @@ __kmp_task_team_sync( kmp_info_t *this_t
 {
     KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
 
-    // On the rare chance that this thread never saw that the task
-    // team was no longer active, then unref/deallocate it now.
+    // In case this thread never saw that the task team was no longer active, unref/deallocate it now.
     if ( this_thr->th.th_task_team != NULL ) {
         if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
             KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
             __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
-        } else {
-            //
-            // We are re-using a task team that was never enabled.
-            //
-            KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team );
+        } else {  // We are re-using a task team that was never enabled.
+            KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
         }
     }
 
-    //
-    // It is now safe to propagate the task team pointer from the
-    // team struct to the current thread.
-    //
-    TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team);
-    if ( this_thr->th.th_task_team != NULL ) {
-        //
-        // Toggle the th_task_state field, instead of reading it from
-        // the task team.  Reading the tt_state field at this point
-        // causes a 30% regression on EPCC parallel - toggling it
-        // is much cheaper.
-        //
+    // Toggle the th_task_state field, to switch which task_team this thread refers to
         this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
-        KMP_DEBUG_ASSERT( this_thr->th.th_task_state == TCR_4(team->t.t_task_team->tt.tt_state) );
-    }
+    // It is now safe to propagate the task team pointer from the team struct to the current thread.
+    TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
     KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
                     __kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team,
                     this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
@@ -2350,41 +2335,31 @@ __kmp_task_team_sync( kmp_info_t *this_t
 
 
 //------------------------------------------------------------------------------
-// __kmp_task_team_wait: Master thread waits for outstanding tasks after
-// the barrier gather phase.  Only called by master thread if #threads
-// in team > 1 !
-
+// __kmp_task_team_wait: Master thread waits for outstanding tasks after the
+// barrier gather phase.  Only called by master thread if #threads in team > 1 !
 void
-__kmp_task_team_wait( kmp_info_t *this_thr,
-                      kmp_team_t *team
+__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
                       USE_ITT_BUILD_ARG(void * itt_sync_obj)
                       )
 {
-    kmp_task_team_t *task_team = team->t.t_task_team;
+    kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
 
     KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
     KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
 
-    if ( ( task_team != NULL ) && KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
+    if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
         KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
                           __kmp_gtid_from_thread( this_thr ), task_team ) );
-        //
-        // All worker threads might have dropped through to the
-        // release phase, but could still be executing tasks.
-        // Wait here for all tasks to complete.  To avoid memory
-        // contention, only the master thread checks for the
-        // termination condition.
-        //
+        // All worker threads might have dropped through to the release phase, but could still
+        // be executing tasks. Wait here for all tasks to complete.  To avoid memory contention,
+        // only the master thread checks for the termination condition.
         kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
         flag.wait(this_thr, TRUE
                   USE_ITT_BUILD_ARG(itt_sync_obj));
 
-        //
-        // Kill the old task team, so that the worker threads will
-        // stop referencing it while spinning.  They will
-        // deallocate it when the reference count reaches zero.
+        // Kill the old task team, so that the worker threads will stop referencing it while spinning.
+        // They will deallocate it when the reference count reaches zero.
         // The master thread is not included in the ref count.
-        //
         KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
                           __kmp_gtid_from_thread( this_thr ), task_team ) );
         KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
@@ -2392,7 +2367,7 @@ __kmp_task_team_wait( kmp_info_t *this_t
         KMP_MB();
 
         TCW_PTR(this_thr->th.th_task_team, NULL);
-        team->t.t_task_team = NULL;
+        team->t.t_task_team[this_thr->th.th_task_state] = NULL;
     }
 }
 
@@ -2408,7 +2383,7 @@ __kmp_task_team_wait( kmp_info_t *this_t
 void
 __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
 {
-    volatile kmp_uint32 *spin = &team->t.t_task_team->tt.tt_unfinished_threads;
+    volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
     int flag = FALSE;
     KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
 

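Putting the pieces together, a barrier cycle under this scheme is: the master drains and
retires the current epoch's slot in __kmp_task_team_wait, re-creates it (or primes both at
the fork barrier) in __kmp_task_team_setup, and every thread toggles and re-reads in
__kmp_task_team_sync. A sketch of the wait step, reusing the earlier stand-ins
(wait_for_unfinished_threads and deactivate are stubs; the real code spin-waits on
tt_unfinished_threads through kmp_flag_32):

    static void wait_for_unfinished_threads(task_team_t *tt) { (void)tt; } /* stub */
    static void deactivate(task_team_t *tt) { tt->found_tasks = 0; }       /* stub */

    static void task_team_wait(thread_t *th, team_t *tm) {
        task_team_t *tt = tm->task_team[th->task_state];
        if (tt != NULL && tasking_enabled(tt)) {
            wait_for_unfinished_threads(tt);      /* all tasks complete       */
            deactivate(tt);                       /* TCW_SYNC_4 in real code  */
            th->cur = NULL;
            tm->task_team[th->task_state] = NULL; /* retire this epoch's slot */
        }
    }
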
Modified: openmp/trunk/runtime/src/kmp_wait_release.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_wait_release.h?rev=228718&r1=228717&r2=228718&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_wait_release.h (original)
+++ openmp/trunk/runtime/src/kmp_wait_release.h Tue Feb 10 12:37:43 2015
@@ -143,7 +143,7 @@ static inline void __kmp_wait_template(k
                 if (!TCR_SYNC_4(task_team->tt.tt_active)) {
                     KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
                     __kmp_unref_task_team(task_team, this_thr);
-                } else if (KMP_TASKING_ENABLED(task_team, this_thr->th.th_task_state)) {
+                } else if (KMP_TASKING_ENABLED(task_team)) {
                     flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
                                         USE_ITT_BUILD_ARG(itt_sync_obj), 0);
                 }




