[Openmp-commits] [openmp] r286892 - Update stats-gathering code

Jonathan Peyton via Openmp-commits openmp-commits at lists.llvm.org
Mon Nov 14 13:13:45 PST 2016


Author: jlpeyton
Date: Mon Nov 14 15:13:44 2016
New Revision: 286892

URL: http://llvm.org/viewvc/llvm-project?rev=286892&view=rev
Log:
Update stats-gathering code

Have developer timers use the partitioning scheme, which also required that
some redundant developer timers be removed in favor of the already existing
normal timers. Move per-thread stats initialization to just after global thread
id assignment, which is as early as possible. Also put all global stats
initialization code in __kmp_stats_init() and all global stats destruction code
in __kmp_stats_fini().

Differential Revision: https://reviews.llvm.org/D26361

Modified:
    openmp/trunk/runtime/src/kmp_barrier.cpp
    openmp/trunk/runtime/src/kmp_global.c
    openmp/trunk/runtime/src/kmp_runtime.c
    openmp/trunk/runtime/src/kmp_stats.cpp
    openmp/trunk/runtime/src/kmp_stats.h
    openmp/trunk/runtime/src/z_Linux_util.c

Modified: openmp/trunk/runtime/src/kmp_barrier.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_barrier.cpp?rev=286892&r1=286891&r2=286892&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_barrier.cpp (original)
+++ openmp/trunk/runtime/src/kmp_barrier.cpp Mon Nov 14 15:13:44 2016
@@ -50,7 +50,7 @@ __kmp_linear_barrier_gather(enum barrier
                             void (*reduce)(void *, void *)
                             USE_ITT_BUILD_ARG(void * itt_sync_obj) )
 {
-    KMP_TIME_DEVELOPER_BLOCK(KMP_linear_gather);
+    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
     register kmp_team_t *team = this_thr->th.th_team;
     register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
     register kmp_info_t **other_threads = team->t.t_threads;
@@ -130,7 +130,7 @@ __kmp_linear_barrier_release(enum barrie
                              int propagate_icvs
                              USE_ITT_BUILD_ARG(void *itt_sync_obj) )
 {
-    KMP_TIME_DEVELOPER_BLOCK(KMP_linear_release);
+    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
     register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
     register kmp_team_t *team;
 
@@ -149,7 +149,7 @@ __kmp_linear_barrier_release(enum barrie
         if (nproc > 1) {
 #if KMP_BARRIER_ICV_PUSH
             {
-                KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+                KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
                 if (propagate_icvs) {
                     ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
                     for (i=1; i<nproc; ++i) {
@@ -225,7 +225,7 @@ __kmp_tree_barrier_gather(enum barrier_t
                           void (*reduce)(void *, void *)
                           USE_ITT_BUILD_ARG(void *itt_sync_obj) )
 {
-    KMP_TIME_DEVELOPER_BLOCK(KMP_tree_gather);
+    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
     register kmp_team_t *team = this_thr->th.th_team;
     register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
     register kmp_info_t **other_threads = team->t.t_threads;
@@ -323,7 +323,7 @@ __kmp_tree_barrier_release(enum barrier_
                            int propagate_icvs
                            USE_ITT_BUILD_ARG(void *itt_sync_obj) )
 {
-    KMP_TIME_DEVELOPER_BLOCK(KMP_tree_release);
+    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
     register kmp_team_t *team;
     register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
     register kmp_uint32 nproc;
@@ -393,7 +393,7 @@ __kmp_tree_barrier_release(enum barrier_
 
 #if KMP_BARRIER_ICV_PUSH
             {
-                KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+                KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
                 if (propagate_icvs) {
                     __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
                                              team, child_tid, FALSE);
@@ -426,7 +426,7 @@ __kmp_hyper_barrier_gather(enum barrier_
                            void (*reduce)(void *, void *)
                            USE_ITT_BUILD_ARG(void *itt_sync_obj) )
 {
-    KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_gather);
+    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
     register kmp_team_t *team = this_thr->th.th_team;
     register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
     register kmp_info_t **other_threads = team->t.t_threads;
@@ -535,7 +535,7 @@ __kmp_hyper_barrier_release(enum barrier
                             int propagate_icvs
                             USE_ITT_BUILD_ARG(void *itt_sync_obj) )
 {
-    KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_release);
+    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
     register kmp_team_t    *team;
     register kmp_bstate_t  *thr_bar       = & this_thr -> th.th_bar[ bt ].bb;
     register kmp_info_t   **other_threads;
@@ -742,7 +742,7 @@ __kmp_hierarchical_barrier_gather(enum b
                                   int gtid, int tid, void (*reduce) (void *, void *)
                                   USE_ITT_BUILD_ARG(void * itt_sync_obj) )
 {
-    KMP_TIME_DEVELOPER_BLOCK(KMP_hier_gather);
+    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
     register kmp_team_t *team = this_thr->th.th_team;
     register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
     register kmp_uint32 nproc = this_thr->th.th_team_nproc;
@@ -883,7 +883,7 @@ __kmp_hierarchical_barrier_release(enum
                                    int propagate_icvs
                                    USE_ITT_BUILD_ARG(void * itt_sync_obj) )
 {
-    KMP_TIME_DEVELOPER_BLOCK(KMP_hier_release);
+    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release);
     register kmp_team_t *team;
     register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
     register kmp_uint32 nproc;
@@ -1067,9 +1067,8 @@ int
 __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
               void *reduce_data, void (*reduce)(void *, void *))
 {
-    KMP_TIME_DEVELOPER_BLOCK(KMP_barrier);
-    KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
     KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
+    KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
     register int tid = __kmp_tid_from_gtid(gtid);
     register kmp_info_t *this_thr = __kmp_threads[gtid];
     register kmp_team_t *team = this_thr->th.th_team;
@@ -1333,7 +1332,8 @@ __kmp_barrier(enum barrier_type bt, int
 void
 __kmp_end_split_barrier(enum barrier_type bt, int gtid)
 {
-    KMP_TIME_DEVELOPER_BLOCK(KMP_end_split_barrier);
+    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier);
+    KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
     int tid = __kmp_tid_from_gtid(gtid);
     kmp_info_t *this_thr = __kmp_threads[gtid];
     kmp_team_t *team = this_thr->th.th_team;
@@ -1376,9 +1376,8 @@ __kmp_end_split_barrier(enum barrier_typ
 void
 __kmp_join_barrier(int gtid)
 {
-    KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier);
+    KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier);
     KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
-    KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier);
     register kmp_info_t *this_thr = __kmp_threads[gtid];
     register kmp_team_t *team;
     register kmp_uint nproc;
@@ -1592,9 +1591,8 @@ __kmp_join_barrier(int gtid)
 void
 __kmp_fork_barrier(int gtid, int tid)
 {
-    KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier);
+    KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier);
     KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
-    KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier);
     kmp_info_t *this_thr = __kmp_threads[gtid];
     kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
 #if USE_ITT_BUILD
@@ -1707,7 +1705,7 @@ __kmp_fork_barrier(int gtid, int tid)
        the fixed ICVs in the master's thread struct, because it is not always the case that the
        threads arrays have been allocated when __kmp_fork_call() is executed. */
     {
-        KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+        KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
         if (!KMP_MASTER_TID(tid)) {  // master thread already has ICVs
             // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
             KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
@@ -1762,7 +1760,7 @@ __kmp_fork_barrier(int gtid, int tid)
 void
 __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc )
 {
-    KMP_TIME_DEVELOPER_BLOCK(KMP_setup_icv_copy);
+    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);
 
     KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
     KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);

Modified: openmp/trunk/runtime/src/kmp_global.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_global.c?rev=286892&r1=286891&r2=286892&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_global.c (original)
+++ openmp/trunk/runtime/src/kmp_global.c Mon Nov 14 15:13:44 2016
@@ -28,10 +28,10 @@ kmp_cpuinfo_t   __kmp_cpuinfo = { 0 }; /
 kmp_tas_lock_t __kmp_stats_lock;
 
 // global list of per thread stats, the head is a sentinel node which accumulates all stats produced before __kmp_create_worker is called.
-kmp_stats_list __kmp_stats_list;
+kmp_stats_list* __kmp_stats_list;
 
 // thread local pointer to stats node within list
-__thread kmp_stats_list* __kmp_stats_thread_ptr = &__kmp_stats_list;
+__thread kmp_stats_list* __kmp_stats_thread_ptr = NULL;
 
 // gives reference tick for all events (considered the 0 tick)
 tsc_tick_count __kmp_stats_start_time;

Modified: openmp/trunk/runtime/src/kmp_runtime.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_runtime.c?rev=286892&r1=286891&r2=286892&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_runtime.c (original)
+++ openmp/trunk/runtime/src/kmp_runtime.c Mon Nov 14 15:13:44 2016
@@ -1417,7 +1417,7 @@ __kmp_fork_call(
     kmp_hot_team_ptr_t **p_hot_teams;
 #endif
     { // KMP_TIME_BLOCK
-    KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);
+    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
     KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
 
     KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
@@ -2199,7 +2199,6 @@ __kmp_fork_call(
     {
         KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
         KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
-        // KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
         if (! team->t.t_invoke( gtid )) {
             KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
         }
@@ -2258,7 +2257,7 @@ __kmp_join_call(ident_t *loc, int gtid
 #endif /* OMP_40_ENABLED */
 )
 {
-    KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
+    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
     kmp_team_t     *team;
     kmp_team_t     *parent_team;
     kmp_info_t     *master_th;
@@ -3681,6 +3680,13 @@ __kmp_register_root( int initial_thread
         KMP_DEBUG_ASSERT( ! root->r.r_root_team );
     }
 
+#if KMP_STATS_ENABLED
+    // Initialize stats as soon as possible (right after gtid assignment).
+    __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
+    KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
+    KMP_SET_THREAD_STATE(SERIAL_REGION);
+    KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
+#endif
     __kmp_initialize_root( root );
 
     /* setup new root thread structure */
@@ -4748,7 +4754,7 @@ __kmp_allocate_team( kmp_root_t *root, i
     kmp_internal_control_t *new_icvs,
     int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
 {
-    KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
+    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
     int f;
     kmp_team_t *team;
     int use_hot_team = ! root->r.r_active;
@@ -5504,14 +5510,11 @@ __kmp_launch_thread( kmp_info_t *this_th
                 }
 #endif
 
-                KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
                 {
-                    KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
                     KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
                     KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
                     rc = (*pteam)->t.t_invoke( gtid );
                 }
-                KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
                 KMP_ASSERT( rc );
 
 #if OMPT_SUPPORT
@@ -6332,7 +6335,7 @@ __kmp_do_serial_initialize( void )
 #endif
 #endif
 #if KMP_STATS_ENABLED
-    __kmp_init_tas_lock( & __kmp_stats_lock );
+    __kmp_stats_init();
 #endif
     __kmp_init_lock( & __kmp_global_lock     );
     __kmp_init_queuing_lock( & __kmp_dispatch_lock );
@@ -7293,8 +7296,7 @@ __kmp_cleanup( void )
     __kmp_i18n_catclose();
 
 #if KMP_STATS_ENABLED
-    __kmp_accumulate_stats_at_exit();
-    __kmp_stats_list.deallocate();
+    __kmp_stats_fini();
 #endif
 
     KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );

Modified: openmp/trunk/runtime/src/kmp_stats.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stats.cpp?rev=286892&r1=286891&r2=286892&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stats.cpp (original)
+++ openmp/trunk/runtime/src/kmp_stats.cpp Mon Nov 14 15:13:44 2016
@@ -29,11 +29,11 @@
 #define expandName(name,flags,ignore)  {STRINGIZE(name),flags},
 statInfo timeStat::timerInfo[] = {
     KMP_FOREACH_TIMER(expandName,0)
-    {0,0}
+    {"TIMER_LAST", 0}
 };
 const statInfo counter::counterInfo[] = {
     KMP_FOREACH_COUNTER(expandName,0)
-    {0,0}
+    {"COUNTER_LAST", 0}
 };
 #undef expandName
 
@@ -71,7 +71,7 @@ const kmp_stats_output_module::rgb_color
 static uint32_t statsPrinted = 0;
 
 // output interface
-static kmp_stats_output_module __kmp_stats_global_output;
+static kmp_stats_output_module* __kmp_stats_global_output = NULL;
 
 /* ****************************************************** */
 /* ************* statistic member functions ************* */
@@ -164,7 +164,7 @@ void explicitTimer::start(timer_e timerE
     return;
 }
 
-void explicitTimer::stop(timer_e timerEnumValue) {
+void explicitTimer::stop(timer_e timerEnumValue, kmp_stats_list* stats_ptr /* = nullptr */) {
     if (startTime.getValue() == 0)
         return;
 
@@ -174,8 +174,10 @@ void explicitTimer::stop(timer_e timerEn
     stat->addSample(((finishTime - startTime) - totalPauseTime).ticks());
 
     if(timeStat::logEvent(timerEnumValue)) {
-        __kmp_stats_thread_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
-        __kmp_stats_thread_ptr->decrementNestValue();
+        if(!stats_ptr)
+            stats_ptr = __kmp_stats_thread_ptr;
+        stats_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
+        stats_ptr->decrementNestValue();
     }
 
     /* We accept the risk that we drop a sample because it really did start at t==0. */
@@ -481,18 +483,18 @@ void kmp_stats_output_module::windupExpl
     // and say "it's over".
     // If the timer wasn't running, this won't record anything anyway.
     kmp_stats_list::iterator it;
-    for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
+    for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
         kmp_stats_list* ptr = *it;
         ptr->getPartitionedTimers()->windup();
         for (int timer=0; timer<EXPLICIT_TIMER_LAST; timer++) {
-            ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer);
+            ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer, ptr);
         }
     }
 }
 
 void kmp_stats_output_module::printPloticusFile() {
     int i;
-    int size = __kmp_stats_list.size();
+    int size = __kmp_stats_list->size();
     FILE* plotOut = fopen(plotFileName, "w+");
 
     fprintf(plotOut, "#proc page\n"
@@ -602,7 +604,7 @@ void kmp_stats_output_module::outputStat
     fprintf(statsOut, "%s\n",heading);
     // Accumulate across threads.
     kmp_stats_list::iterator it;
-    for (it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
+    for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
         int t = (*it)->getGtid();
         // Output per thread stats if requested.
         if (printPerThreadFlag) {
@@ -666,7 +668,7 @@ extern "C" {
 void __kmp_reset_stats()
 {
     kmp_stats_list::iterator it;
-    for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
+    for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
         timeStat * timers     = (*it)->getTimers();
         counter * counters    = (*it)->getCounters();
         explicitTimer * eTimers = (*it)->getExplicitTimers();
@@ -688,7 +690,7 @@ void __kmp_reset_stats()
 // This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already.
 void __kmp_output_stats(const char * heading)
 {
-    __kmp_stats_global_output.outputStats(heading);
+    __kmp_stats_global_output->outputStats(heading);
     __kmp_reset_stats();
 }
 
@@ -703,6 +705,18 @@ void __kmp_accumulate_stats_at_exit(void
 
 void __kmp_stats_init(void)
 {
+    __kmp_init_tas_lock( & __kmp_stats_lock );
+    __kmp_stats_start_time = tsc_tick_count::now();
+    __kmp_stats_global_output = new kmp_stats_output_module();
+    __kmp_stats_list = new kmp_stats_list();
+}
+
+void __kmp_stats_fini(void)
+{
+    __kmp_accumulate_stats_at_exit();
+    __kmp_stats_list->deallocate();
+    delete __kmp_stats_global_output;
+    delete __kmp_stats_list;
 }
 
 } // extern "C"

Modified: openmp/trunk/runtime/src/kmp_stats.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stats.h?rev=286892&r1=286891&r2=286892&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stats.h (original)
+++ openmp/trunk/runtime/src/kmp_stats.h Mon Nov 14 15:13:44 2016
@@ -104,8 +104,7 @@ enum stats_state_e {
     macro (OMP_TASKLOOP, 0, arg)                                \
     macro (TASK_executed, 0, arg)                               \
     macro (TASK_cancelled, 0, arg)                              \
-    macro (TASK_stolen, 0, arg)                                 \
-    macro (LAST,0,arg)
+    macro (TASK_stolen, 0, arg)
 
 /*!
  * \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h
@@ -123,31 +122,31 @@ enum stats_state_e {
  * @ingroup STATS_GATHERING2
  */
 #define KMP_FOREACH_TIMER(macro, arg)                              \
-    macro (OMP_worker_thread_life, 0, arg)                         \
+    macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg)   \
     macro (FOR_static_scheduling, 0, arg)                          \
     macro (FOR_dynamic_scheduling, 0, arg)                         \
     macro (OMP_critical,  0, arg)                                  \
     macro (OMP_critical_wait,  0, arg)                             \
     macro (OMP_single,    0, arg)                                  \
     macro (OMP_master,    0, arg)                                  \
-    macro (OMP_idle, 0, arg)                                       \
-    macro (OMP_plain_barrier, 0, arg)                              \
-    macro (OMP_fork_join_barrier, 0, arg)                          \
-    macro (OMP_parallel, 0, arg)                                   \
+    macro (OMP_idle, stats_flags_e::logEvent, arg)                 \
+    macro (OMP_plain_barrier, stats_flags_e::logEvent, arg)        \
+    macro (OMP_fork_barrier, stats_flags_e::logEvent, arg)         \
+    macro (OMP_join_barrier, stats_flags_e::logEvent, arg)         \
+    macro (OMP_parallel, stats_flags_e::logEvent, arg)             \
     macro (OMP_task_immediate, 0, arg)                             \
     macro (OMP_task_taskwait, 0, arg)                              \
     macro (OMP_task_taskyield, 0, arg)                             \
     macro (OMP_task_taskgroup, 0, arg)                             \
     macro (OMP_task_join_bar, 0, arg)                              \
     macro (OMP_task_plain_bar, 0, arg)                             \
-    macro (OMP_serial, 0, arg)                                     \
+    macro (OMP_serial, stats_flags_e::logEvent, arg)               \
     macro (OMP_taskloop_scheduling, 0, arg)                        \
     macro (OMP_set_numthreads,    stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
     macro (OMP_PARALLEL_args,     stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
     macro (FOR_static_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
     macro (FOR_dynamic_iterations,stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
-    KMP_FOREACH_DEVELOPER_TIMER(macro, arg)                             \
-    macro (LAST,0, arg)
+    KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
 
 
 // OMP_start_end          -- Time from when OpenMP is initialized until the stats are printed at exit
@@ -190,28 +189,22 @@ enum stats_state_e {
 // KMP_tree_release       -- time in __kmp_tree_barrier_release
 // KMP_hyper_gather       -- time in __kmp_hyper_barrier_gather
 // KMP_hyper_release      -- time in __kmp_hyper_barrier_release
-# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)                        \
-    macro (KMP_fork_call, 0, arg)                                       \
-    macro (KMP_join_call, 0, arg)                                       \
-    macro (KMP_fork_barrier, stats_flags_e::logEvent, arg)              \
-    macro (KMP_join_barrier, stats_flags_e::logEvent, arg)              \
-    macro (KMP_barrier, 0, arg)                                         \
-    macro (KMP_end_split_barrier, 0, arg)                               \
-    macro (KMP_hier_gather, 0, arg)                                     \
-    macro (KMP_hier_release, 0, arg)                                    \
-    macro (KMP_hyper_gather,  stats_flags_e::logEvent, arg)             \
-    macro (KMP_hyper_release,  stats_flags_e::logEvent, arg)            \
-    macro (KMP_linear_gather, 0, arg)                                   \
-    macro (KMP_linear_release, 0, arg)                                  \
-    macro (KMP_tree_gather, 0, arg)                                     \
-    macro (KMP_tree_release, 0, arg)                                    \
-    macro (USER_master_invoke, stats_flags_e::logEvent, arg)            \
-    macro (USER_worker_invoke, stats_flags_e::logEvent, arg)            \
-    macro (USER_resume, stats_flags_e::logEvent, arg)                   \
-    macro (USER_suspend, stats_flags_e::logEvent, arg)                  \
-    macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg)       \
-    macro (KMP_allocate_team, 0, arg)                                   \
-    macro (KMP_setup_icv_copy, 0, arg)                                  \
+# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
+    macro (KMP_fork_call, 0, arg)                \
+    macro (KMP_join_call, 0, arg)                \
+    macro (KMP_end_split_barrier, 0, arg)        \
+    macro (KMP_hier_gather, 0, arg)              \
+    macro (KMP_hier_release, 0, arg)             \
+    macro (KMP_hyper_gather, 0, arg)             \
+    macro (KMP_hyper_release, 0, arg)            \
+    macro (KMP_linear_gather, 0, arg)            \
+    macro (KMP_linear_release, 0, arg)           \
+    macro (KMP_tree_gather, 0, arg)              \
+    macro (KMP_tree_release, 0, arg)             \
+    macro (USER_resume, 0, arg)                  \
+    macro (USER_suspend, 0, arg)                 \
+    macro (KMP_allocate_team, 0, arg)            \
+    macro (KMP_setup_icv_copy, 0, arg)           \
     macro (USER_icv_copy, 0, arg)
 #else
 # define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
@@ -233,47 +226,23 @@ enum stats_state_e {
  *
  * @ingroup STATS_GATHERING
 */
-#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg)     \
-    macro(OMP_worker_thread_life, 0, arg)          \
-    macro(FOR_static_scheduling, 0, arg)           \
-    macro(FOR_dynamic_scheduling, 0, arg)          \
-    macro(OMP_critical, 0, arg)                    \
-    macro(OMP_critical_wait, 0, arg)               \
-    macro(OMP_single, 0, arg)                      \
-    macro(OMP_master, 0, arg)                      \
-    macro(OMP_idle, 0, arg)                        \
-    macro(OMP_plain_barrier, 0, arg)               \
-    macro(OMP_fork_join_barrier, 0, arg)           \
-    macro(OMP_parallel, 0, arg)                    \
-    macro(OMP_task_immediate, 0, arg)              \
-    macro(OMP_task_taskwait, 0, arg)               \
-    macro(OMP_task_taskyield, 0, arg)              \
-    macro(OMP_task_taskgroup, 0, arg)              \
-    macro(OMP_task_join_bar, 0, arg)               \
-    macro(OMP_task_plain_bar, 0, arg)              \
-    macro(OMP_serial, 0, arg)                      \
-    macro(OMP_taskloop_scheduling, 0, arg)         \
-    KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg)     \
-    macro(LAST, 0, arg)
-
-#if (KMP_DEVELOPER_STATS)
-# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg)               \
-    macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg)
-#else
-# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg)
-#endif
+#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
+    KMP_FOREACH_TIMER(macro, arg)
 
 #define ENUMERATE(name,ignore,prefix) prefix##name,
 enum timer_e {
     KMP_FOREACH_TIMER(ENUMERATE, TIMER_)
+    TIMER_LAST
 };
 
 enum explicit_timer_e {
     KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_)
+    EXPLICIT_TIMER_LAST
 };
 
 enum counter_e {
     KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_)
+    COUNTER_LAST
 };
 #undef ENUMERATE
 
@@ -370,7 +339,7 @@ class explicitTimer
     void start(timer_e timerEnumValue);
     void pause() { pauseStartTime = tsc_tick_count::now(); }
     void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
-    void stop(timer_e timerEnumValue);
+    void stop(timer_e timerEnumValue, kmp_stats_list* stats_ptr = nullptr);
     void reset() { startTime = 0; pauseStartTime = 0; totalPauseTime = 0; }
 };
 
@@ -716,13 +685,14 @@ class kmp_stats_output_module {
 extern "C" {
 #endif
 void __kmp_stats_init();
+void __kmp_stats_fini();
 void __kmp_reset_stats();
 void __kmp_output_stats(const char *);
 void __kmp_accumulate_stats_at_exit(void);
 // thread local pointer to stats node within list
 extern __thread kmp_stats_list* __kmp_stats_thread_ptr;
 // head to stats list.
-extern kmp_stats_list __kmp_stats_list;
+extern kmp_stats_list* __kmp_stats_list;
 // lock for __kmp_stats_list
 extern kmp_tas_lock_t  __kmp_stats_lock;
 // reference start time
@@ -866,6 +836,7 @@ extern kmp_stats_output_module __kmp_sta
 # define KMP_COUNT_DEVELOPER_BLOCK(n)            KMP_COUNT_BLOCK(n)
 # define KMP_START_DEVELOPER_EXPLICIT_TIMER(n)   KMP_START_EXPLICIT_TIMER(n)
 # define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n)    KMP_STOP_EXPLICIT_TIMER(n)
+# define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n)
 #else
 // Null definitions
 # define KMP_TIME_DEVELOPER_BLOCK(n)             ((void)0)
@@ -873,6 +844,7 @@ extern kmp_stats_output_module __kmp_sta
 # define KMP_COUNT_DEVELOPER_BLOCK(n)            ((void)0)
 # define KMP_START_DEVELOPER_EXPLICIT_TIMER(n)   ((void)0)
 # define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n)    ((void)0)
+# define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
 #endif
 
 #else // KMP_STATS_ENABLED
@@ -894,6 +866,7 @@ extern kmp_stats_output_module __kmp_sta
 #define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n)    ((void)0)
 #define KMP_INIT_PARTITIONED_TIMERS(name)       ((void)0)
 #define KMP_TIME_PARTITIONED_BLOCK(name)        ((void)0)
+#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
 #define KMP_PUSH_PARTITIONED_TIMER(name)        ((void)0)
 #define KMP_POP_PARTITIONED_TIMER()             ((void)0)
 #define KMP_SET_THREAD_STATE(state_name)        ((void)0)

Modified: openmp/trunk/runtime/src/z_Linux_util.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_util.c?rev=286892&r1=286891&r2=286892&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_util.c (original)
+++ openmp/trunk/runtime/src/z_Linux_util.c Mon Nov 14 15:13:44 2016
@@ -866,14 +866,12 @@ __kmp_create_worker( int gtid, kmp_info_
     // th->th.th_stats is used to transfer thread specific stats-pointer to __kmp_launch_worker
     // So when thread is created (goes into __kmp_launch_worker) it will
     // set it's __thread local pointer to th->th.th_stats
-    th->th.th_stats = __kmp_stats_list.push_back(gtid);
-    if(KMP_UBER_GTID(gtid)) {
-        __kmp_stats_start_time = tsc_tick_count::now();
-        __kmp_stats_thread_ptr = th->th.th_stats;
-        __kmp_stats_init();
-        KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
-        KMP_SET_THREAD_STATE(SERIAL_REGION);
-        KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
+    if(!KMP_UBER_GTID(gtid)) {
+        th->th.th_stats = __kmp_stats_list->push_back(gtid);
+    } else {
+        // For root threads, the __kmp_stats_thread_ptr is set in __kmp_register_root(), so
+        // set the th->th.th_stats field to it.
+        th->th.th_stats = __kmp_stats_thread_ptr;
     }
     __kmp_release_tas_lock(&__kmp_stats_lock, gtid);
 
@@ -1541,7 +1539,7 @@ __kmp_suspend_uninitialize_thread( kmp_i
 template <class C>
 static inline void __kmp_suspend_template( int th_gtid, C *flag )
 {
-    KMP_TIME_DEVELOPER_BLOCK(USER_suspend);
+    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_suspend);
     kmp_info_t *th = __kmp_threads[th_gtid];
     int status;
     typename C::flag_t old_spin;
@@ -1675,7 +1673,7 @@ void __kmp_suspend_oncore(int th_gtid, k
 template <class C>
 static inline void __kmp_resume_template( int target_gtid, C *flag )
 {
-    KMP_TIME_DEVELOPER_BLOCK(USER_resume);
+    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
     kmp_info_t *th = __kmp_threads[target_gtid];
     int status;
 
@@ -1750,7 +1748,7 @@ void __kmp_resume_oncore(int target_gtid
 void
 __kmp_resume_monitor()
 {
-    KMP_TIME_DEVELOPER_BLOCK(USER_resume);
+    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
     int status;
 #ifdef KMP_DEBUG
     int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;




More information about the Openmp-commits mailing list