[Openmp-commits] [openmp] r244677 - Tidy statistics collection

Jonathan Peyton via Openmp-commits openmp-commits at lists.llvm.org
Tue Aug 11 14:36:41 PDT 2015


Author: jlpeyton
Date: Tue Aug 11 16:36:41 2015
New Revision: 244677

URL: http://llvm.org/viewvc/llvm-project?rev=244677&view=rev
Log:
Tidy statistics collection

This removes some statistics counters and timers which were not used,
adds new counters and timers for some language features that were not
monitored previously and separates the counters and timers into those
which are of interest for investigating user code and those which are
only of interest to the developer of the runtime itself.
The runtime developer statistics are now only collected if the
additional #define KMP_DEVELOPER_STATS is set.

Additional user statistics which are now collected include:
* Count of nested parallelism (omp parallel inside a parallel region)
* Count of omp distribute occurrences
* Count of omp teams occurrences
* Counts of task related statistics (taskyield, task execution, task
  cancellation, task steal)
* Values passed to omp_set_num_threads
* Time spent in omp single and omp master

None of this affects code compiled without stats gathering enabled,
which is the normal library build mode.

This also fixes the CMake build by linking to the standard c++ library
when building the stats library as it is a requirement.  The normal library
does not have this requirement and its link phase is left alone.

Differential Revision: http://reviews.llvm.org/D11759

Modified:
    openmp/trunk/runtime/CMakeLists.txt
    openmp/trunk/runtime/src/CMakeLists.txt
    openmp/trunk/runtime/src/kmp_barrier.cpp
    openmp/trunk/runtime/src/kmp_cancel.cpp
    openmp/trunk/runtime/src/kmp_csupport.c
    openmp/trunk/runtime/src/kmp_dispatch.cpp
    openmp/trunk/runtime/src/kmp_runtime.c
    openmp/trunk/runtime/src/kmp_sched.cpp
    openmp/trunk/runtime/src/kmp_stats.cpp
    openmp/trunk/runtime/src/kmp_stats.h
    openmp/trunk/runtime/src/kmp_tasking.c
    openmp/trunk/runtime/src/z_Linux_util.c

Modified: openmp/trunk/runtime/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/CMakeLists.txt?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/CMakeLists.txt (original)
+++ openmp/trunk/runtime/CMakeLists.txt Tue Aug 11 16:36:41 2015
@@ -254,6 +254,10 @@ set(LIBOMP_STATS FALSE CACHE BOOL
 if(LIBOMP_STATS AND (NOT LIBOMP_HAVE_STATS))
   libomp_error_say("Stats-gathering functionality requested but not available")
 endif()
+# The stats functionality requires the std c++ library
+if(LIBOMP_STATS)
+  set(LIBOMP_USE_STDCPPLIB TRUE)
+endif()
 
 # OMPT-support
 # TODO: Make this a real feature check

Modified: openmp/trunk/runtime/src/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/CMakeLists.txt?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/CMakeLists.txt (original)
+++ openmp/trunk/runtime/src/CMakeLists.txt Tue Aug 11 16:36:41 2015
@@ -149,7 +149,10 @@ endif()
 # Remove any cmake-automatic linking of the standard C++ library.
 # We neither need (nor want) the standard C++ library dependency even though we compile c++ files.
 if(NOT ${LIBOMP_USE_STDCPPLIB})
+  set(LIBOMP_LINKER_LANGUAGE C)
   set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES)
+else()
+  set(LIBOMP_LINKER_LANGUAGE CXX)
 endif()
 
 # Add the OpenMP library
@@ -158,7 +161,7 @@ add_library(omp SHARED ${LIBOMP_SOURCE_F
 set_target_properties(omp PROPERTIES
   PREFIX "" SUFFIX "" OUTPUT_NAME "${LIBOMP_LIB_FILE}"
   LINK_FLAGS "${LIBOMP_CONFIGURED_LDFLAGS}"
-  LINKER_LANGUAGE C # use C Compiler for linking step
+  LINKER_LANGUAGE ${LIBOMP_LINKER_LANGUAGE}
   SKIP_BUILD_RPATH true # have Mac linker -install_name just be "-install_name libomp.dylib"
 )
 

Modified: openmp/trunk/runtime/src/kmp_barrier.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_barrier.cpp?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_barrier.cpp (original)
+++ openmp/trunk/runtime/src/kmp_barrier.cpp Tue Aug 11 16:36:41 2015
@@ -46,7 +46,7 @@ __kmp_linear_barrier_gather(enum barrier
                             void (*reduce)(void *, void *)
                             USE_ITT_BUILD_ARG(void * itt_sync_obj) )
 {
-    KMP_TIME_BLOCK(KMP_linear_gather);
+    KMP_TIME_DEVELOPER_BLOCK(KMP_linear_gather);
     register kmp_team_t *team = this_thr->th.th_team;
     register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
     register kmp_info_t **other_threads = team->t.t_threads;
@@ -123,7 +123,7 @@ __kmp_linear_barrier_release(enum barrie
                              int propagate_icvs
                              USE_ITT_BUILD_ARG(void *itt_sync_obj) )
 {
-    KMP_TIME_BLOCK(KMP_linear_release);
+    KMP_TIME_DEVELOPER_BLOCK(KMP_linear_release);
     register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
     register kmp_team_t *team;
 
@@ -141,17 +141,18 @@ __kmp_linear_barrier_release(enum barrie
 
         if (nproc > 1) {
 #if KMP_BARRIER_ICV_PUSH
-            KMP_START_EXPLICIT_TIMER(USER_icv_copy);
-            if (propagate_icvs) {
-                ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
-                for (i=1; i<nproc; ++i) {
-                    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team, i, FALSE);
-                    ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
-                                   &team->t.t_implicit_task_taskdata[0].td_icvs);
+            {
+                KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+                if (propagate_icvs) {
+                    ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
+                    for (i=1; i<nproc; ++i) {
+                        __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team, i, FALSE);
+                        ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
+                                       &team->t.t_implicit_task_taskdata[0].td_icvs);
+                    }
+                    ngo_sync();
                 }
-                ngo_sync();
             }
-            KMP_STOP_EXPLICIT_TIMER(USER_icv_copy);
 #endif // KMP_BARRIER_ICV_PUSH
 
             // Now, release all of the worker threads
@@ -217,7 +218,7 @@ __kmp_tree_barrier_gather(enum barrier_t
                           void (*reduce)(void *, void *)
                           USE_ITT_BUILD_ARG(void *itt_sync_obj) )
 {
-    KMP_TIME_BLOCK(KMP_tree_gather);
+    KMP_TIME_DEVELOPER_BLOCK(KMP_tree_gather);
     register kmp_team_t *team = this_thr->th.th_team;
     register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
     register kmp_info_t **other_threads = team->t.t_threads;
@@ -312,7 +313,7 @@ __kmp_tree_barrier_release(enum barrier_
                            int propagate_icvs
                            USE_ITT_BUILD_ARG(void *itt_sync_obj) )
 {
-    KMP_TIME_BLOCK(KMP_tree_release);
+    KMP_TIME_DEVELOPER_BLOCK(KMP_tree_release);
     register kmp_team_t *team;
     register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
     register kmp_uint32 nproc;
@@ -381,14 +382,15 @@ __kmp_tree_barrier_release(enum barrier_
 #endif /* KMP_CACHE_MANAGE */
 
 #if KMP_BARRIER_ICV_PUSH
-            KMP_START_EXPLICIT_TIMER(USER_icv_copy);
-            if (propagate_icvs) {
-                __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
-                                         team, child_tid, FALSE);
-                copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
-                          &team->t.t_implicit_task_taskdata[0].td_icvs);
+            {
+                KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+                if (propagate_icvs) {
+                    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
+                                             team, child_tid, FALSE);
+                    copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
+                              &team->t.t_implicit_task_taskdata[0].td_icvs);
+                }
             }
-            KMP_STOP_EXPLICIT_TIMER(USER_icv_copy);
 #endif // KMP_BARRIER_ICV_PUSH
             KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
                           "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
@@ -414,7 +416,7 @@ __kmp_hyper_barrier_gather(enum barrier_
                            void (*reduce)(void *, void *)
                            USE_ITT_BUILD_ARG(void *itt_sync_obj) )
 {
-    KMP_TIME_BLOCK(KMP_hyper_gather);
+    KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_gather);
     register kmp_team_t *team = this_thr->th.th_team;
     register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
     register kmp_info_t **other_threads = team->t.t_threads;
@@ -520,7 +522,7 @@ __kmp_hyper_barrier_release(enum barrier
                             int propagate_icvs
                             USE_ITT_BUILD_ARG(void *itt_sync_obj) )
 {
-    KMP_TIME_BLOCK(KMP_hyper_release);
+    KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_release);
     register kmp_team_t    *team;
     register kmp_bstate_t  *thr_bar       = & this_thr -> th.th_bar[ bt ].bb;
     register kmp_info_t   **other_threads;
@@ -725,7 +727,7 @@ __kmp_hierarchical_barrier_gather(enum b
                                   int gtid, int tid, void (*reduce) (void *, void *)
                                   USE_ITT_BUILD_ARG(void * itt_sync_obj) )
 {
-    KMP_TIME_BLOCK(KMP_hier_gather);
+    KMP_TIME_DEVELOPER_BLOCK(KMP_hier_gather);
     register kmp_team_t *team = this_thr->th.th_team;
     register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
     register kmp_uint32 nproc = this_thr->th.th_team_nproc;
@@ -853,7 +855,7 @@ __kmp_hierarchical_barrier_release(enum
                                    int propagate_icvs
                                    USE_ITT_BUILD_ARG(void * itt_sync_obj) )
 {
-    KMP_TIME_BLOCK(KMP_hier_release);
+    KMP_TIME_DEVELOPER_BLOCK(KMP_hier_release);
     register kmp_team_t *team;
     register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
     register kmp_uint32 nproc;
@@ -1035,7 +1037,7 @@ int
 __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
               void *reduce_data, void (*reduce)(void *, void *))
 {
-    KMP_TIME_BLOCK(KMP_barrier);
+    KMP_TIME_DEVELOPER_BLOCK(KMP_barrier);
     register int tid = __kmp_tid_from_gtid(gtid);
     register kmp_info_t *this_thr = __kmp_threads[gtid];
     register kmp_team_t *team = this_thr->th.th_team;
@@ -1294,7 +1296,7 @@ __kmp_barrier(enum barrier_type bt, int
 void
 __kmp_end_split_barrier(enum barrier_type bt, int gtid)
 {
-    KMP_TIME_BLOCK(KMP_end_split_barrier);
+    KMP_TIME_DEVELOPER_BLOCK(KMP_end_split_barrier);
     int tid = __kmp_tid_from_gtid(gtid);
     kmp_info_t *this_thr = __kmp_threads[gtid];
     kmp_team_t *team = this_thr->th.th_team;
@@ -1335,7 +1337,7 @@ __kmp_end_split_barrier(enum barrier_typ
 void
 __kmp_join_barrier(int gtid)
 {
-    KMP_TIME_BLOCK(KMP_join_barrier);
+    KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier);
     register kmp_info_t *this_thr = __kmp_threads[gtid];
     register kmp_team_t *team;
     register kmp_uint nproc;
@@ -1533,7 +1535,7 @@ __kmp_join_barrier(int gtid)
 void
 __kmp_fork_barrier(int gtid, int tid)
 {
-    KMP_TIME_BLOCK(KMP_fork_barrier);
+    KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier);
     kmp_info_t *this_thr = __kmp_threads[gtid];
     kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
 #if USE_ITT_BUILD
@@ -1648,15 +1650,16 @@ __kmp_fork_barrier(int gtid, int tid)
        this data before this function is called. We cannot modify __kmp_fork_call() to look at
        the fixed ICVs in the master's thread struct, because it is not always the case that the
        threads arrays have been allocated when __kmp_fork_call() is executed. */
-    KMP_START_EXPLICIT_TIMER(USER_icv_copy);
-    if (!KMP_MASTER_TID(tid)) {  // master thread already has ICVs
-        // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
-        KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
-        __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
-        copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
-                  &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs);
+    {
+        KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+        if (!KMP_MASTER_TID(tid)) {  // master thread already has ICVs
+            // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
+            KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
+            __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
+            copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
+                      &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs);
+        }
     }
-    KMP_STOP_EXPLICIT_TIMER(USER_icv_copy);
 #endif // KMP_BARRIER_ICV_PULL
 
     if (__kmp_tasking_mode != tskm_immediate_exec) {
@@ -1702,7 +1705,7 @@ __kmp_fork_barrier(int gtid, int tid)
 void
 __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc )
 {
-    KMP_TIME_BLOCK(KMP_setup_icv_copy);
+    KMP_TIME_DEVELOPER_BLOCK(KMP_setup_icv_copy);
 
     KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
     KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);

Modified: openmp/trunk/runtime/src/kmp_cancel.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_cancel.cpp?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_cancel.cpp (original)
+++ openmp/trunk/runtime/src/kmp_cancel.cpp Tue Aug 11 16:36:41 2015
@@ -58,7 +58,7 @@ kmp_int32 __kmpc_cancel(ident_t* loc_ref
                 break;
             }
         case cancel_taskgroup:
-            // cancellation requests for parallel and worksharing constructs
+            // cancellation requests for a task group
             // are handled through the taskgroup structure
             {
                 kmp_taskdata_t*  task; 
@@ -141,7 +141,7 @@ kmp_int32 __kmpc_cancellationpoint(ident
                 break;
             }
         case cancel_taskgroup:
-            // cancellation requests for parallel and worksharing constructs
+            // cancellation requests for a task group
             // are handled through the taskgroup structure
             {
                 kmp_taskdata_t*  task; 

Modified: openmp/trunk/runtime/src/kmp_csupport.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_csupport.c?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_csupport.c (original)
+++ openmp/trunk/runtime/src/kmp_csupport.c Tue Aug 11 16:36:41 2015
@@ -280,9 +280,21 @@ Do the actual fork and call the microtas
 void
 __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
 {
-  KMP_STOP_EXPLICIT_TIMER(OMP_serial);
-  KMP_COUNT_BLOCK(OMP_PARALLEL);
   int         gtid = __kmp_entry_gtid();
+
+#if (KMP_STATS_ENABLED)  
+  int inParallel = __kmpc_in_parallel(loc);
+  if (inParallel)
+  {
+      KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
+  }
+  else
+  {
+      KMP_STOP_EXPLICIT_TIMER(OMP_serial);
+      KMP_COUNT_BLOCK(OMP_PARALLEL);
+  }
+#endif
+
   // maybe to save thr_state is enough here
   {
     va_list     ap;
@@ -329,7 +341,10 @@ __kmpc_fork_call(ident_t *loc, kmp_int32
     }
 #endif
   }
-  KMP_START_EXPLICIT_TIMER(OMP_serial);
+#if (KMP_STATS_ENABLED)  
+  if (!inParallel)
+      KMP_START_EXPLICIT_TIMER(OMP_serial);
+#endif
 }
 
 #if OMP_40_ENABLED
@@ -370,6 +385,8 @@ __kmpc_fork_teams(ident_t *loc, kmp_int3
     va_list     ap;
     va_start(   ap, microtask );
 
+    KMP_COUNT_BLOCK(OMP_TEAMS);
+
     // remember teams entry point and nesting level
     this_thr->th.th_teams_microtask = microtask;
     this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host
@@ -715,8 +732,10 @@ __kmpc_master(ident_t *loc, kmp_int32 gl
     if( ! TCR_4( __kmp_init_parallel ) )
         __kmp_parallel_initialize();
 
-    if( KMP_MASTER_GTID( global_tid ))
+    if( KMP_MASTER_GTID( global_tid )) {
+        KMP_START_EXPLICIT_TIMER(OMP_master);
         status = 1;
+    }
 
 #if OMPT_SUPPORT && OMPT_TRACE
     if (status) {
@@ -764,6 +783,7 @@ __kmpc_end_master(ident_t *loc, kmp_int3
     KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
 
     KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
+    KMP_STOP_EXPLICIT_TIMER(OMP_master);
 
 #if OMPT_SUPPORT && OMPT_TRACE
     kmp_info_t  *this_thr        = __kmp_threads[ global_tid ];
@@ -1386,6 +1406,9 @@ __kmpc_single(ident_t *loc, kmp_int32 gl
 {
     KMP_COUNT_BLOCK(OMP_SINGLE);
     kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
+    if(rc == TRUE) {
+        KMP_START_EXPLICIT_TIMER(OMP_single);
+    }
 
 #if OMPT_SUPPORT && OMPT_TRACE
     kmp_info_t *this_thr        = __kmp_threads[ global_tid ];
@@ -1427,6 +1450,7 @@ void
 __kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
 {
     __kmp_exit_single( global_tid );
+    KMP_STOP_EXPLICIT_TIMER(OMP_single);
 
 #if OMPT_SUPPORT && OMPT_TRACE
     kmp_info_t *this_thr        = __kmp_threads[ global_tid ];
@@ -2191,7 +2215,6 @@ int
 __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
 {
     KMP_COUNT_BLOCK(OMP_test_lock);
-    KMP_TIME_BLOCK(OMP_test_lock);
 
 #if KMP_USE_DYNAMIC_LOCK
     int rc;

Modified: openmp/trunk/runtime/src/kmp_dispatch.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_dispatch.cpp?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_dispatch.cpp (original)
+++ openmp/trunk/runtime/src/kmp_dispatch.cpp Tue Aug 11 16:36:41 2015
@@ -670,6 +670,7 @@ __kmp_dispatch_init(
     } else {
         pr->ordered = FALSE;
     }
+
     if ( schedule == kmp_sch_static ) {
         schedule = __kmp_static;
     } else {
@@ -761,6 +762,19 @@ __kmp_dispatch_init(
         tc = 0;                    // zero-trip
     }
 
+    // Any half-decent optimizer will remove this test when the blocks are empty since the macros expand to nothing
+    // when statistics are disabled.
+    if (schedule == __kmp_static)
+    {
+        KMP_COUNT_BLOCK(OMP_FOR_static);
+        KMP_COUNT_VALUE(FOR_static_iterations, tc);
+    }
+    else
+    {
+        KMP_COUNT_BLOCK(OMP_FOR_dynamic);
+        KMP_COUNT_VALUE(FOR_dynamic_iterations, tc);
+    }
+
     pr->u.p.lb = lb;
     pr->u.p.ub = ub;
     pr->u.p.st = st;
@@ -1384,6 +1398,11 @@ __kmp_dispatch_next(
     static const int ___kmp_size_type = sizeof( UT );
 #endif
 
+    // This is potentially slightly misleading, schedule(runtime) will appear here even if the actual runtime schedule
+    // is static. (Which points out a disadvantage of schedule(runtime): even when static scheduling is used it costs
+    // more than a compile time choice to use static scheduling would.)
+    KMP_TIME_BLOCK(FOR_dynamic_scheduling);
+
     int                                   status;
     dispatch_private_info_template< T > * pr;
     kmp_info_t                          * th   = __kmp_threads[ gtid ];
@@ -2164,7 +2183,6 @@ __kmp_dist_get_bounds(
     T                                *pupper,
     typename traits_t< T >::signed_t  incr
 ) {
-    KMP_COUNT_BLOCK(OMP_DISTR_FOR_dynamic);
     typedef typename traits_t< T >::unsigned_t  UT;
     typedef typename traits_t< T >::signed_t    ST;
     register kmp_uint32  team_id;
@@ -2222,6 +2240,7 @@ __kmp_dist_get_bounds(
     } else {
         trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
     }
+
     if( trip_count <= nteams ) {
         KMP_DEBUG_ASSERT(
             __kmp_static == kmp_sch_static_greedy || \
@@ -2297,7 +2316,6 @@ void
 __kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                         kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
 {
-    KMP_COUNT_BLOCK(OMP_FOR_dynamic);
     KMP_DEBUG_ASSERT( __kmp_init_serial );
     __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
 }
@@ -2308,7 +2326,6 @@ void
 __kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                         kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
 {
-    KMP_COUNT_BLOCK(OMP_FOR_dynamic);
     KMP_DEBUG_ASSERT( __kmp_init_serial );
     __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
 }
@@ -2321,7 +2338,6 @@ __kmpc_dispatch_init_8( ident_t *loc, km
                         kmp_int64 lb, kmp_int64 ub,
                         kmp_int64 st, kmp_int64 chunk )
 {
-    KMP_COUNT_BLOCK(OMP_FOR_dynamic);
     KMP_DEBUG_ASSERT( __kmp_init_serial );
     __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
 }
@@ -2334,7 +2350,6 @@ __kmpc_dispatch_init_8u( ident_t *loc, k
                          kmp_uint64 lb, kmp_uint64 ub,
                          kmp_int64 st, kmp_int64 chunk )
 {
-    KMP_COUNT_BLOCK(OMP_FOR_dynamic);
     KMP_DEBUG_ASSERT( __kmp_init_serial );
     __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
 }
@@ -2352,7 +2367,6 @@ void
 __kmpc_dist_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
     kmp_int32 *p_last, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
 {
-    KMP_COUNT_BLOCK(OMP_FOR_dynamic);
     KMP_DEBUG_ASSERT( __kmp_init_serial );
     __kmp_dist_get_bounds< kmp_int32 >( loc, gtid, p_last, &lb, &ub, st );
     __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
@@ -2362,7 +2376,6 @@ void
 __kmpc_dist_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
     kmp_int32 *p_last, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
 {
-    KMP_COUNT_BLOCK(OMP_FOR_dynamic);
     KMP_DEBUG_ASSERT( __kmp_init_serial );
     __kmp_dist_get_bounds< kmp_uint32 >( loc, gtid, p_last, &lb, &ub, st );
     __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
@@ -2372,7 +2385,6 @@ void
 __kmpc_dist_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
     kmp_int32 *p_last, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk )
 {
-    KMP_COUNT_BLOCK(OMP_FOR_dynamic);
     KMP_DEBUG_ASSERT( __kmp_init_serial );
     __kmp_dist_get_bounds< kmp_int64 >( loc, gtid, p_last, &lb, &ub, st );
     __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
@@ -2382,7 +2394,6 @@ void
 __kmpc_dist_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
     kmp_int32 *p_last, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk )
 {
-    KMP_COUNT_BLOCK(OMP_FOR_dynamic);
     KMP_DEBUG_ASSERT( __kmp_init_serial );
     __kmp_dist_get_bounds< kmp_uint64 >( loc, gtid, p_last, &lb, &ub, st );
     __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );

Modified: openmp/trunk/runtime/src/kmp_runtime.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_runtime.c?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_runtime.c (original)
+++ openmp/trunk/runtime/src/kmp_runtime.c Tue Aug 11 16:36:41 2015
@@ -1495,7 +1495,8 @@ __kmp_fork_call(
     kmp_hot_team_ptr_t **p_hot_teams;
 #endif
     { // KMP_TIME_BLOCK
-    KMP_TIME_BLOCK(KMP_fork_call);
+    KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);
+    KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
 
     KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
     if ( __kmp_stkpadding > 0 &&  __kmp_root[gtid] != NULL ) {
@@ -1620,12 +1621,14 @@ __kmp_fork_call(
             }
 #endif
 
-            KMP_TIME_BLOCK(OMP_work);
-            __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
+            {
+                KMP_TIME_BLOCK(OMP_work);
+                __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
 #if OMPT_SUPPORT
-                , exit_runtime_p
+                                        , exit_runtime_p
 #endif
-                );
+                                        );
+            }
 
 #if OMPT_SUPPORT
             if (ompt_status & ompt_status_track) {
@@ -2224,8 +2227,8 @@ __kmp_fork_call(
     }  // END of timer KMP_fork_call block
 
     {
-        //KMP_TIME_BLOCK(OMP_work);
-        KMP_TIME_BLOCK(USER_master_invoke);
+        KMP_TIME_BLOCK(OMP_work);
+        // KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
         if (! team->t.t_invoke( gtid )) {
             KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
         }
@@ -2280,7 +2283,7 @@ __kmp_join_call(ident_t *loc, int gtid,
 #endif /* OMP_40_ENABLED */
 )
 {
-    KMP_TIME_BLOCK(KMP_join_call);
+    KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
     kmp_team_t     *team;
     kmp_team_t     *parent_team;
     kmp_info_t     *master_th;
@@ -2582,6 +2585,7 @@ __kmp_set_num_threads( int new_nth, int
     else if (new_nth > __kmp_max_nth)
         new_nth = __kmp_max_nth;
 
+    KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
     thread = __kmp_threads[gtid];
 
     __kmp_save_internal_controls( thread );
@@ -4790,7 +4794,7 @@ __kmp_allocate_team( kmp_root_t *root, i
     kmp_internal_control_t *new_icvs,
     int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
 {
-    KMP_TIME_BLOCK(KMP_allocate_team);
+    KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
     int f;
     kmp_team_t *team;
     int use_hot_team = ! root->r.r_active;
@@ -5577,12 +5581,12 @@ __kmp_launch_thread( kmp_info_t *this_th
                 }
 #endif
 
-                KMP_STOP_EXPLICIT_TIMER(USER_launch_thread_loop);
+                KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
                 {
-                    KMP_TIME_BLOCK(USER_worker_invoke);
+                    KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
                     rc = (*pteam)->t.t_invoke( gtid );
                 }
-                KMP_START_EXPLICIT_TIMER(USER_launch_thread_loop);
+                KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
                 KMP_ASSERT( rc );
 
 #if OMPT_SUPPORT
@@ -6910,12 +6914,15 @@ __kmp_invoke_task_func( int gtid )
 #endif
 #endif
 
-    rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
-      gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
+    {
+        KMP_TIME_BLOCK(OMP_work);
+        rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
+                                     gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
 #if OMPT_SUPPORT
-      , exit_runtime_p
+                                     , exit_runtime_p
 #endif
-      );
+                                     );
+    }
 
 #if OMPT_SUPPORT && OMPT_TRACE
     if (ompt_status & ompt_status_track) {

Modified: openmp/trunk/runtime/src/kmp_sched.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_sched.cpp?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_sched.cpp (original)
+++ openmp/trunk/runtime/src/kmp_sched.cpp Tue Aug 11 16:36:41 2015
@@ -84,6 +84,8 @@ __kmp_for_static_init(
     typename traits_t< T >::signed_t  chunk
 ) {
     KMP_COUNT_BLOCK(OMP_FOR_static);
+    KMP_TIME_BLOCK (FOR_static_scheduling);
+
     typedef typename traits_t< T >::unsigned_t  UT;
     typedef typename traits_t< T >::signed_t    ST;
     /*  this all has to be changed back to TID and such.. */
@@ -151,6 +153,7 @@ __kmp_for_static_init(
                 team_info->microtask);
         }
 #endif
+        KMP_COUNT_VALUE (FOR_static_iterations, 0);
         return;
     }
 
@@ -246,6 +249,7 @@ __kmp_for_static_init(
             __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
         }
     }
+    KMP_COUNT_VALUE (FOR_static_iterations, trip_count);
 
     /* compute remaining parameters */
     switch ( schedtype ) {
@@ -372,7 +376,7 @@ __kmp_dist_for_static_init(
     typename traits_t< T >::signed_t  incr,
     typename traits_t< T >::signed_t  chunk
 ) {
-    KMP_COUNT_BLOCK(OMP_DISTR_FOR_static);
+    KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
     typedef typename traits_t< T >::unsigned_t  UT;
     typedef typename traits_t< T >::signed_t    ST;
     register kmp_uint32  tid;
@@ -437,6 +441,7 @@ __kmp_dist_for_static_init(
     } else {
         trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
     }
+
     *pstride = *pupper - *plower;  // just in case (can be unused)
     if( trip_count <= nteams ) {
         KMP_DEBUG_ASSERT(

Modified: openmp/trunk/runtime/src/kmp_stats.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stats.cpp?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stats.cpp (original)
+++ openmp/trunk/runtime/src/kmp_stats.cpp Tue Aug 11 16:36:41 2015
@@ -521,16 +521,14 @@ void kmp_stats_output_module::outputStat
 
         // Special handling for synthesized statistics.
         // These just have to be coded specially here for now. 
-        // At present we only have one: the total parallel work done in each thread.
+        // At present we only have a few: 
+        // The total parallel work done in each thread.
         // The variance here makes it easy to see load imbalance over the whole program (though, of course,
         // it's possible to have a code with awful load balance in every parallel region but perfect load
         // balance oever the whole program.)
+        // The time spent in barriers in each thread.
         allStats[TIMER_Total_work].addSample ((*it)->getTimer(TIMER_OMP_work)->getTotal());
 
-        // Time waiting for work (synthesized)
-        if ((t != 0) || !timeStat::workerOnly(timer_e(TIMER_OMP_await_work)))
-            allStats[TIMER_Total_await_work].addSample ((*it)->getTimer(TIMER_OMP_await_work)->getTotal());
-
         // Time in explicit barriers.
         allStats[TIMER_Total_barrier].addSample ((*it)->getTimer(TIMER_OMP_barrier)->getTotal());
 

Modified: openmp/trunk/runtime/src/kmp_stats.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stats.h?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stats.h (original)
+++ openmp/trunk/runtime/src/kmp_stats.h Tue Aug 11 16:36:41 2015
@@ -31,6 +31,11 @@
 #include <new> // placement new
 #include "kmp_stats_timing.h"
 
+/*
+ * Enable developer statistics here if you want them. They are more detailed than is useful for application characterisation and
+ * are intended for the runtime library developer.
+ */
+// #define KMP_DEVELOPER_STATS 1
 
 /*!
  * @ingroup STATS_GATHERING
@@ -56,7 +61,7 @@ class stats_flags_e {
  * Each thread accumulates its own count, at the end of execution the counts are aggregated treating each thread
  * as a separate measurement. (Unless onlyInMaster is set, in which case there's only a single measurement).
  * The min,mean,max are therefore the values for the threads.
- * Adding the counter here and then putting in a KMP_BLOCK_COUNTER(name) is all you need to do.
+ * Adding the counter here and then putting a KMP_BLOCK_COUNTER(name) at the point you want to count is all you need to do.
  * All of the tables and printing is generated from this macro.
  * Format is "macro(name, flags, arg)"
  *
@@ -64,21 +69,30 @@ class stats_flags_e {
 */
 #define KMP_FOREACH_COUNTER(macro, arg)                         \
     macro (OMP_PARALLEL, stats_flags_e::onlyInMaster, arg)      \
+    macro (OMP_NESTED_PARALLEL, 0, arg)                         \
     macro (OMP_FOR_static, 0, arg)                              \
     macro (OMP_FOR_dynamic, 0, arg)                             \
-    macro (OMP_DISTR_FOR_static, 0, arg)                        \
-    macro (OMP_DISTR_FOR_dynamic, 0, arg)                       \
+    macro (OMP_DISTRIBUTE, 0, arg)                              \
     macro (OMP_BARRIER, 0, arg)                                 \
     macro (OMP_CRITICAL,0, arg)                                 \
     macro (OMP_SINGLE, 0, arg)                                  \
     macro (OMP_MASTER, 0, arg)                                  \
+    macro (OMP_TEAMS, 0, arg)                                   \
     macro (OMP_set_lock, 0, arg)                                \
     macro (OMP_test_lock, 0, arg)                               \
-    macro (OMP_test_lock_failure, 0, arg)                       \
     macro (REDUCE_wait, 0, arg)                                 \
     macro (REDUCE_nowait, 0, arg)                               \
+    macro (OMP_TASKYIELD, 0, arg)                               \
+    macro (TASK_executed, 0, arg)                               \
+    macro (TASK_cancelled, 0, arg)                              \
+    macro (TASK_stolen, 0, arg)                                 \
     macro (LAST,0,arg)
 
+// OMP_PARALLEL_args      -- the number of arguments passed to a fork
+// FOR_static_iterations  -- Number of available parallel chunks of work in a static for
+// FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for
+//                           Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2.
+
 /*!
  * \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h
  *
@@ -87,72 +101,45 @@ class stats_flags_e {
  *
  * \details A timer collects multiple samples of some count in each thread and then finally aggregates over all the threads.
  * The count is normally a time (in ticks), hence the name "timer". (But can be any value, so we use this for "number of arguments passed to fork"
- * as well, or we could collect "loop iteration count" if we wanted to).
+ * as well).
  * For timers the threads are not significant, it's the individual observations that count, so the statistics are at that level.
  * Format is "macro(name, flags, arg)"
  *
- * @ingroup STATS_GATHERING
+ * @ingroup STATS_GATHERING2
  */
-#define KMP_FOREACH_TIMER(macro, arg)                                       \
-    macro (OMP_PARALLEL_args, stats_flags_e::onlyInMaster | stats_flags_e::noUnits, arg) \
-    macro (FOR_static_iterations, stats_flags_e::onlyInMaster | stats_flags_e::noUnits, arg) \
-    macro (FOR_dynamic_iterations, stats_flags_e::noUnits, arg)         \
+#define KMP_FOREACH_TIMER(macro, arg)                                   \
     macro (OMP_start_end, stats_flags_e::onlyInMaster, arg)             \
     macro (OMP_serial, stats_flags_e::onlyInMaster, arg)                \
     macro (OMP_work, 0, arg)                                            \
     macro (Total_work, stats_flags_e::synthesized, arg)                 \
-    macro (OMP_await_work, stats_flags_e::notInMaster, arg)             \
-    macro (Total_await_work, stats_flags_e::synthesized, arg)           \
     macro (OMP_barrier, 0, arg)                                         \
     macro (Total_barrier, stats_flags_e::synthesized, arg)              \
-    macro (OMP_test_lock, 0, arg)                                       \
+    macro (FOR_static_iterations, stats_flags_e::noUnits, arg)          \
     macro (FOR_static_scheduling, 0, arg)                               \
+    macro (FOR_dynamic_iterations, stats_flags_e::noUnits, arg)         \
     macro (FOR_dynamic_scheduling, 0, arg)                              \
-    macro (KMP_fork_call, 0, arg) \
-    macro (KMP_join_call, 0, arg) \
-    macro (KMP_fork_barrier, stats_flags_e::logEvent, arg)              \
-    macro (KMP_join_barrier, stats_flags_e::logEvent, arg)              \
-    macro (KMP_barrier, 0, arg)                   \
-    macro (KMP_end_split_barrier, 0, arg) \
-    macro (KMP_wait_sleep, 0, arg) \
-    macro (KMP_release, 0, arg)                   \
-    macro (KMP_hier_gather, 0, arg) \
-    macro (KMP_hier_release, 0, arg) \
-    macro (KMP_hyper_gather,  stats_flags_e::logEvent, arg) \
-    macro (KMP_hyper_release,  stats_flags_e::logEvent, arg) \
-    macro (KMP_linear_gather, 0, arg)                                   \
-    macro (KMP_linear_release, 0, arg)                                  \
-    macro (KMP_tree_gather, 0, arg)                                     \
-    macro (KMP_tree_release, 0, arg)                                    \
-    macro (USER_master_invoke, stats_flags_e::logEvent, arg) \
-    macro (USER_worker_invoke, stats_flags_e::logEvent, arg) \
-    macro (USER_resume, stats_flags_e::logEvent, arg) \
-    macro (USER_suspend, stats_flags_e::logEvent, arg) \
-    macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg) \
-    macro (KMP_allocate_team, 0, arg) \
-    macro (KMP_setup_icv_copy, 0, arg) \
-    macro (USER_icv_copy, 0, arg) \
+    macro (TASK_execution, 0, arg)                                      \
+    macro (OMP_set_numthreads, stats_flags_e::noUnits, arg)             \
+    macro (OMP_PARALLEL_args,  stats_flags_e::noUnits, arg)             \
+    macro (OMP_single, 0, arg)                                          \
+    macro (OMP_master, 0, arg)                                          \
+    KMP_FOREACH_DEVELOPER_TIMER(macro, arg)                             \
     macro (LAST,0, arg)
 
 
-
-// OMP_PARALLEL_args      -- the number of arguments passed to a fork
-// FOR_static_iterations  -- Number of available parallel chunks of work in a static for
-// FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for
-//                           Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2.
-// OMP_serial             -- thread zero time executing serial code
 // OMP_start_end          -- time from when OpenMP is initialized until the stats are printed at exit
+// OMP_serial             -- thread zero time executing serial code
 // OMP_work               -- elapsed time in code dispatched by a fork (measured in the thread)
 // Total_work             -- a synthesized statistic summarizing how much parallel work each thread executed.
 // OMP_barrier            -- time at "real" barriers
 // Total_barrier          -- a synthesized statistic summarizing how much time at real barriers in each thread
-// OMP_set_lock           -- time in lock setting
-// OMP_test_lock          -- time in testing a lock
-// LOCK_WAIT              -- time waiting for a lock
 // FOR_static_scheduling  -- time spent doing scheduling for a static "for"
 // FOR_dynamic_scheduling -- time spent doing scheduling for a dynamic "for"
-// KMP_wait_sleep         -- time in __kmp_wait_sleep
-// KMP_release            -- time in __kmp_release
+
+#if (KMP_DEVELOPER_STATS)
+// Timers which are of interest to runtime library developers, not end users.
+// These have to be explicitly enabled in addition to the other stats.
+
 // KMP_fork_barrier       -- time in __kmp_fork_barrier
 // KMP_join_barrier       -- time in __kmp_join_barrier
 // KMP_barrier            -- time in __kmp_barrier
@@ -165,6 +152,32 @@ class stats_flags_e {
 // KMP_tree_release       -- time in __kmp_tree_barrier_release
 // KMP_hyper_gather       -- time in __kmp_hyper_barrier_gather
 // KMP_hyper_release      -- time in __kmp_hyper_barrier_release
+# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)                        \
+    macro (KMP_fork_call, 0, arg)                                       \
+    macro (KMP_join_call, 0, arg)                                       \
+    macro (KMP_fork_barrier, stats_flags_e::logEvent, arg)              \
+    macro (KMP_join_barrier, stats_flags_e::logEvent, arg)              \
+    macro (KMP_barrier, 0, arg)                                         \
+    macro (KMP_end_split_barrier, 0, arg)                               \
+    macro (KMP_hier_gather, 0, arg)                                     \
+    macro (KMP_hier_release, 0, arg)                                    \
+    macro (KMP_hyper_gather,  stats_flags_e::logEvent, arg)             \
+    macro (KMP_hyper_release,  stats_flags_e::logEvent, arg)            \
+    macro (KMP_linear_gather, 0, arg)                                   \
+    macro (KMP_linear_release, 0, arg)                                  \
+    macro (KMP_tree_gather, 0, arg)                                     \
+    macro (KMP_tree_release, 0, arg)                                    \
+    macro (USER_master_invoke, stats_flags_e::logEvent, arg)            \
+    macro (USER_worker_invoke, stats_flags_e::logEvent, arg)            \
+    macro (USER_resume, stats_flags_e::logEvent, arg)                   \
+    macro (USER_suspend, stats_flags_e::logEvent, arg)                  \
+    macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg)       \
+    macro (KMP_allocate_team, 0, arg)                                   \
+    macro (KMP_setup_icv_copy, 0, arg)                                  \
+    macro (USER_icv_copy, 0, arg)                                       
+#else
+# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
+#endif
 
 /*!
  * \brief Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
@@ -182,13 +195,21 @@ class stats_flags_e {
  *
  * @ingroup STATS_GATHERING
 */
-#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg)  \
-    macro(OMP_serial, 0, arg)                   \
-    macro(OMP_start_end, 0, arg)                \
-    macro(USER_icv_copy, 0, arg) \
-    macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg) \
+#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg)          \
+    macro(OMP_serial, 0, arg)                           \
+    macro(OMP_start_end, 0, arg)                        \
+    macro(OMP_single, 0, arg)                           \
+    macro(OMP_master, 0, arg)                           \
+    KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg)     \
     macro(LAST, 0, arg)
 
+#if (KMP_DEVELOPER_STATS)
+# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg)               \
+    macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg)
+#else
+# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg)               
+#endif
+
 #define ENUMERATE(name,ignore,prefix) prefix##name,
 enum timer_e {
     KMP_FOREACH_TIMER(ENUMERATE, TIMER_)
@@ -689,6 +710,21 @@ extern kmp_stats_output_module __kmp_sta
 */
 #define KMP_RESET_STATS()  __kmp_reset_stats()
 
+#if (KMP_DEVELOPER_STATS)
+# define KMP_TIME_DEVELOPER_BLOCK(n)             KMP_TIME_BLOCK(n)
+# define KMP_COUNT_DEVELOPER_VALUE(n,v)          KMP_COUNT_VALUE(n,v)
+# define KMP_COUNT_DEVELOPER_BLOCK(n)            KMP_COUNT_BLOCK(n)
+# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n)   KMP_START_EXPLICIT_TIMER(n)
+# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n)    KMP_STOP_EXPLICIT_TIMER(n)
+#else
+// Null definitions
+# define KMP_TIME_DEVELOPER_BLOCK(n)             ((void)0)
+# define KMP_COUNT_DEVELOPER_VALUE(n,v)          ((void)0)
+# define KMP_COUNT_DEVELOPER_BLOCK(n)            ((void)0)
+# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n)   ((void)0)
+# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n)    ((void)0)
+#endif
+
 #else // KMP_STATS_ENABLED
 
 // Null definitions
@@ -701,6 +737,11 @@ extern kmp_stats_output_module __kmp_sta
 #define KMP_OUTPUT_STATS(heading_string) ((void)0)
 #define KMP_RESET_STATS()  ((void)0)
 
+#define KMP_TIME_DEVELOPER_BLOCK(n)             ((void)0)
+#define KMP_COUNT_DEVELOPER_VALUE(n,v)          ((void)0)
+#define KMP_COUNT_DEVELOPER_BLOCK(n)            ((void)0)
+#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n)   ((void)0)
+#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n)    ((void)0)
 #endif  // KMP_STATS_ENABLED
 
 #endif // KMP_STATS_H

Modified: openmp/trunk/runtime/src/kmp_tasking.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_tasking.c?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_tasking.c (original)
+++ openmp/trunk/runtime/src/kmp_tasking.c Tue Aug 11 16:36:41 2015
@@ -17,6 +17,7 @@
 #include "kmp_i18n.h"
 #include "kmp_itt.h"
 #include "kmp_wait_release.h"
+#include "kmp_stats.h"
 
 #if OMPT_SUPPORT
 #include "ompt-specific.h"
@@ -1136,6 +1137,7 @@ __kmp_invoke_task( kmp_int32 gtid, kmp_t
         kmp_team_t * this_team = this_thr->th.th_team;
         kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
         if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
+            KMP_COUNT_BLOCK(TASK_cancelled);
             // this task belongs to a task group and we need to cancel it
             discard = 1 /* true */;
         }
@@ -1146,6 +1148,8 @@ __kmp_invoke_task( kmp_int32 gtid, kmp_t
     // Thunks generated by gcc take a different argument list.
     //
     if (!discard) {
+        KMP_COUNT_BLOCK(TASK_executed);
+        KMP_TIME_BLOCK (TASK_execution);
 #endif // OMP_40_ENABLED
 #ifdef KMP_GOMP_COMPAT
         if (taskdata->td_flags.native) {
@@ -1356,6 +1360,8 @@ __kmpc_omp_taskyield( ident_t *loc_ref,
     kmp_info_t * thread;
     int thread_finished = FALSE;
 
+    KMP_COUNT_BLOCK(OMP_TASKYIELD);
+
     KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
                   gtid, loc_ref, end_part) );
 
@@ -1648,6 +1654,7 @@ __kmp_steal_task( kmp_info_t *victim, km
 
     __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
 
+    KMP_COUNT_BLOCK(TASK_stolen);
     KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
                   "ntasks=%d head=%u tail=%u\n",
                   gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,

Modified: openmp/trunk/runtime/src/z_Linux_util.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_util.c?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_util.c (original)
+++ openmp/trunk/runtime/src/z_Linux_util.c Tue Aug 11 16:36:41 2015
@@ -1688,7 +1688,7 @@ __kmp_suspend_uninitialize_thread( kmp_i
 template <class C>
 static inline void __kmp_suspend_template( int th_gtid, C *flag )
 {
-    KMP_TIME_BLOCK(USER_suspend);
+    KMP_TIME_DEVELOPER_BLOCK(USER_suspend);
     kmp_info_t *th = __kmp_threads[th_gtid];
     int status;
     typename C::flag_t old_spin;
@@ -1826,6 +1826,7 @@ void __kmp_suspend_oncore(int th_gtid, k
 template <class C>
 static inline void __kmp_resume_template( int target_gtid, C *flag )
 {
+    KMP_TIME_DEVELOPER_BLOCK(USER_resume);
     kmp_info_t *th = __kmp_threads[target_gtid];
     int status;
 
@@ -1900,7 +1901,6 @@ void __kmp_resume_oncore(int target_gtid
 void
 __kmp_resume_monitor()
 {
-    KMP_TIME_BLOCK(USER_resume);
     int status;
 #ifdef KMP_DEBUG
     int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;




More information about the Openmp-commits mailing list