[Openmp-commits] [openmp] r244677 - Tidy statistics collection
Jonathan Peyton via Openmp-commits
openmp-commits at lists.llvm.org
Tue Aug 11 14:36:41 PDT 2015
Author: jlpeyton
Date: Tue Aug 11 16:36:41 2015
New Revision: 244677
URL: http://llvm.org/viewvc/llvm-project?rev=244677&view=rev
Log:
Tidy statistics collection
This removes some statistics counters and timers which were not used,
adds new counters and timers for some language features that were not
monitored previously and separates the counters and timers into those
which are of interest for investigating user code and those which are
only of interest to the developer of the runtime itself.
The runtime developer statistics are now only collected if the
additional #define KMP_DEVELOPER_STATS is set.
Additional user statistics which are now collected include:
* Count of nested parallelism (omp parallel inside a parallel region)
* Count of omp distribute occurrences
* Count of omp teams occurrences
* Counts of task related statistics (taskyield, task execution, task
cancellation, task steal)
* Values passed to omp_set_num_threads
* Time spent in omp single and omp master
None of this affects code compiled without stats gathering enabled,
which is the normal library build mode.
This also fixes the CMake build by linking to the standard c++ library
when building the stats library as it is a requirement. The normal library
does not have this requirement and its link phase is left alone.
Differential Revision: http://reviews.llvm.org/D11759
Modified:
openmp/trunk/runtime/CMakeLists.txt
openmp/trunk/runtime/src/CMakeLists.txt
openmp/trunk/runtime/src/kmp_barrier.cpp
openmp/trunk/runtime/src/kmp_cancel.cpp
openmp/trunk/runtime/src/kmp_csupport.c
openmp/trunk/runtime/src/kmp_dispatch.cpp
openmp/trunk/runtime/src/kmp_runtime.c
openmp/trunk/runtime/src/kmp_sched.cpp
openmp/trunk/runtime/src/kmp_stats.cpp
openmp/trunk/runtime/src/kmp_stats.h
openmp/trunk/runtime/src/kmp_tasking.c
openmp/trunk/runtime/src/z_Linux_util.c
Modified: openmp/trunk/runtime/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/CMakeLists.txt?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/CMakeLists.txt (original)
+++ openmp/trunk/runtime/CMakeLists.txt Tue Aug 11 16:36:41 2015
@@ -254,6 +254,10 @@ set(LIBOMP_STATS FALSE CACHE BOOL
if(LIBOMP_STATS AND (NOT LIBOMP_HAVE_STATS))
libomp_error_say("Stats-gathering functionality requested but not available")
endif()
+# The stats functionality requires the std c++ library
+if(LIBOMP_STATS)
+ set(LIBOMP_USE_STDCPPLIB TRUE)
+endif()
# OMPT-support
# TODO: Make this a real feature check
Modified: openmp/trunk/runtime/src/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/CMakeLists.txt?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/CMakeLists.txt (original)
+++ openmp/trunk/runtime/src/CMakeLists.txt Tue Aug 11 16:36:41 2015
@@ -149,7 +149,10 @@ endif()
# Remove any cmake-automatic linking of the standard C++ library.
# We neither need (nor want) the standard C++ library dependency even though we compile c++ files.
if(NOT ${LIBOMP_USE_STDCPPLIB})
+ set(LIBOMP_LINKER_LANGUAGE C)
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES)
+else()
+ set(LIBOMP_LINKER_LANGUAGE CXX)
endif()
# Add the OpenMP library
@@ -158,7 +161,7 @@ add_library(omp SHARED ${LIBOMP_SOURCE_F
set_target_properties(omp PROPERTIES
PREFIX "" SUFFIX "" OUTPUT_NAME "${LIBOMP_LIB_FILE}"
LINK_FLAGS "${LIBOMP_CONFIGURED_LDFLAGS}"
- LINKER_LANGUAGE C # use C Compiler for linking step
+ LINKER_LANGUAGE ${LIBOMP_LINKER_LANGUAGE}
SKIP_BUILD_RPATH true # have Mac linker -install_name just be "-install_name libomp.dylib"
)
Modified: openmp/trunk/runtime/src/kmp_barrier.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_barrier.cpp?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_barrier.cpp (original)
+++ openmp/trunk/runtime/src/kmp_barrier.cpp Tue Aug 11 16:36:41 2015
@@ -46,7 +46,7 @@ __kmp_linear_barrier_gather(enum barrier
void (*reduce)(void *, void *)
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_linear_gather);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_linear_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
register kmp_info_t **other_threads = team->t.t_threads;
@@ -123,7 +123,7 @@ __kmp_linear_barrier_release(enum barrie
int propagate_icvs
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_linear_release);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_linear_release);
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_team_t *team;
@@ -141,17 +141,18 @@ __kmp_linear_barrier_release(enum barrie
if (nproc > 1) {
#if KMP_BARRIER_ICV_PUSH
- KMP_START_EXPLICIT_TIMER(USER_icv_copy);
- if (propagate_icvs) {
- ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
- for (i=1; i<nproc; ++i) {
- __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team, i, FALSE);
- ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
- &team->t.t_implicit_task_taskdata[0].td_icvs);
+ {
+ KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+ if (propagate_icvs) {
+ ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
+ for (i=1; i<nproc; ++i) {
+ __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team, i, FALSE);
+ ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
+ &team->t.t_implicit_task_taskdata[0].td_icvs);
+ }
+ ngo_sync();
}
- ngo_sync();
}
- KMP_STOP_EXPLICIT_TIMER(USER_icv_copy);
#endif // KMP_BARRIER_ICV_PUSH
// Now, release all of the worker threads
@@ -217,7 +218,7 @@ __kmp_tree_barrier_gather(enum barrier_t
void (*reduce)(void *, void *)
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_tree_gather);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_tree_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_info_t **other_threads = team->t.t_threads;
@@ -312,7 +313,7 @@ __kmp_tree_barrier_release(enum barrier_
int propagate_icvs
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_tree_release);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_tree_release);
register kmp_team_t *team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_uint32 nproc;
@@ -381,14 +382,15 @@ __kmp_tree_barrier_release(enum barrier_
#endif /* KMP_CACHE_MANAGE */
#if KMP_BARRIER_ICV_PUSH
- KMP_START_EXPLICIT_TIMER(USER_icv_copy);
- if (propagate_icvs) {
- __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
- team, child_tid, FALSE);
- copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
- &team->t.t_implicit_task_taskdata[0].td_icvs);
+ {
+ KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+ if (propagate_icvs) {
+ __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
+ team, child_tid, FALSE);
+ copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
+ &team->t.t_implicit_task_taskdata[0].td_icvs);
+ }
}
- KMP_STOP_EXPLICIT_TIMER(USER_icv_copy);
#endif // KMP_BARRIER_ICV_PUSH
KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
"go(%p): %u => %u\n", gtid, team->t.t_id, tid,
@@ -414,7 +416,7 @@ __kmp_hyper_barrier_gather(enum barrier_
void (*reduce)(void *, void *)
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_hyper_gather);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_info_t **other_threads = team->t.t_threads;
@@ -520,7 +522,7 @@ __kmp_hyper_barrier_release(enum barrier
int propagate_icvs
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_hyper_release);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_release);
register kmp_team_t *team;
register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
register kmp_info_t **other_threads;
@@ -725,7 +727,7 @@ __kmp_hierarchical_barrier_gather(enum b
int gtid, int tid, void (*reduce) (void *, void *)
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_hier_gather);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_hier_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
register kmp_uint32 nproc = this_thr->th.th_team_nproc;
@@ -853,7 +855,7 @@ __kmp_hierarchical_barrier_release(enum
int propagate_icvs
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_hier_release);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_hier_release);
register kmp_team_t *team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_uint32 nproc;
@@ -1035,7 +1037,7 @@ int
__kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
void *reduce_data, void (*reduce)(void *, void *))
{
- KMP_TIME_BLOCK(KMP_barrier);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_barrier);
register int tid = __kmp_tid_from_gtid(gtid);
register kmp_info_t *this_thr = __kmp_threads[gtid];
register kmp_team_t *team = this_thr->th.th_team;
@@ -1294,7 +1296,7 @@ __kmp_barrier(enum barrier_type bt, int
void
__kmp_end_split_barrier(enum barrier_type bt, int gtid)
{
- KMP_TIME_BLOCK(KMP_end_split_barrier);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_end_split_barrier);
int tid = __kmp_tid_from_gtid(gtid);
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team = this_thr->th.th_team;
@@ -1335,7 +1337,7 @@ __kmp_end_split_barrier(enum barrier_typ
void
__kmp_join_barrier(int gtid)
{
- KMP_TIME_BLOCK(KMP_join_barrier);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier);
register kmp_info_t *this_thr = __kmp_threads[gtid];
register kmp_team_t *team;
register kmp_uint nproc;
@@ -1533,7 +1535,7 @@ __kmp_join_barrier(int gtid)
void
__kmp_fork_barrier(int gtid, int tid)
{
- KMP_TIME_BLOCK(KMP_fork_barrier);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier);
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
#if USE_ITT_BUILD
@@ -1648,15 +1650,16 @@ __kmp_fork_barrier(int gtid, int tid)
this data before this function is called. We cannot modify __kmp_fork_call() to look at
the fixed ICVs in the master's thread struct, because it is not always the case that the
threads arrays have been allocated when __kmp_fork_call() is executed. */
- KMP_START_EXPLICIT_TIMER(USER_icv_copy);
- if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
- // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
- KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
- __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
- copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
- &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs);
+ {
+ KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+ if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
+ // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
+ KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
+ __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
+ copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
+ &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs);
+ }
}
- KMP_STOP_EXPLICIT_TIMER(USER_icv_copy);
#endif // KMP_BARRIER_ICV_PULL
if (__kmp_tasking_mode != tskm_immediate_exec) {
@@ -1702,7 +1705,7 @@ __kmp_fork_barrier(int gtid, int tid)
void
__kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc )
{
- KMP_TIME_BLOCK(KMP_setup_icv_copy);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_setup_icv_copy);
KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
Modified: openmp/trunk/runtime/src/kmp_cancel.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_cancel.cpp?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_cancel.cpp (original)
+++ openmp/trunk/runtime/src/kmp_cancel.cpp Tue Aug 11 16:36:41 2015
@@ -58,7 +58,7 @@ kmp_int32 __kmpc_cancel(ident_t* loc_ref
break;
}
case cancel_taskgroup:
- // cancellation requests for parallel and worksharing constructs
+ // cancellation requests for a task group
// are handled through the taskgroup structure
{
kmp_taskdata_t* task;
@@ -141,7 +141,7 @@ kmp_int32 __kmpc_cancellationpoint(ident
break;
}
case cancel_taskgroup:
- // cancellation requests for parallel and worksharing constructs
+ // cancellation requests for a task group
// are handled through the taskgroup structure
{
kmp_taskdata_t* task;
Modified: openmp/trunk/runtime/src/kmp_csupport.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_csupport.c?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_csupport.c (original)
+++ openmp/trunk/runtime/src/kmp_csupport.c Tue Aug 11 16:36:41 2015
@@ -280,9 +280,21 @@ Do the actual fork and call the microtas
void
__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
{
- KMP_STOP_EXPLICIT_TIMER(OMP_serial);
- KMP_COUNT_BLOCK(OMP_PARALLEL);
int gtid = __kmp_entry_gtid();
+
+#if (KMP_STATS_ENABLED)
+ int inParallel = __kmpc_in_parallel(loc);
+ if (inParallel)
+ {
+ KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
+ }
+ else
+ {
+ KMP_STOP_EXPLICIT_TIMER(OMP_serial);
+ KMP_COUNT_BLOCK(OMP_PARALLEL);
+ }
+#endif
+
// maybe to save thr_state is enough here
{
va_list ap;
@@ -329,7 +341,10 @@ __kmpc_fork_call(ident_t *loc, kmp_int32
}
#endif
}
- KMP_START_EXPLICIT_TIMER(OMP_serial);
+#if (KMP_STATS_ENABLED)
+ if (!inParallel)
+ KMP_START_EXPLICIT_TIMER(OMP_serial);
+#endif
}
#if OMP_40_ENABLED
@@ -370,6 +385,8 @@ __kmpc_fork_teams(ident_t *loc, kmp_int3
va_list ap;
va_start( ap, microtask );
+ KMP_COUNT_BLOCK(OMP_TEAMS);
+
// remember teams entry point and nesting level
this_thr->th.th_teams_microtask = microtask;
this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host
@@ -715,8 +732,10 @@ __kmpc_master(ident_t *loc, kmp_int32 gl
if( ! TCR_4( __kmp_init_parallel ) )
__kmp_parallel_initialize();
- if( KMP_MASTER_GTID( global_tid ))
+ if( KMP_MASTER_GTID( global_tid )) {
+ KMP_START_EXPLICIT_TIMER(OMP_master);
status = 1;
+ }
#if OMPT_SUPPORT && OMPT_TRACE
if (status) {
@@ -764,6 +783,7 @@ __kmpc_end_master(ident_t *loc, kmp_int3
KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
+ KMP_STOP_EXPLICIT_TIMER(OMP_master);
#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
@@ -1386,6 +1406,9 @@ __kmpc_single(ident_t *loc, kmp_int32 gl
{
KMP_COUNT_BLOCK(OMP_SINGLE);
kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
+ if(rc == TRUE) {
+ KMP_START_EXPLICIT_TIMER(OMP_single);
+ }
#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
@@ -1427,6 +1450,7 @@ void
__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
{
__kmp_exit_single( global_tid );
+ KMP_STOP_EXPLICIT_TIMER(OMP_single);
#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
@@ -2191,7 +2215,6 @@ int
__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
{
KMP_COUNT_BLOCK(OMP_test_lock);
- KMP_TIME_BLOCK(OMP_test_lock);
#if KMP_USE_DYNAMIC_LOCK
int rc;
Modified: openmp/trunk/runtime/src/kmp_dispatch.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_dispatch.cpp?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_dispatch.cpp (original)
+++ openmp/trunk/runtime/src/kmp_dispatch.cpp Tue Aug 11 16:36:41 2015
@@ -670,6 +670,7 @@ __kmp_dispatch_init(
} else {
pr->ordered = FALSE;
}
+
if ( schedule == kmp_sch_static ) {
schedule = __kmp_static;
} else {
@@ -761,6 +762,19 @@ __kmp_dispatch_init(
tc = 0; // zero-trip
}
+ // Any half-decent optimizer will remove this test when the blocks are empty since the macros expand to nothing
+ // when statistics are disabled.
+ if (schedule == __kmp_static)
+ {
+ KMP_COUNT_BLOCK(OMP_FOR_static);
+ KMP_COUNT_VALUE(FOR_static_iterations, tc);
+ }
+ else
+ {
+ KMP_COUNT_BLOCK(OMP_FOR_dynamic);
+ KMP_COUNT_VALUE(FOR_dynamic_iterations, tc);
+ }
+
pr->u.p.lb = lb;
pr->u.p.ub = ub;
pr->u.p.st = st;
@@ -1384,6 +1398,11 @@ __kmp_dispatch_next(
static const int ___kmp_size_type = sizeof( UT );
#endif
+ // This is potentially slightly misleading, schedule(runtime) will appear here even if the actual runtme schedule
+ // is static. (Which points out a disadavantage of schedule(runtime): even when static scheduling is used it costs
+ // more than a compile time choice to use static scheduling would.)
+ KMP_TIME_BLOCK(FOR_dynamic_scheduling);
+
int status;
dispatch_private_info_template< T > * pr;
kmp_info_t * th = __kmp_threads[ gtid ];
@@ -2164,7 +2183,6 @@ __kmp_dist_get_bounds(
T *pupper,
typename traits_t< T >::signed_t incr
) {
- KMP_COUNT_BLOCK(OMP_DISTR_FOR_dynamic);
typedef typename traits_t< T >::unsigned_t UT;
typedef typename traits_t< T >::signed_t ST;
register kmp_uint32 team_id;
@@ -2222,6 +2240,7 @@ __kmp_dist_get_bounds(
} else {
trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
}
+
if( trip_count <= nteams ) {
KMP_DEBUG_ASSERT(
__kmp_static == kmp_sch_static_greedy || \
@@ -2297,7 +2316,6 @@ void
__kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
{
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
KMP_DEBUG_ASSERT( __kmp_init_serial );
__kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
@@ -2308,7 +2326,6 @@ void
__kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
{
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
KMP_DEBUG_ASSERT( __kmp_init_serial );
__kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
@@ -2321,7 +2338,6 @@ __kmpc_dispatch_init_8( ident_t *loc, km
kmp_int64 lb, kmp_int64 ub,
kmp_int64 st, kmp_int64 chunk )
{
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
KMP_DEBUG_ASSERT( __kmp_init_serial );
__kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
@@ -2334,7 +2350,6 @@ __kmpc_dispatch_init_8u( ident_t *loc, k
kmp_uint64 lb, kmp_uint64 ub,
kmp_int64 st, kmp_int64 chunk )
{
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
KMP_DEBUG_ASSERT( __kmp_init_serial );
__kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
@@ -2352,7 +2367,6 @@ void
__kmpc_dist_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
kmp_int32 *p_last, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
{
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
KMP_DEBUG_ASSERT( __kmp_init_serial );
__kmp_dist_get_bounds< kmp_int32 >( loc, gtid, p_last, &lb, &ub, st );
__kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
@@ -2362,7 +2376,6 @@ void
__kmpc_dist_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
kmp_int32 *p_last, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
{
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
KMP_DEBUG_ASSERT( __kmp_init_serial );
__kmp_dist_get_bounds< kmp_uint32 >( loc, gtid, p_last, &lb, &ub, st );
__kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
@@ -2372,7 +2385,6 @@ void
__kmpc_dist_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
kmp_int32 *p_last, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk )
{
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
KMP_DEBUG_ASSERT( __kmp_init_serial );
__kmp_dist_get_bounds< kmp_int64 >( loc, gtid, p_last, &lb, &ub, st );
__kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
@@ -2382,7 +2394,6 @@ void
__kmpc_dist_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
kmp_int32 *p_last, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk )
{
- KMP_COUNT_BLOCK(OMP_FOR_dynamic);
KMP_DEBUG_ASSERT( __kmp_init_serial );
__kmp_dist_get_bounds< kmp_uint64 >( loc, gtid, p_last, &lb, &ub, st );
__kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
Modified: openmp/trunk/runtime/src/kmp_runtime.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_runtime.c?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_runtime.c (original)
+++ openmp/trunk/runtime/src/kmp_runtime.c Tue Aug 11 16:36:41 2015
@@ -1495,7 +1495,8 @@ __kmp_fork_call(
kmp_hot_team_ptr_t **p_hot_teams;
#endif
{ // KMP_TIME_BLOCK
- KMP_TIME_BLOCK(KMP_fork_call);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);
+ KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
@@ -1620,12 +1621,14 @@ __kmp_fork_call(
}
#endif
- KMP_TIME_BLOCK(OMP_work);
- __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
+ {
+ KMP_TIME_BLOCK(OMP_work);
+ __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
- , exit_runtime_p
+ , exit_runtime_p
#endif
- );
+ );
+ }
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
@@ -2224,8 +2227,8 @@ __kmp_fork_call(
} // END of timer KMP_fork_call block
{
- //KMP_TIME_BLOCK(OMP_work);
- KMP_TIME_BLOCK(USER_master_invoke);
+ KMP_TIME_BLOCK(OMP_work);
+ // KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
if (! team->t.t_invoke( gtid )) {
KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
}
@@ -2280,7 +2283,7 @@ __kmp_join_call(ident_t *loc, int gtid,
#endif /* OMP_40_ENABLED */
)
{
- KMP_TIME_BLOCK(KMP_join_call);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
kmp_team_t *team;
kmp_team_t *parent_team;
kmp_info_t *master_th;
@@ -2582,6 +2585,7 @@ __kmp_set_num_threads( int new_nth, int
else if (new_nth > __kmp_max_nth)
new_nth = __kmp_max_nth;
+ KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
thread = __kmp_threads[gtid];
__kmp_save_internal_controls( thread );
@@ -4790,7 +4794,7 @@ __kmp_allocate_team( kmp_root_t *root, i
kmp_internal_control_t *new_icvs,
int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
{
- KMP_TIME_BLOCK(KMP_allocate_team);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
int f;
kmp_team_t *team;
int use_hot_team = ! root->r.r_active;
@@ -5577,12 +5581,12 @@ __kmp_launch_thread( kmp_info_t *this_th
}
#endif
- KMP_STOP_EXPLICIT_TIMER(USER_launch_thread_loop);
+ KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
{
- KMP_TIME_BLOCK(USER_worker_invoke);
+ KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
rc = (*pteam)->t.t_invoke( gtid );
}
- KMP_START_EXPLICIT_TIMER(USER_launch_thread_loop);
+ KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
KMP_ASSERT( rc );
#if OMPT_SUPPORT
@@ -6910,12 +6914,15 @@ __kmp_invoke_task_func( int gtid )
#endif
#endif
- rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
- gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
+ {
+ KMP_TIME_BLOCK(OMP_work);
+ rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
+ gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
#if OMPT_SUPPORT
- , exit_runtime_p
+ , exit_runtime_p
#endif
- );
+ );
+ }
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_status & ompt_status_track) {
Modified: openmp/trunk/runtime/src/kmp_sched.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_sched.cpp?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_sched.cpp (original)
+++ openmp/trunk/runtime/src/kmp_sched.cpp Tue Aug 11 16:36:41 2015
@@ -84,6 +84,8 @@ __kmp_for_static_init(
typename traits_t< T >::signed_t chunk
) {
KMP_COUNT_BLOCK(OMP_FOR_static);
+ KMP_TIME_BLOCK (FOR_static_scheduling);
+
typedef typename traits_t< T >::unsigned_t UT;
typedef typename traits_t< T >::signed_t ST;
/* this all has to be changed back to TID and such.. */
@@ -151,6 +153,7 @@ __kmp_for_static_init(
team_info->microtask);
}
#endif
+ KMP_COUNT_VALUE (FOR_static_iterations, 0);
return;
}
@@ -246,6 +249,7 @@ __kmp_for_static_init(
__kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
}
}
+ KMP_COUNT_VALUE (FOR_static_iterations, trip_count);
/* compute remaining parameters */
switch ( schedtype ) {
@@ -372,7 +376,7 @@ __kmp_dist_for_static_init(
typename traits_t< T >::signed_t incr,
typename traits_t< T >::signed_t chunk
) {
- KMP_COUNT_BLOCK(OMP_DISTR_FOR_static);
+ KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
typedef typename traits_t< T >::unsigned_t UT;
typedef typename traits_t< T >::signed_t ST;
register kmp_uint32 tid;
@@ -437,6 +441,7 @@ __kmp_dist_for_static_init(
} else {
trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
}
+
*pstride = *pupper - *plower; // just in case (can be unused)
if( trip_count <= nteams ) {
KMP_DEBUG_ASSERT(
Modified: openmp/trunk/runtime/src/kmp_stats.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stats.cpp?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stats.cpp (original)
+++ openmp/trunk/runtime/src/kmp_stats.cpp Tue Aug 11 16:36:41 2015
@@ -521,16 +521,14 @@ void kmp_stats_output_module::outputStat
// Special handling for synthesized statistics.
// These just have to be coded specially here for now.
- // At present we only have one: the total parallel work done in each thread.
+ // At present we only have a few:
+ // The total parallel work done in each thread.
// The variance here makes it easy to see load imbalance over the whole program (though, of course,
// it's possible to have a code with awful load balance in every parallel region but perfect load
// balance oever the whole program.)
+ // The time spent in barriers in each thread.
allStats[TIMER_Total_work].addSample ((*it)->getTimer(TIMER_OMP_work)->getTotal());
- // Time waiting for work (synthesized)
- if ((t != 0) || !timeStat::workerOnly(timer_e(TIMER_OMP_await_work)))
- allStats[TIMER_Total_await_work].addSample ((*it)->getTimer(TIMER_OMP_await_work)->getTotal());
-
// Time in explicit barriers.
allStats[TIMER_Total_barrier].addSample ((*it)->getTimer(TIMER_OMP_barrier)->getTotal());
Modified: openmp/trunk/runtime/src/kmp_stats.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stats.h?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stats.h (original)
+++ openmp/trunk/runtime/src/kmp_stats.h Tue Aug 11 16:36:41 2015
@@ -31,6 +31,11 @@
#include <new> // placement new
#include "kmp_stats_timing.h"
+/*
+ * Enable developer statistics here if you want them. They are more detailed than is useful for application characterisation and
+ * are intended for the runtime library developer.
+ */
+// #define KMP_DEVELOPER_STATS 1
/*!
* @ingroup STATS_GATHERING
@@ -56,7 +61,7 @@ class stats_flags_e {
* Each thread accumulates its own count, at the end of execution the counts are aggregated treating each thread
* as a separate measurement. (Unless onlyInMaster is set, in which case there's only a single measurement).
* The min,mean,max are therefore the values for the threads.
- * Adding the counter here and then putting in a KMP_BLOCK_COUNTER(name) is all you need to do.
+ * Adding the counter here and then putting a KMP_BLOCK_COUNTER(name) at the point you want to count is all you need to do.
* All of the tables and printing is generated from this macro.
* Format is "macro(name, flags, arg)"
*
@@ -64,21 +69,30 @@ class stats_flags_e {
*/
#define KMP_FOREACH_COUNTER(macro, arg) \
macro (OMP_PARALLEL, stats_flags_e::onlyInMaster, arg) \
+ macro (OMP_NESTED_PARALLEL, 0, arg) \
macro (OMP_FOR_static, 0, arg) \
macro (OMP_FOR_dynamic, 0, arg) \
- macro (OMP_DISTR_FOR_static, 0, arg) \
- macro (OMP_DISTR_FOR_dynamic, 0, arg) \
+ macro (OMP_DISTRIBUTE, 0, arg) \
macro (OMP_BARRIER, 0, arg) \
macro (OMP_CRITICAL,0, arg) \
macro (OMP_SINGLE, 0, arg) \
macro (OMP_MASTER, 0, arg) \
+ macro (OMP_TEAMS, 0, arg) \
macro (OMP_set_lock, 0, arg) \
macro (OMP_test_lock, 0, arg) \
- macro (OMP_test_lock_failure, 0, arg) \
macro (REDUCE_wait, 0, arg) \
macro (REDUCE_nowait, 0, arg) \
+ macro (OMP_TASKYIELD, 0, arg) \
+ macro (TASK_executed, 0, arg) \
+ macro (TASK_cancelled, 0, arg) \
+ macro (TASK_stolen, 0, arg) \
macro (LAST,0,arg)
+// OMP_PARALLEL_args -- the number of arguments passed to a fork
+// FOR_static_iterations -- Number of available parallel chunks of work in a static for
+// FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for
+// Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2.
+
/*!
* \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h
*
@@ -87,72 +101,45 @@ class stats_flags_e {
*
* \details A timer collects multiple samples of some count in each thread and then finally aggregates over all the threads.
* The count is normally a time (in ticks), hence the name "timer". (But can be any value, so we use this for "number of arguments passed to fork"
- * as well, or we could collect "loop iteration count" if we wanted to).
+ * as well).
* For timers the threads are not significant, it's the individual observations that count, so the statistics are at that level.
* Format is "macro(name, flags, arg)"
*
- * @ingroup STATS_GATHERING
+ * @ingroup STATS_GATHERING2
*/
-#define KMP_FOREACH_TIMER(macro, arg) \
- macro (OMP_PARALLEL_args, stats_flags_e::onlyInMaster | stats_flags_e::noUnits, arg) \
- macro (FOR_static_iterations, stats_flags_e::onlyInMaster | stats_flags_e::noUnits, arg) \
- macro (FOR_dynamic_iterations, stats_flags_e::noUnits, arg) \
+#define KMP_FOREACH_TIMER(macro, arg) \
macro (OMP_start_end, stats_flags_e::onlyInMaster, arg) \
macro (OMP_serial, stats_flags_e::onlyInMaster, arg) \
macro (OMP_work, 0, arg) \
macro (Total_work, stats_flags_e::synthesized, arg) \
- macro (OMP_await_work, stats_flags_e::notInMaster, arg) \
- macro (Total_await_work, stats_flags_e::synthesized, arg) \
macro (OMP_barrier, 0, arg) \
macro (Total_barrier, stats_flags_e::synthesized, arg) \
- macro (OMP_test_lock, 0, arg) \
+ macro (FOR_static_iterations, stats_flags_e::noUnits, arg) \
macro (FOR_static_scheduling, 0, arg) \
+ macro (FOR_dynamic_iterations, stats_flags_e::noUnits, arg) \
macro (FOR_dynamic_scheduling, 0, arg) \
- macro (KMP_fork_call, 0, arg) \
- macro (KMP_join_call, 0, arg) \
- macro (KMP_fork_barrier, stats_flags_e::logEvent, arg) \
- macro (KMP_join_barrier, stats_flags_e::logEvent, arg) \
- macro (KMP_barrier, 0, arg) \
- macro (KMP_end_split_barrier, 0, arg) \
- macro (KMP_wait_sleep, 0, arg) \
- macro (KMP_release, 0, arg) \
- macro (KMP_hier_gather, 0, arg) \
- macro (KMP_hier_release, 0, arg) \
- macro (KMP_hyper_gather, stats_flags_e::logEvent, arg) \
- macro (KMP_hyper_release, stats_flags_e::logEvent, arg) \
- macro (KMP_linear_gather, 0, arg) \
- macro (KMP_linear_release, 0, arg) \
- macro (KMP_tree_gather, 0, arg) \
- macro (KMP_tree_release, 0, arg) \
- macro (USER_master_invoke, stats_flags_e::logEvent, arg) \
- macro (USER_worker_invoke, stats_flags_e::logEvent, arg) \
- macro (USER_resume, stats_flags_e::logEvent, arg) \
- macro (USER_suspend, stats_flags_e::logEvent, arg) \
- macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg) \
- macro (KMP_allocate_team, 0, arg) \
- macro (KMP_setup_icv_copy, 0, arg) \
- macro (USER_icv_copy, 0, arg) \
+ macro (TASK_execution, 0, arg) \
+ macro (OMP_set_numthreads, stats_flags_e::noUnits, arg) \
+ macro (OMP_PARALLEL_args, stats_flags_e::noUnits, arg) \
+ macro (OMP_single, 0, arg) \
+ macro (OMP_master, 0, arg) \
+ KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
macro (LAST,0, arg)
-
-// OMP_PARALLEL_args -- the number of arguments passed to a fork
-// FOR_static_iterations -- Number of available parallel chunks of work in a static for
-// FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for
-// Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2.
-// OMP_serial -- thread zero time executing serial code
// OMP_start_end -- time from when OpenMP is initialized until the stats are printed at exit
+// OMP_serial -- thread zero time executing serial code
// OMP_work -- elapsed time in code dispatched by a fork (measured in the thread)
// Total_work -- a synthesized statistic summarizing how much parallel work each thread executed.
// OMP_barrier -- time at "real" barriers
// Total_barrier -- a synthesized statistic summarizing how much time at real barriers in each thread
-// OMP_set_lock -- time in lock setting
-// OMP_test_lock -- time in testing a lock
-// LOCK_WAIT -- time waiting for a lock
// FOR_static_scheduling -- time spent doing scheduling for a static "for"
// FOR_dynamic_scheduling -- time spent doing scheduling for a dynamic "for"
-// KMP_wait_sleep -- time in __kmp_wait_sleep
-// KMP_release -- time in __kmp_release
+
+#if (KMP_DEVELOPER_STATS)
+// Timers which are of interest to runtime library developers, not end users.
+// These have to be explicitly enabled in addition to the other stats.
+
// KMP_fork_barrier -- time in __kmp_fork_barrier
// KMP_join_barrier -- time in __kmp_join_barrier
// KMP_barrier -- time in __kmp_barrier
@@ -165,6 +152,32 @@ class stats_flags_e {
// KMP_tree_release -- time in __kmp_tree_barrier_release
// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather
// KMP_hyper_release -- time in __kmp_hyper_barrier_release
+# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
+ macro (KMP_fork_call, 0, arg) \
+ macro (KMP_join_call, 0, arg) \
+ macro (KMP_fork_barrier, stats_flags_e::logEvent, arg) \
+ macro (KMP_join_barrier, stats_flags_e::logEvent, arg) \
+ macro (KMP_barrier, 0, arg) \
+ macro (KMP_end_split_barrier, 0, arg) \
+ macro (KMP_hier_gather, 0, arg) \
+ macro (KMP_hier_release, 0, arg) \
+ macro (KMP_hyper_gather, stats_flags_e::logEvent, arg) \
+ macro (KMP_hyper_release, stats_flags_e::logEvent, arg) \
+ macro (KMP_linear_gather, 0, arg) \
+ macro (KMP_linear_release, 0, arg) \
+ macro (KMP_tree_gather, 0, arg) \
+ macro (KMP_tree_release, 0, arg) \
+ macro (USER_master_invoke, stats_flags_e::logEvent, arg) \
+ macro (USER_worker_invoke, stats_flags_e::logEvent, arg) \
+ macro (USER_resume, stats_flags_e::logEvent, arg) \
+ macro (USER_suspend, stats_flags_e::logEvent, arg) \
+ macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg) \
+ macro (KMP_allocate_team, 0, arg) \
+ macro (KMP_setup_icv_copy, 0, arg) \
+ macro (USER_icv_copy, 0, arg)
+#else
+# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
+#endif
/*!
* \brief Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
@@ -182,13 +195,21 @@ class stats_flags_e {
*
* @ingroup STATS_GATHERING
*/
-#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
- macro(OMP_serial, 0, arg) \
- macro(OMP_start_end, 0, arg) \
- macro(USER_icv_copy, 0, arg) \
- macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg) \
+#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
+ macro(OMP_serial, 0, arg) \
+ macro(OMP_start_end, 0, arg) \
+ macro(OMP_single, 0, arg) \
+ macro(OMP_master, 0, arg) \
+ KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \
macro(LAST, 0, arg)
+#if (KMP_DEVELOPER_STATS)
+# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) \
+ macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg)
+#else
+# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg)
+#endif
+
#define ENUMERATE(name,ignore,prefix) prefix##name,
enum timer_e {
KMP_FOREACH_TIMER(ENUMERATE, TIMER_)
@@ -689,6 +710,21 @@ extern kmp_stats_output_module __kmp_sta
*/
#define KMP_RESET_STATS() __kmp_reset_stats()
+#if (KMP_DEVELOPER_STATS)
+# define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n)
+# define KMP_COUNT_DEVELOPER_VALUE(n,v) KMP_COUNT_VALUE(n,v)
+# define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n)
+# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n)
+# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n)
+#else
+// Null definitions
+# define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
+# define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0)
+# define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
+# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#endif
+
#else // KMP_STATS_ENABLED
// Null definitions
@@ -701,6 +737,11 @@ extern kmp_stats_output_module __kmp_sta
#define KMP_OUTPUT_STATS(heading_string) ((void)0)
#define KMP_RESET_STATS() ((void)0)
+#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
+#define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0)
+#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
+#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
#endif // KMP_STATS_ENABLED
#endif // KMP_STATS_H
Modified: openmp/trunk/runtime/src/kmp_tasking.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_tasking.c?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_tasking.c (original)
+++ openmp/trunk/runtime/src/kmp_tasking.c Tue Aug 11 16:36:41 2015
@@ -17,6 +17,7 @@
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_wait_release.h"
+#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
@@ -1136,6 +1137,7 @@ __kmp_invoke_task( kmp_int32 gtid, kmp_t
kmp_team_t * this_team = this_thr->th.th_team;
kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
+ KMP_COUNT_BLOCK(TASK_cancelled);
// this task belongs to a task group and we need to cancel it
discard = 1 /* true */;
}
@@ -1146,6 +1148,8 @@ __kmp_invoke_task( kmp_int32 gtid, kmp_t
// Thunks generated by gcc take a different argument list.
//
if (!discard) {
+ KMP_COUNT_BLOCK(TASK_executed);
+ KMP_TIME_BLOCK (TASK_execution);
#endif // OMP_40_ENABLED
#ifdef KMP_GOMP_COMPAT
if (taskdata->td_flags.native) {
@@ -1356,6 +1360,8 @@ __kmpc_omp_taskyield( ident_t *loc_ref,
kmp_info_t * thread;
int thread_finished = FALSE;
+ KMP_COUNT_BLOCK(OMP_TASKYIELD);
+
KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
gtid, loc_ref, end_part) );
@@ -1648,6 +1654,7 @@ __kmp_steal_task( kmp_info_t *victim, km
__kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
+ KMP_COUNT_BLOCK(TASK_stolen);
KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
"ntasks=%d head=%u tail=%u\n",
gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
Modified: openmp/trunk/runtime/src/z_Linux_util.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_util.c?rev=244677&r1=244676&r2=244677&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_util.c (original)
+++ openmp/trunk/runtime/src/z_Linux_util.c Tue Aug 11 16:36:41 2015
@@ -1688,7 +1688,7 @@ __kmp_suspend_uninitialize_thread( kmp_i
template <class C>
static inline void __kmp_suspend_template( int th_gtid, C *flag )
{
- KMP_TIME_BLOCK(USER_suspend);
+ KMP_TIME_DEVELOPER_BLOCK(USER_suspend);
kmp_info_t *th = __kmp_threads[th_gtid];
int status;
typename C::flag_t old_spin;
@@ -1826,6 +1826,7 @@ void __kmp_suspend_oncore(int th_gtid, k
template <class C>
static inline void __kmp_resume_template( int target_gtid, C *flag )
{
+ KMP_TIME_DEVELOPER_BLOCK(USER_resume);
kmp_info_t *th = __kmp_threads[target_gtid];
int status;
@@ -1900,7 +1901,6 @@ void __kmp_resume_oncore(int target_gtid
void
__kmp_resume_monitor()
{
- KMP_TIME_BLOCK(USER_resume);
int status;
#ifdef KMP_DEBUG
int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
More information about the Openmp-commits
mailing list