[Openmp-commits] [openmp] r268640 - [STATS] Use partitioned timer scheme
Jonathan Peyton via Openmp-commits
openmp-commits at lists.llvm.org
Thu May 5 09:15:58 PDT 2016
Author: jlpeyton
Date: Thu May 5 11:15:57 2016
New Revision: 268640
URL: http://llvm.org/viewvc/llvm-project?rev=268640&view=rev
Log:
[STATS] Use partitioned timer scheme
This change removes the current timers with ones that partition time properly.
The current timers are nested, so that if a new timer, B, starts when the
current timer, A, is already timing, A's time will include B's. To eliminate
this problem, the partitioned timers are designed to stop the current timer (A),
let the new timer run (B), and when the new timer is finished, restart the
previously running timer (A). With this partitioning of time, a threads' timers
all sum up to the OMP_worker_thread_life time and can now easily show the
percentage of time a thread is spending in different parts of the runtime or
user code.
There is also a new state variable associated with each thread which tells where
it is executing a task. This corresponds with the timers: OMP_task_*, e.g., if
time is spent in OMP_task_taskwait, then that thread executed tasks inside a
#pragma omp taskwait construct.
The changes are mostly changing the MACROs to use the new PARITIONED_* macros,
the new partitionedTimers class and its methods, and new state logic.
Differential Revision: http://reviews.llvm.org/D19229
Modified:
openmp/trunk/runtime/src/kmp_barrier.cpp
openmp/trunk/runtime/src/kmp_csupport.c
openmp/trunk/runtime/src/kmp_dispatch.cpp
openmp/trunk/runtime/src/kmp_runtime.c
openmp/trunk/runtime/src/kmp_sched.cpp
openmp/trunk/runtime/src/kmp_stats.cpp
openmp/trunk/runtime/src/kmp_stats.h
openmp/trunk/runtime/src/kmp_stats_timing.h
openmp/trunk/runtime/src/kmp_tasking.c
openmp/trunk/runtime/src/kmp_wait_release.h
openmp/trunk/runtime/src/z_Linux_util.c
Modified: openmp/trunk/runtime/src/kmp_barrier.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_barrier.cpp?rev=268640&r1=268639&r2=268640&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_barrier.cpp (original)
+++ openmp/trunk/runtime/src/kmp_barrier.cpp Thu May 5 11:15:57 2016
@@ -1048,6 +1048,8 @@ __kmp_barrier(enum barrier_type bt, int
void *reduce_data, void (*reduce)(void *, void *))
{
KMP_TIME_DEVELOPER_BLOCK(KMP_barrier);
+ KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
register int tid = __kmp_tid_from_gtid(gtid);
register kmp_info_t *this_thr = __kmp_threads[gtid];
register kmp_team_t *team = this_thr->th.th_team;
@@ -1348,6 +1350,8 @@ __kmp_end_split_barrier(enum barrier_typ
void
__kmp_join_barrier(int gtid)
{
+ KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier);
+ KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier);
register kmp_info_t *this_thr = __kmp_threads[gtid];
register kmp_team_t *team;
@@ -1463,6 +1467,18 @@ __kmp_join_barrier(int gtid)
__kmp_task_team_wait(this_thr, team
USE_ITT_BUILD_ARG(itt_sync_obj) );
}
+#if KMP_STATS_ENABLED
+ // Have master thread flag the workers to indicate they are now waiting for
+ // next parallel region, Also wake them up so they switch their timers to idle.
+ for (int i=0; i<team->t.t_nproc; ++i) {
+ kmp_info_t* team_thread = team->t.t_threads[i];
+ if (team_thread == this_thr)
+ continue;
+ team_thread->th.th_stats->setIdleFlag();
+ if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && team_thread->th.th_sleep_loc != NULL)
+ __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread), team_thread->th.th_sleep_loc);
+ }
+#endif
#if USE_ITT_BUILD
if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
__kmp_itt_barrier_middle(gtid, itt_sync_obj);
@@ -1546,6 +1562,8 @@ __kmp_join_barrier(int gtid)
void
__kmp_fork_barrier(int gtid, int tid)
{
+ KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier);
+ KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier);
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
Modified: openmp/trunk/runtime/src/kmp_csupport.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_csupport.c?rev=268640&r1=268639&r2=268640&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_csupport.c (original)
+++ openmp/trunk/runtime/src/kmp_csupport.c Thu May 5 11:15:57 2016
@@ -290,7 +290,6 @@ __kmpc_fork_call(ident_t *loc, kmp_int32
}
else
{
- KMP_STOP_EXPLICIT_TIMER(OMP_serial);
KMP_COUNT_BLOCK(OMP_PARALLEL);
}
#endif
@@ -345,10 +344,6 @@ __kmpc_fork_call(ident_t *loc, kmp_int32
}
#endif
}
-#if (KMP_STATS_ENABLED)
- if (!inParallel)
- KMP_START_EXPLICIT_TIMER(OMP_serial);
-#endif
}
#if OMP_40_ENABLED
@@ -669,7 +664,6 @@ void
__kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
{
KMP_COUNT_BLOCK(OMP_BARRIER);
- KMP_TIME_BLOCK(OMP_barrier);
KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );
if (! TCR_4(__kmp_init_parallel))
@@ -713,7 +707,7 @@ __kmpc_master(ident_t *loc, kmp_int32 gl
if( KMP_MASTER_GTID( global_tid )) {
KMP_COUNT_BLOCK(OMP_MASTER);
- KMP_START_EXPLICIT_TIMER(OMP_master);
+ KMP_PUSH_PARTITIONED_TIMER(OMP_master);
status = 1;
}
@@ -763,7 +757,7 @@ __kmpc_end_master(ident_t *loc, kmp_int3
KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
- KMP_STOP_EXPLICIT_TIMER(OMP_master);
+ KMP_POP_PARTITIONED_TIMER();
#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
@@ -1088,7 +1082,7 @@ __kmpc_critical( ident_t * loc, kmp_int3
__kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
KMP_COUNT_BLOCK(OMP_CRITICAL);
- KMP_TIME_BLOCK(OMP_critical_wait); /* Time spent waiting to enter the critical section */
+ KMP_TIME_PARTITIONED_BLOCK(OMP_critical_wait); /* Time spent waiting to enter the critical section */
kmp_user_lock_p lck;
KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
@@ -1250,6 +1244,7 @@ __kmpc_critical_with_hint( ident_t * loc
__kmp_itt_critical_acquired( lck );
#endif /* USE_ITT_BUILD */
+ KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
} // __kmpc_critical_with_hint
@@ -1342,7 +1337,7 @@ __kmpc_end_critical(ident_t *loc, kmp_in
#endif
#endif // KMP_USE_DYNAMIC_LOCK
- KMP_STOP_EXPLICIT_TIMER(OMP_critical);
+ KMP_POP_PARTITIONED_TIMER();
KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
}
@@ -1464,7 +1459,7 @@ __kmpc_single(ident_t *loc, kmp_int32 gl
if (rc) {
// We are going to execute the single statement, so we should count it.
KMP_COUNT_BLOCK(OMP_SINGLE);
- KMP_START_EXPLICIT_TIMER(OMP_single);
+ KMP_PUSH_PARTITIONED_TIMER(OMP_single);
}
#if OMPT_SUPPORT && OMPT_TRACE
@@ -1507,7 +1502,7 @@ void
__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
{
__kmp_exit_single( global_tid );
- KMP_STOP_EXPLICIT_TIMER(OMP_single);
+ KMP_POP_PARTITIONED_TIMER();
#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
Modified: openmp/trunk/runtime/src/kmp_dispatch.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_dispatch.cpp?rev=268640&r1=268639&r2=268640&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_dispatch.cpp (original)
+++ openmp/trunk/runtime/src/kmp_dispatch.cpp Thu May 5 11:15:57 2016
@@ -1424,7 +1424,7 @@ __kmp_dispatch_next(
// This is potentially slightly misleading, schedule(runtime) will appear here even if the actual runtme schedule
// is static. (Which points out a disadavantage of schedule(runtime): even when static scheduling is used it costs
// more than a compile time choice to use static scheduling would.)
- KMP_TIME_BLOCK(FOR_dynamic_scheduling);
+ KMP_TIME_PARTITIONED_BLOCK(FOR_dynamic_scheduling);
int status;
dispatch_private_info_template< T > * pr;
Modified: openmp/trunk/runtime/src/kmp_runtime.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_runtime.c?rev=268640&r1=268639&r2=268640&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_runtime.c (original)
+++ openmp/trunk/runtime/src/kmp_runtime.c Thu May 5 11:15:57 2016
@@ -1543,7 +1543,8 @@ __kmp_fork_call(
#endif
{
- KMP_TIME_BLOCK(OMP_work);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
__kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
, exit_runtime_p
@@ -1618,7 +1619,8 @@ __kmp_fork_call(
gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
{
- KMP_TIME_BLOCK(OMP_work);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
if (! parent_team->t.t_invoke( gtid )) {
KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
}
@@ -1738,7 +1740,8 @@ __kmp_fork_call(
#endif
{
- KMP_TIME_BLOCK(OMP_work);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
__kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
, exit_runtime_p
@@ -1795,7 +1798,8 @@ __kmp_fork_call(
team->t.t_level--;
// AC: call special invoker for outer "parallel" of the teams construct
{
- KMP_TIME_BLOCK(OMP_work);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
invoker(gtid);
}
} else {
@@ -1842,7 +1846,8 @@ __kmp_fork_call(
#endif
{
- KMP_TIME_BLOCK(OMP_work);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
__kmp_invoke_microtask( microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
, exit_runtime_p
@@ -2178,7 +2183,8 @@ __kmp_fork_call(
} // END of timer KMP_fork_call block
{
- KMP_TIME_BLOCK(OMP_work);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
// KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
if (! team->t.t_invoke( gtid )) {
KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
@@ -5448,6 +5454,8 @@ __kmp_launch_thread( kmp_info_t *this_th
KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
{
KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
rc = (*pteam)->t.t_invoke( gtid );
}
KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
@@ -6783,7 +6791,8 @@ __kmp_invoke_task_func( int gtid )
#endif
{
- KMP_TIME_BLOCK(OMP_work);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
+ KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
#if OMPT_SUPPORT
Modified: openmp/trunk/runtime/src/kmp_sched.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_sched.cpp?rev=268640&r1=268639&r2=268640&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_sched.cpp (original)
+++ openmp/trunk/runtime/src/kmp_sched.cpp Thu May 5 11:15:57 2016
@@ -84,7 +84,7 @@ __kmp_for_static_init(
typename traits_t< T >::signed_t chunk
) {
KMP_COUNT_BLOCK(OMP_FOR_static);
- KMP_TIME_BLOCK (FOR_static_scheduling);
+ KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);
typedef typename traits_t< T >::unsigned_t UT;
typedef typename traits_t< T >::signed_t ST;
Modified: openmp/trunk/runtime/src/kmp_stats.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stats.cpp?rev=268640&r1=268639&r2=268640&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stats.cpp (original)
+++ openmp/trunk/runtime/src/kmp_stats.cpp Thu May 5 11:15:57 2016
@@ -157,6 +157,7 @@ std::string statistic::format(char unit,
void explicitTimer::start(timer_e timerEnumValue) {
startTime = tsc_tick_count::now();
+ totalPauseTime = 0;
if(timeStat::logEvent(timerEnumValue)) {
__kmp_stats_thread_ptr->incrementNestValue();
}
@@ -170,7 +171,7 @@ void explicitTimer::stop(timer_e timerEn
tsc_tick_count finishTime = tsc_tick_count::now();
//stat->addSample ((tsc_tick_count::now() - startTime).ticks());
- stat->addSample ((finishTime - startTime).ticks());
+ stat->addSample(((finishTime - startTime) - totalPauseTime).ticks());
if(timeStat::logEvent(timerEnumValue)) {
__kmp_stats_thread_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
@@ -182,6 +183,74 @@ void explicitTimer::stop(timer_e timerEn
return;
}
+/* ************************************************************** */
+/* ************* partitionedTimers member functions ************* */
+partitionedTimers::partitionedTimers() {
+ timer_stack.reserve(8);
+}
+
+// add a timer to this collection of partitioned timers.
+void partitionedTimers::add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer) {
+ KMP_DEBUG_ASSERT((int)timer_index < (int)EXPLICIT_TIMER_LAST+1);
+ timers[timer_index] = timer_pointer;
+}
+
+// initialize the paritioned timers to an initial timer
+void partitionedTimers::init(timerPair init_timer_pair) {
+ KMP_DEBUG_ASSERT(this->timer_stack.size() == 0);
+ timer_stack.push_back(init_timer_pair);
+ timers[init_timer_pair.get_index()]->start(init_timer_pair.get_timer());
+}
+
+// stop/save the current timer, and start the new timer (timer_pair)
+// There is a special condition where if the current timer is equal to
+// the one you are trying to push, then it only manipulates the stack,
+// and it won't stop/start the currently running timer.
+void partitionedTimers::push(timerPair timer_pair) {
+ // get the current timer
+ // stop current timer
+ // push new timer
+ // start the new timer
+ KMP_DEBUG_ASSERT(this->timer_stack.size() > 0);
+ timerPair current_timer = timer_stack.back();
+ timer_stack.push_back(timer_pair);
+ if(current_timer != timer_pair) {
+ timers[current_timer.get_index()]->pause();
+ timers[timer_pair.get_index()]->start(timer_pair.get_timer());
+ }
+}
+
+// stop/discard the current timer, and start the previously saved timer
+void partitionedTimers::pop() {
+ // get the current timer
+ // stop current timer
+ // pop current timer
+ // get the new current timer and start it back up
+ KMP_DEBUG_ASSERT(this->timer_stack.size() > 1);
+ timerPair current_timer = timer_stack.back();
+ timer_stack.pop_back();
+ timerPair new_timer = timer_stack.back();
+ if(current_timer != new_timer) {
+ timers[current_timer.get_index()]->stop(current_timer.get_timer());
+ timers[new_timer.get_index()]->resume();
+ }
+}
+
+// Wind up all the currently running timers.
+// This pops off all the timers from the stack and clears the stack
+// After this is called, init() must be run again to initialize the
+// stack of timers
+void partitionedTimers::windup() {
+ while(timer_stack.size() > 1) {
+ this->pop();
+ }
+ if(timer_stack.size() > 0) {
+ timerPair last_timer = timer_stack.back();
+ timer_stack.pop_back();
+ timers[last_timer.get_index()]->stop(last_timer.get_timer());
+ }
+}
+
/* ******************************************************************* */
/* ************* kmp_stats_event_vector member functions ************* */
@@ -397,8 +466,10 @@ void kmp_stats_output_module::windupExpl
// If the timer wasn't running, this won't record anything anyway.
kmp_stats_list::iterator it;
for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
+ kmp_stats_list* ptr = *it;
+ ptr->getPartitionedTimers()->windup();
for (int timer=0; timer<EXPLICIT_TIMER_LAST; timer++) {
- (*it)->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer);
+ ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer);
}
}
}
@@ -595,11 +666,7 @@ void __kmp_reset_stats()
// reset the event vector so all previous events are "erased"
(*it)->resetEventVector();
-
- // May need to restart the explicit timers in thread zero?
}
- KMP_START_EXPLICIT_TIMER(OMP_serial);
- KMP_START_EXPLICIT_TIMER(OMP_start_end);
}
// This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already.
Modified: openmp/trunk/runtime/src/kmp_stats.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stats.h?rev=268640&r1=268639&r2=268640&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stats.h (original)
+++ openmp/trunk/runtime/src/kmp_stats.h Thu May 5 11:15:57 2016
@@ -27,6 +27,7 @@
#include <limits>
#include <math.h>
+#include <vector>
#include <string>
#include <stdint.h>
#include <new> // placement new
@@ -52,6 +53,23 @@ enum stats_flags_e {
};
/*!
+ * @ingroup STATS_GATHERING
+ * \brief the states which a thread can be in
+ *
+ */
+enum stats_state_e {
+ IDLE,
+ SERIAL_REGION,
+ FORK_JOIN_BARRIER,
+ PLAIN_BARRIER,
+ TASKWAIT,
+ TASKYIELD,
+ TASKGROUP,
+ IMPLICIT_TASK,
+ EXPLICIT_TASK
+};
+
+/*!
* \brief Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h
*
* @param macro a user defined macro that takes three arguments - macro(COUNTER_NAME, flags, arg)
@@ -103,18 +121,25 @@ enum stats_flags_e {
*
* @ingroup STATS_GATHERING2
*/
-#define KMP_FOREACH_TIMER(macro, arg) \
- macro (OMP_start_end, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \
- macro (OMP_serial, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \
- macro (OMP_work, 0, arg) \
- macro (OMP_barrier, 0, arg) \
- macro (FOR_static_scheduling, 0, arg) \
- macro (FOR_dynamic_scheduling, 0, arg) \
- macro (OMP_task, 0, arg) \
- macro (OMP_critical, 0, arg) \
- macro (OMP_critical_wait, 0, arg) \
- macro (OMP_single, 0, arg) \
- macro (OMP_master, 0, arg) \
+#define KMP_FOREACH_TIMER(macro, arg) \
+ macro (OMP_worker_thread_life, 0, arg) \
+ macro (FOR_static_scheduling, 0, arg) \
+ macro (FOR_dynamic_scheduling, 0, arg) \
+ macro (OMP_critical, 0, arg) \
+ macro (OMP_critical_wait, 0, arg) \
+ macro (OMP_single, 0, arg) \
+ macro (OMP_master, 0, arg) \
+ macro (OMP_idle, 0, arg) \
+ macro (OMP_plain_barrier, 0, arg) \
+ macro (OMP_fork_join_barrier, 0, arg) \
+ macro (OMP_parallel, 0, arg) \
+ macro (OMP_task_immediate, 0, arg) \
+ macro (OMP_task_taskwait, 0, arg) \
+ macro (OMP_task_taskyield, 0, arg) \
+ macro (OMP_task_taskgroup, 0, arg) \
+ macro (OMP_task_join_bar, 0, arg) \
+ macro (OMP_task_plain_bar, 0, arg) \
+ macro (OMP_serial, 0, arg) \
macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
macro (FOR_static_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
@@ -129,7 +154,16 @@ enum stats_flags_e {
// OMP_barrier -- Time at "real" barriers (includes task time)
// FOR_static_scheduling -- Time spent doing scheduling for a static "for"
// FOR_dynamic_scheduling -- Time spent doing scheduling for a dynamic "for"
-// OMP_task -- Time spent executing tasks
+// OMP_idle -- Worker threads time spent waiting for inclusion in a parallel region
+// OMP_plain_barrier -- Time spent in a barrier construct
+// OMP_fork_join_barrier -- Time spent in a the fork-join barrier surrounding a parallel region
+// OMP_parallel -- Time spent inside a parallel construct
+// OMP_task_immediate -- Time spent executing non-deferred tasks
+// OMP_task_taskwait -- Time spent executing tasks inside a taskwait construct
+// OMP_task_taskyield -- Time spent executing tasks inside a taskyield construct
+// OMP_task_taskgroup -- Time spent executing tasks inside a taskygroup construct
+// OMP_task_join_bar -- Time spent executing tasks inside a join barrier
+// OMP_task_plain_bar -- Time spent executing tasks inside a barrier construct
// OMP_single -- Time spent executing a "single" region
// OMP_master -- Time spent executing a "master" region
// OMP_set_numthreads -- Values passed to omp_set_num_threads
@@ -197,12 +231,25 @@ enum stats_flags_e {
*
* @ingroup STATS_GATHERING
*/
-#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
- macro(OMP_serial, 0, arg) \
- macro(OMP_start_end, 0, arg) \
- macro(OMP_critical, 0, arg) \
- macro(OMP_single, 0, arg) \
- macro(OMP_master, 0, arg) \
+#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
+ macro(OMP_worker_thread_life, 0, arg) \
+ macro(FOR_static_scheduling, 0, arg) \
+ macro(FOR_dynamic_scheduling, 0, arg) \
+ macro(OMP_critical, 0, arg) \
+ macro(OMP_critical_wait, 0, arg) \
+ macro(OMP_single, 0, arg) \
+ macro(OMP_master, 0, arg) \
+ macro(OMP_idle, 0, arg) \
+ macro(OMP_plain_barrier, 0, arg) \
+ macro(OMP_fork_join_barrier, 0, arg) \
+ macro(OMP_parallel, 0, arg) \
+ macro(OMP_task_immediate, 0, arg) \
+ macro(OMP_task_taskwait, 0, arg) \
+ macro(OMP_task_taskyield, 0, arg) \
+ macro(OMP_task_taskgroup, 0, arg) \
+ macro(OMP_task_join_bar, 0, arg) \
+ macro(OMP_task_plain_bar, 0, arg) \
+ macro(OMP_serial, 0, arg) \
KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \
macro(LAST, 0, arg)
@@ -227,6 +274,21 @@ enum counter_e {
};
#undef ENUMERATE
+class timerPair {
+ explicit_timer_e timer_index;
+ timer_e timer;
+ public:
+ timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {}
+ inline explicit_timer_e get_index() const { return timer_index; }
+ inline timer_e get_timer() const { return timer; }
+ bool operator==(const timerPair & rhs) {
+ return this->get_index() == rhs.get_index();
+ }
+ bool operator!=(const timerPair & rhs) {
+ return !(*this == rhs);
+ }
+};
+
class statistic
{
double minVal;
@@ -294,15 +356,19 @@ class explicitTimer
{
timeStat * stat;
tsc_tick_count startTime;
+ tsc_tick_count pauseStartTime;
+ tsc_tick_count::tsc_interval_t totalPauseTime;
public:
- explicitTimer () : stat(0), startTime(0) { }
- explicitTimer (timeStat * s) : stat(s), startTime() { }
+ explicitTimer () : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() { }
+ explicitTimer (timeStat * s) : stat(s), startTime(), pauseStartTime(0), totalPauseTime() { }
void setStat (timeStat *s) { stat = s; }
void start(timer_e timerEnumValue);
+ void pause() { pauseStartTime = tsc_tick_count::now(); }
+ void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
void stop(timer_e timerEnumValue);
- void reset() { startTime = 0; }
+ void reset() { startTime = 0; pauseStartTime = 0; totalPauseTime = 0; }
};
// Where all you need is to time a block, this is enough.
@@ -315,6 +381,49 @@ class blockTimer : public explicitTimer
~blockTimer() { stop(timerEnumValue); }
};
+// Where you need to partition a threads clock ticks into separate states
+// e.g., a partitionedTimers class with two timers of EXECUTING_TASK, and
+// DOING_NOTHING would render these conditions:
+// time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive
+// No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice versa
+class partitionedTimers
+{
+ private:
+ explicitTimer* timers[EXPLICIT_TIMER_LAST+1];
+ std::vector<timerPair> timer_stack;
+ public:
+ partitionedTimers();
+ void add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer);
+ void init(timerPair timer_index);
+ void push(timerPair timer_index);
+ void pop();
+ void windup();
+};
+
+// Special wrapper around the partioned timers to aid timing code blocks
+// It avoids the need to have an explicit end, leaving the scope suffices.
+class blockPartitionedTimer
+{
+ partitionedTimers* part_timers;
+ timerPair timer_pair;
+ public:
+ blockPartitionedTimer(partitionedTimers* pt, timerPair tp) : part_timers(pt), timer_pair(tp) { part_timers->push(timer_pair); }
+ ~blockPartitionedTimer() { part_timers->pop(); }
+};
+
+// Special wrapper around the thread state to aid in keeping state in code blocks
+// It avoids the need to have an explicit end, leaving the scope suffices.
+class blockThreadState
+{
+ stats_state_e* state_pointer;
+ stats_state_e old_state;
+ public:
+ blockThreadState(stats_state_e* thread_state_pointer, stats_state_e new_state) : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
+ *state_pointer = new_state;
+ }
+ ~blockThreadState() { *state_pointer = old_state; }
+};
+
// If all you want is a count, then you can use this...
// The individual per-thread counts will be aggregated into a statistic at program exit.
class counter
@@ -473,14 +582,19 @@ class kmp_stats_list {
timeStat _timers[TIMER_LAST+1];
counter _counters[COUNTER_LAST+1];
explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST+1];
+ partitionedTimers _partitionedTimers;
int _nestLevel; // one per thread
kmp_stats_event_vector _event_vector;
kmp_stats_list* next;
kmp_stats_list* prev;
+ stats_state_e state;
+ int thread_is_idle_flag;
public:
- kmp_stats_list() : next(this) , prev(this) , _event_vector(), _nestLevel(0) {
+ kmp_stats_list() : _nestLevel(0), _event_vector(), next(this), prev(this),
+ state(IDLE), thread_is_idle_flag(0) {
#define doInit(name,ignore1,ignore2) \
- getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name));
+ getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \
+ _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, getExplicitTimer(EXPLICIT_TIMER_##name));
KMP_FOREACH_EXPLICIT_TIMER(doInit,0);
#undef doInit
}
@@ -488,6 +602,7 @@ class kmp_stats_list {
inline timeStat * getTimer(timer_e idx) { return &_timers[idx]; }
inline counter * getCounter(counter_e idx) { return &_counters[idx]; }
inline explicitTimer * getExplicitTimer(explicit_timer_e idx) { return &_explicitTimers[idx]; }
+ inline partitionedTimers * getPartitionedTimers() { return &_partitionedTimers; }
inline timeStat * getTimers() { return _timers; }
inline counter * getCounters() { return _counters; }
inline explicitTimer * getExplicitTimers() { return _explicitTimers; }
@@ -498,6 +613,12 @@ class kmp_stats_list {
inline void decrementNestValue() { _nestLevel--; }
inline int getGtid() const { return gtid; }
inline void setGtid(int newgtid) { gtid = newgtid; }
+ inline void setState(stats_state_e newstate) { state = newstate; }
+ inline stats_state_e getState() const { return state; }
+ inline stats_state_e * getStatePointer() { return &state; }
+ inline bool isIdle() { return thread_is_idle_flag==1; }
+ inline void setIdleFlag() { thread_is_idle_flag = 1; }
+ inline void resetIdleFlag() { thread_is_idle_flag = 0; }
kmp_stats_list* push_back(int gtid); // returns newly created list node
inline void push_event(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) {
_event_vector.push_back(start_time, stop_time, nest_level, name);
@@ -699,6 +820,35 @@ extern kmp_stats_output_module __kmp_sta
__kmp_output_stats(heading_string)
/*!
+ * \brief Initializes the paritioned timers to begin with name.
+ *
+ * @param name timer which you want this thread to begin with
+ *
+ * @ingroup STATS_GATHERING
+*/
+#define KMP_INIT_PARTITIONED_TIMERS(name) \
+ __kmp_stats_thread_ptr->getPartitionedTimers()->init(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+
+#define KMP_TIME_PARTITIONED_BLOCK(name) \
+ blockPartitionedTimer __PBLOCKTIME__(__kmp_stats_thread_ptr->getPartitionedTimers(), \
+ timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+
+#define KMP_PUSH_PARTITIONED_TIMER(name) \
+ __kmp_stats_thread_ptr->getPartitionedTimers()->push(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+
+#define KMP_POP_PARTITIONED_TIMER() \
+ __kmp_stats_thread_ptr->getPartitionedTimers()->pop()
+
+#define KMP_SET_THREAD_STATE(state_name) \
+ __kmp_stats_thread_ptr->setState(state_name)
+
+#define KMP_GET_THREAD_STATE() \
+ __kmp_stats_thread_ptr->getState()
+
+#define KMP_SET_THREAD_STATE_BLOCK(state_name) \
+ blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), state_name)
+
+/*!
* \brief resets all stats (counters to 0, timers to 0 elapsed ticks)
*
* \details Reset all stats for all threads.
@@ -739,6 +889,13 @@ extern kmp_stats_output_module __kmp_sta
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0)
+#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0)
+#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0)
+#define KMP_POP_PARTITIONED_TIMER() ((void)0)
+#define KMP_SET_THREAD_STATE(state_name) ((void)0)
+#define KMP_GET_THREAD_STATE() ((void)0)
+#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0)
#endif // KMP_STATS_ENABLED
#endif // KMP_STATS_H
Modified: openmp/trunk/runtime/src/kmp_stats_timing.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stats_timing.h?rev=268640&r1=268639&r2=268640&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stats_timing.h (original)
+++ openmp/trunk/runtime/src/kmp_stats_timing.h Thu May 5 11:15:57 2016
@@ -40,11 +40,16 @@ class tsc_tick_count {
#endif
double ticks() const { return double(value); }
int64_t getValue() const { return value; }
+ tsc_interval_t& operator=(int64_t nvalue) { value = nvalue; return *this; }
friend class tsc_tick_count;
- friend tsc_interval_t operator-(
- const tsc_tick_count t1, const tsc_tick_count t0);
+ friend tsc_interval_t operator-(const tsc_tick_count& t1,
+ const tsc_tick_count& t0);
+ friend tsc_interval_t operator-(const tsc_tick_count::tsc_interval_t& i1,
+ const tsc_tick_count::tsc_interval_t& i0);
+ friend tsc_interval_t& operator+=(tsc_tick_count::tsc_interval_t& i1,
+ const tsc_tick_count::tsc_interval_t& i0);
};
#if KMP_HAVE___BUILTIN_READCYCLECOUNTER
@@ -66,14 +71,25 @@ class tsc_tick_count {
static double tick_time(); // returns seconds per cycle (period) of clock
#endif
static tsc_tick_count now() { return tsc_tick_count(); } // returns the rdtsc register value
- friend tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count t1, const tsc_tick_count t0);
+ friend tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count& t1, const tsc_tick_count& t0);
};
-inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count t1, const tsc_tick_count t0)
+inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count& t1, const tsc_tick_count& t0)
{
return tsc_tick_count::tsc_interval_t( t1.my_count-t0.my_count );
}
+inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count::tsc_interval_t& i1, const tsc_tick_count::tsc_interval_t& i0)
+{
+ return tsc_tick_count::tsc_interval_t( i1.value-i0.value );
+}
+
+inline tsc_tick_count::tsc_interval_t& operator+=(tsc_tick_count::tsc_interval_t& i1, const tsc_tick_count::tsc_interval_t& i0)
+{
+ i1.value += i0.value;
+ return i1;
+}
+
#if KMP_HAVE_TICK_TIME
inline double tsc_tick_count::tsc_interval_t::seconds() const
{
Modified: openmp/trunk/runtime/src/kmp_tasking.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_tasking.c?rev=268640&r1=268639&r2=268640&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_tasking.c (original)
+++ openmp/trunk/runtime/src/kmp_tasking.c Thu May 5 11:15:57 2016
@@ -36,16 +36,6 @@ static int __kmp_realloc_task_threads_d
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
#endif
-static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
- if (!flag) return;
- // Attempt to wake up a thread: examine its type and call appropriate template
- switch (((kmp_flag_64 *)flag)->get_type()) {
- case flag32: __kmp_resume_32(gtid, NULL); break;
- case flag64: __kmp_resume_64(gtid, NULL); break;
- case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
- }
-}
-
#ifdef BUILD_TIED_TASK_STACK
//---------------------------------------------------------------------------
@@ -1207,8 +1197,17 @@ __kmp_invoke_task( kmp_int32 gtid, kmp_t
// Thunks generated by gcc take a different argument list.
//
if (!discard) {
+#if KMP_STATS_ENABLED
KMP_COUNT_BLOCK(TASK_executed);
- KMP_TIME_BLOCK (OMP_task);
+ switch(KMP_GET_THREAD_STATE()) {
+ case FORK_JOIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar); break;
+ case PLAIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar); break;
+ case TASKYIELD: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield); break;
+ case TASKWAIT: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait); break;
+ case TASKGROUP: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup); break;
+ default: KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); break;
+ }
+#endif // KMP_STATS_ENABLED
#endif // OMP_40_ENABLED
#if OMPT_SUPPORT && OMPT_TRACE
@@ -1231,6 +1230,7 @@ __kmp_invoke_task( kmp_int32 gtid, kmp_t
{
(*(task->routine))(gtid, task);
}
+ KMP_POP_PARTITIONED_TIMER();
#if OMPT_SUPPORT && OMPT_TRACE
/* let OMPT know that we're returning to the callee task */
@@ -1369,6 +1369,7 @@ kmp_int32
__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
{
kmp_int32 res;
+ KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
#if KMP_DEBUG
kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
@@ -1392,6 +1393,7 @@ __kmpc_omp_taskwait( ident_t *loc_ref, k
kmp_taskdata_t * taskdata;
kmp_info_t * thread;
int thread_finished = FALSE;
+ KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
@@ -1481,6 +1483,7 @@ __kmpc_omp_taskyield( ident_t *loc_ref,
int thread_finished = FALSE;
KMP_COUNT_BLOCK(OMP_TASKYIELD);
+ KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
gtid, loc_ref, end_part) );
@@ -1561,6 +1564,7 @@ __kmpc_end_taskgroup( ident_t* loc, int
KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
KMP_DEBUG_ASSERT( taskgroup != NULL );
+ KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
#if USE_ITT_BUILD
Modified: openmp/trunk/runtime/src/kmp_wait_release.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_wait_release.h?rev=268640&r1=268639&r2=268640&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_wait_release.h (original)
+++ openmp/trunk/runtime/src/kmp_wait_release.h Thu May 5 11:15:57 2016
@@ -18,6 +18,7 @@
#include "kmp.h"
#include "kmp_itt.h"
+#include "kmp_stats.h"
/*!
@defgroup WAIT_RELEASE Wait/Release operations
@@ -104,6 +105,9 @@ __kmp_wait_template(kmp_info_t *this_thr
}
th_gtid = this_thr->th.th_info.ds.ds_gtid;
KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
+#if KMP_STATS_ENABLED
+ stats_state_e thread_state = KMP_GET_THREAD_STATE();
+#endif
#if OMPT_SUPPORT && OMPT_BLAME
ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
@@ -223,6 +227,15 @@ __kmp_wait_template(kmp_info_t *this_thr
}
}
+#if KMP_STATS_ENABLED
+ // Check if thread has been signalled to idle state
+ // This indicates that the logical "join-barrier" has finished
+ if (this_thr->th.th_stats->isIdle() && KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
+ KMP_SET_THREAD_STATE(IDLE);
+ KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
+ }
+#endif
+
// Don't suspend if KMP_BLOCKTIME is set to "infinite"
if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
continue;
@@ -273,6 +286,14 @@ __kmp_wait_template(kmp_info_t *this_thr
}
}
#endif
+#if KMP_STATS_ENABLED
+ // If we were put into idle state, pop that off the state stack
+ if (KMP_GET_THREAD_STATE() == IDLE) {
+ KMP_POP_PARTITIONED_TIMER();
+ KMP_SET_THREAD_STATE(thread_state);
+ this_thr->th.th_stats->resetIdleFlag();
+ }
+#endif
KMP_FSYNC_SPIN_ACQUIRED(spin);
}
@@ -556,6 +577,15 @@ public:
flag_type get_ptr_type() { return flag_oncore; }
};
+// Used to wake up threads, volatile void* flag is usually the th_sleep_loc associated
+// with int gtid.
+static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
+ switch (((kmp_flag_64 *)flag)->get_type()) {
+ case flag32: __kmp_resume_32(gtid, NULL); break;
+ case flag64: __kmp_resume_64(gtid, NULL); break;
+ case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
+ }
+}
/*!
@}
Modified: openmp/trunk/runtime/src/z_Linux_util.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_util.c?rev=268640&r1=268639&r2=268640&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_util.c (original)
+++ openmp/trunk/runtime/src/z_Linux_util.c Thu May 5 11:15:57 2016
@@ -697,6 +697,9 @@ __kmp_launch_worker( void *thr )
#if KMP_STATS_ENABLED
// set __thread local index to point to thread-specific stats
__kmp_stats_thread_ptr = ((kmp_info_t*)thr)->th.th_stats;
+ KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
+ KMP_SET_THREAD_STATE(IDLE);
+ KMP_INIT_PARTITIONED_TIMERS(OMP_idle);
#endif
#if USE_ITT_BUILD
@@ -972,8 +975,9 @@ __kmp_create_worker( int gtid, kmp_info_
__kmp_stats_start_time = tsc_tick_count::now();
__kmp_stats_thread_ptr = th->th.th_stats;
__kmp_stats_init();
- KMP_START_EXPLICIT_TIMER(OMP_serial);
- KMP_START_EXPLICIT_TIMER(OMP_start_end);
+ KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
+ KMP_SET_THREAD_STATE(SERIAL_REGION);
+ KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
}
__kmp_release_tas_lock(&__kmp_stats_lock, gtid);
@@ -1856,6 +1860,7 @@ void __kmp_resume_oncore(int target_gtid
void
__kmp_resume_monitor()
{
+ KMP_TIME_DEVELOPER_BLOCK(USER_resume);
int status;
#ifdef KMP_DEBUG
int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
More information about the Openmp-commits
mailing list