[Openmp-commits] [openmp] r338146 - [OMPT] Fix OMPT callbacks for the taskloop construct and add testcase
Joachim Protze via Openmp-commits
openmp-commits at lists.llvm.org
Fri Jul 27 11:13:25 PDT 2018
Author: jprotze
Date: Fri Jul 27 11:13:24 2018
New Revision: 338146
URL: http://llvm.org/viewvc/llvm-project?rev=338146&view=rev
Log:
[OMPT] Fix OMPT callbacks for the taskloop construct and add testcase
Fix the order of callbacks related to the taskloop construct.
Add the iteration_count to work callbacks (according to the spec).
Use kmpc_omp_task() instead of kmp_omp_task() to include OMPT callbacks.
Add a testcase.
Patch by Simon Convent
Reviewed by: protze.joachim, hbae
Subscribers: openmp-commits
Differential Revision: https://reviews.llvm.org/D47709
Added:
openmp/trunk/runtime/test/ompt/tasks/taskloop.c
Modified:
openmp/trunk/runtime/src/kmp_tasking.cpp
Modified: openmp/trunk/runtime/src/kmp_tasking.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_tasking.cpp?rev=338146&r1=338145&r2=338146&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_tasking.cpp (original)
+++ openmp/trunk/runtime/src/kmp_tasking.cpp Fri Jul 27 11:13:24 2018
@@ -1596,6 +1596,61 @@ kmp_int32 __kmpc_omp_task(ident_t *loc_r
return res;
}
+// __kmp_omp_taskloop_task: Wrapper around __kmp_omp_task to schedule
+// a taskloop task with the correct OMPT return address
+//
+// loc_ref: location of original task pragma (ignored)
+// gtid: Global Thread ID of encountering thread
+// new_task: non-thread-switchable task thunk allocated by
+// __kmp_omp_task_alloc()
+// codeptr_ra: return address for OMPT callback
+// Returns:
+// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
+// be resumed later.
+// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
+// resumed later.
+kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_task_t *new_task, void *codeptr_ra) {
+ kmp_int32 res;
+ KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
+
+#if KMP_DEBUG || OMPT_SUPPORT
+ kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
+#endif
+ KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
+ new_taskdata));
+
+#if OMPT_SUPPORT
+ kmp_taskdata_t *parent = NULL;
+ if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
+ parent = new_taskdata->td_parent;
+ if (!parent->ompt_task_info.frame.enter_frame)
+ parent->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ if (ompt_enabled.ompt_callback_task_create) {
+ ompt_data_t task_data = ompt_data_none;
+ ompt_callbacks.ompt_callback(ompt_callback_task_create)(
+ parent ? &(parent->ompt_task_info.task_data) : &task_data,
+ parent ? &(parent->ompt_task_info.frame) : NULL,
+ &(new_taskdata->ompt_task_info.task_data),
+ ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
+ codeptr_ra);
+ }
+ }
+#endif
+
+ res = __kmp_omp_task(gtid, new_task, true);
+
+ KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
+ "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
+ gtid, loc_ref, new_taskdata));
+#if OMPT_SUPPORT
+ if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
+ parent->ompt_task_info.frame.enter_frame = NULL;
+ }
+#endif
+ return res;
+}
+
template <bool ompt>
static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
void *frame_address,
@@ -3680,23 +3735,28 @@ public:
// __kmp_taskloop_linear: Start tasks of the taskloop linearly
//
-// loc Source location information
-// gtid Global thread ID
-// task Pattern task, exposes the loop iteration range
-// lb Pointer to loop lower bound in task structure
-// ub Pointer to loop upper bound in task structure
-// st Loop stride
-// ub_glob Global upper bound (used for lastprivate check)
-// num_tasks Number of tasks to execute
-// grainsize Number of loop iterations per task
-// extras Number of chunks with grainsize+1 iterations
-// tc Iterations count
-// task_dup Tasks duplication routine
+// loc Source location information
+// gtid Global thread ID
+// task Pattern task, exposes the loop iteration range
+// lb Pointer to loop lower bound in task structure
+// ub Pointer to loop upper bound in task structure
+// st Loop stride
+// ub_glob Global upper bound (used for lastprivate check)
+// num_tasks Number of tasks to execute
+// grainsize Number of loop iterations per task
+// extras Number of chunks with grainsize+1 iterations
+// tc Iterations count
+// task_dup Tasks duplication routine
+// codeptr_ra Return address for OMPT events
void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
kmp_uint64 ub_glob, kmp_uint64 num_tasks,
kmp_uint64 grainsize, kmp_uint64 extras,
- kmp_uint64 tc, void *task_dup) {
+ kmp_uint64 tc,
+#if OMPT_SUPPORT
+ void *codeptr_ra,
+#endif
+ void *task_dup) {
KMP_COUNT_BLOCK(OMP_TASKLOOP);
KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
@@ -3764,7 +3824,12 @@ void __kmp_taskloop_linear(ident_t *loc,
gtid, i, next_task, lower, upper, st,
next_task_bounds.get_lower_offset(),
next_task_bounds.get_upper_offset()));
+#if OMPT_SUPPORT
+ __kmp_omp_taskloop_task(NULL, gtid, next_task,
+ codeptr_ra); // schedule new task
+#else
__kmp_omp_task(gtid, next_task, true); // schedule new task
+#endif
lower = upper + st; // adjust lower bound for the next iteration
}
// free the pattern task and exit
@@ -3787,11 +3852,17 @@ typedef struct __taskloop_params {
kmp_uint64 extras;
kmp_uint64 tc;
kmp_uint64 num_t_min;
+#if OMPT_SUPPORT
+ void *codeptr_ra;
+#endif
} __taskloop_params_t;
void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
kmp_uint64, kmp_uint64, kmp_uint64, kmp_uint64,
+#if OMPT_SUPPORT
+ void *,
+#endif
void *);
// Execute part of the taskloop submitted as a task.
@@ -3810,6 +3881,9 @@ int __kmp_taskloop_task(int gtid, void *
kmp_uint64 extras = p->extras;
kmp_uint64 tc = p->tc;
kmp_uint64 num_t_min = p->num_t_min;
+#if OMPT_SUPPORT
+ void *codeptr_ra = p->codeptr_ra;
+#endif
#if KMP_DEBUG
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
KMP_DEBUG_ASSERT(task != NULL);
@@ -3821,10 +3895,18 @@ int __kmp_taskloop_task(int gtid, void *
KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
if (num_tasks > num_t_min)
__kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
- grainsize, extras, tc, num_t_min, task_dup);
+ grainsize, extras, tc, num_t_min,
+#if OMPT_SUPPORT
+ codeptr_ra,
+#endif
+ task_dup);
else
__kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
- grainsize, extras, tc, task_dup);
+ grainsize, extras, tc,
+#if OMPT_SUPPORT
+ codeptr_ra,
+#endif
+ task_dup);
KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid));
return 0;
@@ -3833,24 +3915,29 @@ int __kmp_taskloop_task(int gtid, void *
// Schedule part of the taskloop as a task,
// execute the rest of the taskloop.
//
-// loc Source location information
-// gtid Global thread ID
-// task Pattern task, exposes the loop iteration range
-// lb Pointer to loop lower bound in task structure
-// ub Pointer to loop upper bound in task structure
-// st Loop stride
-// ub_glob Global upper bound (used for lastprivate check)
-// num_tasks Number of tasks to execute
-// grainsize Number of loop iterations per task
-// extras Number of chunks with grainsize+1 iterations
-// tc Iterations count
-// num_t_min Threashold to launch tasks recursively
-// task_dup Tasks duplication routine
+// loc Source location information
+// gtid Global thread ID
+// task Pattern task, exposes the loop iteration range
+// lb Pointer to loop lower bound in task structure
+// ub Pointer to loop upper bound in task structure
+// st Loop stride
+// ub_glob Global upper bound (used for lastprivate check)
+// num_tasks Number of tasks to execute
+// grainsize Number of loop iterations per task
+// extras Number of chunks with grainsize+1 iterations
+// tc Iterations count
+// num_t_min Threshold to launch tasks recursively
+// task_dup Tasks duplication routine
+// codeptr_ra Return address for OMPT events
void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
kmp_uint64 ub_glob, kmp_uint64 num_tasks,
kmp_uint64 grainsize, kmp_uint64 extras,
- kmp_uint64 tc, kmp_uint64 num_t_min, void *task_dup) {
+ kmp_uint64 tc, kmp_uint64 num_t_min,
+#if OMPT_SUPPORT
+ void *codeptr_ra,
+#endif
+ void *task_dup) {
#if KMP_DEBUG
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
KMP_DEBUG_ASSERT(task != NULL);
@@ -3920,15 +4007,32 @@ void __kmp_taskloop_recur(ident_t *loc,
p->extras = ext1;
p->tc = tc1;
p->num_t_min = num_t_min;
- __kmp_omp_task(gtid, new_task, true); // schedule new task
+#if OMPT_SUPPORT
+ p->codeptr_ra = codeptr_ra;
+#endif
+
+#if OMPT_SUPPORT
+ // schedule new task with correct return address for OMPT events
+ __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra);
+#else
+ __kmp_omp_task(NULL, gtid, new_task); // schedule new task
+#endif
// execute the 1st half of current subrange
if (n_tsk0 > num_t_min)
__kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
- ext0, tc0, num_t_min, task_dup);
+ ext0, tc0, num_t_min,
+#if OMPT_SUPPORT
+ codeptr_ra,
+#endif
+ task_dup);
else
__kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
- gr_size0, ext0, tc0, task_dup);
+ gr_size0, ext0, tc0,
+#if OMPT_SUPPORT
+ codeptr_ra,
+#endif
+ task_dup);
KA_TRACE(40, ("__kmpc_taskloop_recur(exit): T#%d\n", gtid));
}
@@ -3955,16 +4059,6 @@ void __kmpc_taskloop(ident_t *loc, int g
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
KMP_DEBUG_ASSERT(task != NULL);
-#if OMPT_SUPPORT && OMPT_OPTIONAL
- ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
- ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
- if (ompt_enabled.ompt_callback_work) {
- ompt_callbacks.ompt_callback(ompt_callback_work)(
- ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
- &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
- }
-#endif
-
if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_STORE_RETURN_ADDRESS(gtid);
@@ -4005,6 +4099,17 @@ void __kmpc_taskloop(ident_t *loc, int g
__kmp_task_finish<false>(gtid, task, current_task);
return;
}
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+ ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+ if (ompt_enabled.ompt_callback_work) {
+ ompt_callbacks.ompt_callback(ompt_callback_work)(
+ ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
+ &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
+ }
+#endif
+
if (num_tasks_min == 0)
// TODO: can we choose better default heuristic?
num_tasks_min =
@@ -4051,47 +4156,51 @@ void __kmpc_taskloop(ident_t *loc, int g
if (if_val == 0) { // if(0) specified, mark task as serial
taskdata->td_flags.task_serial = 1;
taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
-#if OMPT_SUPPORT && OMPT_OPTIONAL
- OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
// always start serial tasks linearly
__kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
- grainsize, extras, tc, task_dup);
+ grainsize, extras, tc,
+#if OMPT_SUPPORT
+ OMPT_GET_RETURN_ADDRESS(0),
+#endif
+ task_dup);
// !taskdata->td_flags.native => currently force linear spawning of tasks
// for GOMP_taskloop
} else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
KA_TRACE(20, ("__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
"(%lld), grain %llu, extras %llu\n",
gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
-#if OMPT_SUPPORT && OMPT_OPTIONAL
- OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
__kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
- grainsize, extras, tc, num_tasks_min, task_dup);
+ grainsize, extras, tc, num_tasks_min,
+#if OMPT_SUPPORT
+ OMPT_GET_RETURN_ADDRESS(0),
+#endif
+ task_dup);
} else {
KA_TRACE(20, ("__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
"(%lld), grain %llu, extras %llu\n",
gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
-#if OMPT_SUPPORT && OMPT_OPTIONAL
- OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
__kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
- grainsize, extras, tc, task_dup);
- }
-
- if (nogroup == 0) {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
- OMPT_STORE_RETURN_ADDRESS(gtid);
+ grainsize, extras, tc,
+#if OMPT_SUPPORT
+ OMPT_GET_RETURN_ADDRESS(0),
#endif
- __kmpc_end_taskgroup(loc, gtid);
+ task_dup);
}
+
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_work) {
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
- &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
+ &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
}
#endif
+
+ if (nogroup == 0) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+ __kmpc_end_taskgroup(loc, gtid);
+ }
KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
}
Added: openmp/trunk/runtime/test/ompt/tasks/taskloop.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/test/ompt/tasks/taskloop.c?rev=338146&view=auto
==============================================================================
--- openmp/trunk/runtime/test/ompt/tasks/taskloop.c (added)
+++ openmp/trunk/runtime/test/ompt/tasks/taskloop.c Fri Jul 27 11:13:24 2018
@@ -0,0 +1,62 @@
+// RUN: %libomp-compile && %libomp-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main() {
+ unsigned int i, j, x;
+
+#pragma omp parallel num_threads(2)
+#pragma omp master
+#pragma omp taskloop
+ for (j = 0; j < 5; j += 3) {
+ x++;
+ }
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
+ // CHECK-SAME: parent_task_id={{[0-9]+}}
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]]
+ // CHECK-SAME: requested_team_size=2
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+ // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1:[0-9]+]]
+ // CHECK-SAME: team_size=2, thread_num=0
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_begin:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_begin:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+ // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
+ // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]], count=2
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
+ // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
+ // CHECK-SAME: new_task_id=[[TASK_ID1:[0-9]+]]
+ // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK-SAME: task_type=ompt_task_explicit=4
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
+ // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
+ // CHECK-SAME: new_task_id=[[TASK_ID2:[0-9]+]]
+ // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS]]
+ // CHECK-SAME: task_type=ompt_task_explicit=4
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_end:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+ // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
+ // CHECK-SAME: count=2
+ // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_begin:
+ // Schedule events:
+ // CHECK-DAG: {{^.*}}first_task_id={{[0-9]+}}, second_task_id=[[TASK_ID1]]
+ // CHECK-DAG: {{^.*}}first_task_id=[[TASK_ID1]], second_task_id={{[0-9]+}}
+ // CHECK-DAG: {{^.*}}first_task_id={{[0-9]+}}, second_task_id=[[TASK_ID2]]
+ // CHECK-DAG: {{^.*}}first_task_id=[[TASK_ID2]], second_task_id={{[0-9]+}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_end:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_end:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0
+ // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1]], team_size=2, thread_num=0
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+
+ return 0;
+}
More information about the Openmp-commits
mailing list