[Openmp-commits] [openmp] r338146 - [OMPT] Fix OMPT callbacks for the taskloop construct and add testcase

Joachim Protze via Openmp-commits openmp-commits at lists.llvm.org
Fri Jul 27 11:13:25 PDT 2018


Author: jprotze
Date: Fri Jul 27 11:13:24 2018
New Revision: 338146

URL: http://llvm.org/viewvc/llvm-project?rev=338146&view=rev
Log:
[OMPT] Fix OMPT callbacks for the taskloop construct and add testcase

Fix the order of callbacks related to the taskloop construct.
Add the iteration_count to work callbacks (according to the spec).
Use kmpc_omp_task() instead of kmp_omp_task() to include OMPT callbacks.
Add a testcase.

Patch by Simon Convent

Reviewed by: protze.joachim, hbae

Subscribers: openmp-commits

Differential Revision: https://reviews.llvm.org/D47709

Added:
    openmp/trunk/runtime/test/ompt/tasks/taskloop.c
Modified:
    openmp/trunk/runtime/src/kmp_tasking.cpp

Modified: openmp/trunk/runtime/src/kmp_tasking.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_tasking.cpp?rev=338146&r1=338145&r2=338146&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_tasking.cpp (original)
+++ openmp/trunk/runtime/src/kmp_tasking.cpp Fri Jul 27 11:13:24 2018
@@ -1596,6 +1596,61 @@ kmp_int32 __kmpc_omp_task(ident_t *loc_r
   return res;
 }
 
+// __kmp_omp_taskloop_task: Wrapper around __kmp_omp_task to schedule
+// a taskloop task with the correct OMPT return address
+//
+// loc_ref: location of original task pragma (ignored)
+// gtid: Global Thread ID of encountering thread
+// new_task: non-thread-switchable task thunk allocated by
+// __kmp_omp_task_alloc()
+// codeptr_ra: return address for OMPT callback
+// Returns:
+//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
+//    be resumed later.
+//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
+//    resumed later.
+kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid,
+                                  kmp_task_t *new_task, void *codeptr_ra) {
+  kmp_int32 res;
+  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
+
+#if KMP_DEBUG || OMPT_SUPPORT
+  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
+#endif
+  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
+                new_taskdata));
+
+#if OMPT_SUPPORT
+  kmp_taskdata_t *parent = NULL;
+  if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
+    parent = new_taskdata->td_parent;
+    if (!parent->ompt_task_info.frame.enter_frame)
+      parent->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+    if (ompt_enabled.ompt_callback_task_create) {
+      ompt_data_t task_data = ompt_data_none;
+      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
+          parent ? &(parent->ompt_task_info.task_data) : &task_data,
+          parent ? &(parent->ompt_task_info.frame) : NULL,
+          &(new_taskdata->ompt_task_info.task_data),
+          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
+          codeptr_ra);
+    }
+  }
+#endif
+
+  res = __kmp_omp_task(gtid, new_task, true);
+
+  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
+                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
+                gtid, loc_ref, new_taskdata));
+#if OMPT_SUPPORT
+  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
+    parent->ompt_task_info.frame.enter_frame = NULL;
+  }
+#endif
+  return res;
+}
+
 template <bool ompt>
 static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
                                               void *frame_address,
@@ -3680,23 +3735,28 @@ public:
 
 // __kmp_taskloop_linear: Start tasks of the taskloop linearly
 //
-// loc       Source location information
-// gtid      Global thread ID
-// task      Pattern task, exposes the loop iteration range
-// lb        Pointer to loop lower bound in task structure
-// ub        Pointer to loop upper bound in task structure
-// st        Loop stride
-// ub_glob   Global upper bound (used for lastprivate check)
-// num_tasks Number of tasks to execute
-// grainsize Number of loop iterations per task
-// extras    Number of chunks with grainsize+1 iterations
-// tc        Iterations count
-// task_dup  Tasks duplication routine
+// loc        Source location information
+// gtid       Global thread ID
+// task       Pattern task, exposes the loop iteration range
+// lb         Pointer to loop lower bound in task structure
+// ub         Pointer to loop upper bound in task structure
+// st         Loop stride
+// ub_glob    Global upper bound (used for lastprivate check)
+// num_tasks  Number of tasks to execute
+// grainsize  Number of loop iterations per task
+// extras     Number of chunks with grainsize+1 iterations
+// tc         Iterations count
+// task_dup   Tasks duplication routine
+// codeptr_ra Return address for OMPT events
 void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
                            kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                            kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                            kmp_uint64 grainsize, kmp_uint64 extras,
-                           kmp_uint64 tc, void *task_dup) {
+                           kmp_uint64 tc,
+#if OMPT_SUPPORT
+                           void *codeptr_ra,
+#endif
+                           void *task_dup) {
   KMP_COUNT_BLOCK(OMP_TASKLOOP);
   KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
   p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
@@ -3764,7 +3824,12 @@ void __kmp_taskloop_linear(ident_t *loc,
               gtid, i, next_task, lower, upper, st,
               next_task_bounds.get_lower_offset(),
               next_task_bounds.get_upper_offset()));
+#if OMPT_SUPPORT
+    __kmp_omp_taskloop_task(NULL, gtid, next_task,
+                           codeptr_ra); // schedule new task
+#else
     __kmp_omp_task(gtid, next_task, true); // schedule new task
+#endif
     lower = upper + st; // adjust lower bound for the next iteration
   }
   // free the pattern task and exit
@@ -3787,11 +3852,17 @@ typedef struct __taskloop_params {
   kmp_uint64 extras;
   kmp_uint64 tc;
   kmp_uint64 num_t_min;
+#if OMPT_SUPPORT
+  void *codeptr_ra;
+#endif
 } __taskloop_params_t;
 
 void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
                           kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
                           kmp_uint64, kmp_uint64, kmp_uint64, kmp_uint64,
+#if OMPT_SUPPORT
+                          void *,
+#endif
                           void *);
 
 // Execute part of the the taskloop submitted as a task.
@@ -3810,6 +3881,9 @@ int __kmp_taskloop_task(int gtid, void *
   kmp_uint64 extras = p->extras;
   kmp_uint64 tc = p->tc;
   kmp_uint64 num_t_min = p->num_t_min;
+#if OMPT_SUPPORT
+  void *codeptr_ra = p->codeptr_ra;
+#endif
 #if KMP_DEBUG
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
   KMP_DEBUG_ASSERT(task != NULL);
@@ -3821,10 +3895,18 @@ int __kmp_taskloop_task(int gtid, void *
   KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
   if (num_tasks > num_t_min)
     __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
-                         grainsize, extras, tc, num_t_min, task_dup);
+                         grainsize, extras, tc, num_t_min,
+#if OMPT_SUPPORT
+                         codeptr_ra,
+#endif
+                         task_dup);
   else
     __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
-                          grainsize, extras, tc, task_dup);
+                          grainsize, extras, tc,
+#if OMPT_SUPPORT
+                          codeptr_ra,
+#endif
+                          task_dup);
 
   KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid));
   return 0;
@@ -3833,24 +3915,29 @@ int __kmp_taskloop_task(int gtid, void *
 // Schedule part of the the taskloop as a task,
 // execute the rest of the the taskloop.
 //
-// loc       Source location information
-// gtid      Global thread ID
-// task      Pattern task, exposes the loop iteration range
-// lb        Pointer to loop lower bound in task structure
-// ub        Pointer to loop upper bound in task structure
-// st        Loop stride
-// ub_glob   Global upper bound (used for lastprivate check)
-// num_tasks Number of tasks to execute
-// grainsize Number of loop iterations per task
-// extras    Number of chunks with grainsize+1 iterations
-// tc        Iterations count
-// num_t_min Threashold to launch tasks recursively
-// task_dup  Tasks duplication routine
+// loc        Source location information
+// gtid       Global thread ID
+// task       Pattern task, exposes the loop iteration range
+// lb         Pointer to loop lower bound in task structure
+// ub         Pointer to loop upper bound in task structure
+// st         Loop stride
+// ub_glob    Global upper bound (used for lastprivate check)
+// num_tasks  Number of tasks to execute
+// grainsize  Number of loop iterations per task
+// extras     Number of chunks with grainsize+1 iterations
+// tc         Iterations count
+// num_t_min  Threashold to launch tasks recursively
+// task_dup   Tasks duplication routine
+// codeptr_ra Return address for OMPT events
 void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                           kmp_uint64 grainsize, kmp_uint64 extras,
-                          kmp_uint64 tc, kmp_uint64 num_t_min, void *task_dup) {
+                          kmp_uint64 tc, kmp_uint64 num_t_min,
+#if OMPT_SUPPORT
+                          void *codeptr_ra,
+#endif
+                          void *task_dup) {
 #if KMP_DEBUG
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
   KMP_DEBUG_ASSERT(task != NULL);
@@ -3920,15 +4007,32 @@ void __kmp_taskloop_recur(ident_t *loc,
   p->extras = ext1;
   p->tc = tc1;
   p->num_t_min = num_t_min;
-  __kmp_omp_task(gtid, new_task, true); // schedule new task
+#if OMPT_SUPPORT
+  p->codeptr_ra = codeptr_ra;
+#endif
+
+#if OMPT_SUPPORT
+  // schedule new task with correct return address for OMPT events
+  __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra);
+#else
+  __kmp_omp_task(NULL, gtid, new_task); // schedule new task
+#endif
 
   // execute the 1st half of current subrange
   if (n_tsk0 > num_t_min)
     __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
-                         ext0, tc0, num_t_min, task_dup);
+                         ext0, tc0, num_t_min,
+#if OMPT_SUPPORT
+                         codeptr_ra,
+#endif
+                         task_dup);
   else
     __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
-                          gr_size0, ext0, tc0, task_dup);
+                          gr_size0, ext0, tc0,
+#if OMPT_SUPPORT
+                          codeptr_ra,
+#endif
+                          task_dup);
 
   KA_TRACE(40, ("__kmpc_taskloop_recur(exit): T#%d\n", gtid));
 }
@@ -3955,16 +4059,6 @@ void __kmpc_taskloop(ident_t *loc, int g
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
   KMP_DEBUG_ASSERT(task != NULL);
 
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
-  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
-  if (ompt_enabled.ompt_callback_work) {
-    ompt_callbacks.ompt_callback(ompt_callback_work)(
-        ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
-        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
-  }
-#endif
-
   if (nogroup == 0) {
 #if OMPT_SUPPORT && OMPT_OPTIONAL
     OMPT_STORE_RETURN_ADDRESS(gtid);
@@ -4005,6 +4099,17 @@ void __kmpc_taskloop(ident_t *loc, int g
     __kmp_task_finish<false>(gtid, task, current_task);
     return;
   }
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+  if (ompt_enabled.ompt_callback_work) {
+    ompt_callbacks.ompt_callback(ompt_callback_work)(
+        ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
+        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
+  }
+#endif
+
   if (num_tasks_min == 0)
     // TODO: can we choose better default heuristic?
     num_tasks_min =
@@ -4051,47 +4156,51 @@ void __kmpc_taskloop(ident_t *loc, int g
   if (if_val == 0) { // if(0) specified, mark task as serial
     taskdata->td_flags.task_serial = 1;
     taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-    OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
     // always start serial tasks linearly
     __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
-                          grainsize, extras, tc, task_dup);
+                          grainsize, extras, tc,
+#if OMPT_SUPPORT
+                          OMPT_GET_RETURN_ADDRESS(0),
+#endif
+                          task_dup);
     // !taskdata->td_flags.native => currently force linear spawning of tasks
     // for GOMP_taskloop
   } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
     KA_TRACE(20, ("__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
                   "(%lld), grain %llu, extras %llu\n",
                   gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-    OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
     __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
-                         grainsize, extras, tc, num_tasks_min, task_dup);
+                         grainsize, extras, tc, num_tasks_min,
+#if OMPT_SUPPORT
+                         OMPT_GET_RETURN_ADDRESS(0),
+#endif
+                         task_dup);
   } else {
     KA_TRACE(20, ("__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
                   "(%lld), grain %llu, extras %llu\n",
                   gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-    OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
     __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
-                          grainsize, extras, tc, task_dup);
-  }
-
-  if (nogroup == 0) {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-    OMPT_STORE_RETURN_ADDRESS(gtid);
+                          grainsize, extras, tc,
+#if OMPT_SUPPORT
+                          OMPT_GET_RETURN_ADDRESS(0),
 #endif
-    __kmpc_end_taskgroup(loc, gtid);
+                          task_dup);
   }
+
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.ompt_callback_work) {
     ompt_callbacks.ompt_callback(ompt_callback_work)(
         ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
-        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
+        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
   }
 #endif
+
+  if (nogroup == 0) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+    __kmpc_end_taskgroup(loc, gtid);
+  }
   KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
 }
 

Added: openmp/trunk/runtime/test/ompt/tasks/taskloop.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/test/ompt/tasks/taskloop.c?rev=338146&view=auto
==============================================================================
--- openmp/trunk/runtime/test/ompt/tasks/taskloop.c (added)
+++ openmp/trunk/runtime/test/ompt/tasks/taskloop.c Fri Jul 27 11:13:24 2018
@@ -0,0 +1,62 @@
+// RUN: %libomp-compile && %libomp-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main() {
+  unsigned int i, j, x;
+
+#pragma omp parallel num_threads(2)
+#pragma omp master
+#pragma omp taskloop
+  for (j = 0; j < 5; j += 3) {
+    x++;
+  }
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
+  // CHECK-SAME: parent_task_id={{[0-9]+}}
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]]
+  // CHECK-SAME: requested_team_size=2
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+  // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1:[0-9]+]]
+  // CHECK-SAME: team_size=2, thread_num=0
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_begin:
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_begin:
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+  // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
+  // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]], count=2
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
+  // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
+  // CHECK-SAME: new_task_id=[[TASK_ID1:[0-9]+]]
+  // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK-SAME: task_type=ompt_task_explicit=4
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create:
+  // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
+  // CHECK-SAME: new_task_id=[[TASK_ID2:[0-9]+]]
+  // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK-SAME: task_type=ompt_task_explicit=4
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_end:
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+  // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]]
+  // CHECK-SAME: count=2
+  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_begin:
+  // Schedule events:
+  // CHECK-DAG: {{^.*}}first_task_id={{[0-9]+}}, second_task_id=[[TASK_ID1]]
+  // CHECK-DAG: {{^.*}}first_task_id=[[TASK_ID1]], second_task_id={{[0-9]+}}
+  // CHECK-DAG: {{^.*}}first_task_id={{[0-9]+}}, second_task_id=[[TASK_ID2]]
+  // CHECK-DAG: {{^.*}}first_task_id=[[TASK_ID2]], second_task_id={{[0-9]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_end:
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_end:
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0
+  // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1]], team_size=2, thread_num=0
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end:
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+
+  return 0;
+}




More information about the Openmp-commits mailing list