[llvm-branch-commits] [openmp] 540007b - [OpenMP] Add strict mode in num_tasks and grainsize

Nawrin Sultana via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Dec 9 14:51:11 PST 2020


Author: Nawrin Sultana
Date: 2020-12-09T16:46:30-06:00
New Revision: 540007b42701b5ac9adba076824bfd648a265413

URL: https://github.com/llvm/llvm-project/commit/540007b42701b5ac9adba076824bfd648a265413
DIFF: https://github.com/llvm/llvm-project/commit/540007b42701b5ac9adba076824bfd648a265413.diff

LOG: [OpenMP] Add strict mode in num_tasks and grainsize

This patch adds new API __kmpc_taskloop_5 to accomadate strict
modifier (introduced in OpenMP 5.1) in num_tasks and grainsize
clause.

Differential Revision: https://reviews.llvm.org/D92352

Added: 
    openmp/runtime/test/tasking/kmp_taskloop_5.c

Modified: 
    openmp/runtime/src/dllexports
    openmp/runtime/src/kmp.h
    openmp/runtime/src/kmp_tasking.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index 6e41376a16b9..1c29ca90657a 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -371,6 +371,7 @@ kmpc_set_defaults                           224
         __kmpc_doacross_fini                264
         __kmpc_taskloop                     266
         __kmpc_critical_with_hint           270
+        __kmpc_taskloop_5                   285
 %endif
 kmpc_aligned_malloc                         265
 kmpc_set_disp_num_buffers                   267

diff  --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index e450b128a005..64431a60aef3 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -3783,6 +3783,12 @@ KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,
                                 kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup,
                                 kmp_int32 sched, kmp_uint64 grainsize,
                                 void *task_dup);
+KMP_EXPORT void __kmpc_taskloop_5(ident_t *loc, kmp_int32 gtid,
+                                  kmp_task_t *task, kmp_int32 if_val,
+                                  kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+                                  kmp_int32 nogroup, kmp_int32 sched,
+                                  kmp_uint64 grainsize, kmp_int32 modifier,
+                                  void *task_dup);
 KMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);
 KMP_EXPORT void *__kmpc_taskred_init(int gtid, int num_data, void *data);
 KMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);

diff  --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 424576ed440f..f95a92d872d4 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -4142,6 +4142,7 @@ class kmp_taskloop_bounds_t {
 // num_tasks  Number of tasks to execute
 // grainsize  Number of loop iterations per task
 // extras     Number of chunks with grainsize+1 iterations
+// last_chunk Reduction of grainsize for last task
 // tc         Iterations count
 // task_dup   Tasks duplication routine
 // codeptr_ra Return address for OMPT events
@@ -4149,7 +4150,7 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
                            kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                            kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                            kmp_uint64 grainsize, kmp_uint64 extras,
-                           kmp_uint64 tc,
+                           kmp_int64 last_chunk, kmp_uint64 tc,
 #if OMPT_SUPPORT
                            void *codeptr_ra,
 #endif
@@ -4167,13 +4168,14 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
   kmp_task_t *next_task;
   kmp_int32 lastpriv = 0;
 
-  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
+  KMP_DEBUG_ASSERT(
+      tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras));
   KMP_DEBUG_ASSERT(num_tasks > extras);
   KMP_DEBUG_ASSERT(num_tasks > 0);
   KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
-                "extras %lld, i=%lld,%lld(%d)%lld, dup %p\n",
-                gtid, num_tasks, grainsize, extras, lower, upper, ub_glob, st,
-                task_dup));
+                "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n",
+                gtid, num_tasks, grainsize, extras, last_chunk, lower, upper,
+                ub_glob, st, task_dup));
 
   // Launch num_tasks tasks, assign grainsize iterations each task
   for (i = 0; i < num_tasks; ++i) {
@@ -4185,6 +4187,9 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
       --extras; // first extras iterations get bigger chunk (grainsize+1)
     }
     upper = lower + st * chunk_minus_1;
+    if (upper > *ub) {
+      upper = *ub;
+    }
     if (i == num_tasks - 1) {
       // schedule the last task, set lastprivate flag if needed
       if (st == 1) { // most common case
@@ -4248,6 +4253,7 @@ typedef struct __taskloop_params {
   kmp_uint64 num_tasks;
   kmp_uint64 grainsize;
   kmp_uint64 extras;
+  kmp_int64 last_chunk;
   kmp_uint64 tc;
   kmp_uint64 num_t_min;
 #if OMPT_SUPPORT
@@ -4257,7 +4263,8 @@ typedef struct __taskloop_params {
 
 void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
                           kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
-                          kmp_uint64, kmp_uint64, kmp_uint64, kmp_uint64,
+                          kmp_uint64, kmp_uint64, kmp_int64, kmp_uint64,
+                          kmp_uint64,
 #if OMPT_SUPPORT
                           void *,
 #endif
@@ -4277,6 +4284,7 @@ int __kmp_taskloop_task(int gtid, void *ptask) {
   kmp_uint64 num_tasks = p->num_tasks;
   kmp_uint64 grainsize = p->grainsize;
   kmp_uint64 extras = p->extras;
+  kmp_int64 last_chunk = p->last_chunk;
   kmp_uint64 tc = p->tc;
   kmp_uint64 num_t_min = p->num_t_min;
 #if OMPT_SUPPORT
@@ -4285,22 +4293,23 @@ int __kmp_taskloop_task(int gtid, void *ptask) {
 #if KMP_DEBUG
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
   KMP_DEBUG_ASSERT(task != NULL);
-  KA_TRACE(20, ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
-                " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
-                gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
-                task_dup));
+  KA_TRACE(20,
+           ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
+            " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
+            gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
+            st, task_dup));
 #endif
   KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
   if (num_tasks > num_t_min)
     __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
-                         grainsize, extras, tc, num_t_min,
+                         grainsize, extras, last_chunk, tc, num_t_min,
 #if OMPT_SUPPORT
                          codeptr_ra,
 #endif
                          task_dup);
   else
     __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
-                          grainsize, extras, tc,
+                          grainsize, extras, last_chunk, tc,
 #if OMPT_SUPPORT
                           codeptr_ra,
 #endif
@@ -4323,6 +4332,7 @@ int __kmp_taskloop_task(int gtid, void *ptask) {
 // num_tasks  Number of tasks to execute
 // grainsize  Number of loop iterations per task
 // extras     Number of chunks with grainsize+1 iterations
+// last_chunk Reduction of grainsize for last task
 // tc         Iterations count
 // num_t_min  Threshold to launch tasks recursively
 // task_dup   Tasks duplication routine
@@ -4331,7 +4341,8 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                           kmp_uint64 grainsize, kmp_uint64 extras,
-                          kmp_uint64 tc, kmp_uint64 num_t_min,
+                          kmp_int64 last_chunk, kmp_uint64 tc,
+                          kmp_uint64 num_t_min,
 #if OMPT_SUPPORT
                           void *codeptr_ra,
 #endif
@@ -4339,10 +4350,11 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
   KMP_DEBUG_ASSERT(task != NULL);
   KMP_DEBUG_ASSERT(num_tasks > num_t_min);
-  KA_TRACE(20, ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
-                " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
-                gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
-                task_dup));
+  KA_TRACE(20,
+           ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
+            " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
+            gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
+            st, task_dup));
   p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
   kmp_uint64 lower = *lb;
   kmp_info_t *thread = __kmp_threads[gtid];
@@ -4353,16 +4365,23 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
   size_t upper_offset =
       (char *)ub - (char *)task; // remember offset of ub in the task structure
 
-  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
+  KMP_DEBUG_ASSERT(
+      tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras));
   KMP_DEBUG_ASSERT(num_tasks > extras);
   KMP_DEBUG_ASSERT(num_tasks > 0);
 
   // split the loop in two halves
   kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
+  kmp_int64 last_chunk0 = 0, last_chunk1 = 0;
   kmp_uint64 gr_size0 = grainsize;
   kmp_uint64 n_tsk0 = num_tasks >> 1; // num_tasks/2 to execute
   kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // to schedule as a task
-  if (n_tsk0 <= extras) {
+  if (last_chunk < 0) {
+    ext0 = ext1 = 0;
+    last_chunk1 = last_chunk;
+    tc0 = grainsize * n_tsk0;
+    tc1 = tc - tc0;
+  } else if (n_tsk0 <= extras) {
     gr_size0++; // integrate extras into grainsize
     ext0 = 0; // no extra iters in 1st half
     ext1 = extras - n_tsk0; // remaining extras
@@ -4404,6 +4423,7 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
   p->num_tasks = n_tsk1;
   p->grainsize = grainsize;
   p->extras = ext1;
+  p->last_chunk = last_chunk1;
   p->tc = tc1;
   p->num_t_min = num_t_min;
 #if OMPT_SUPPORT
@@ -4420,44 +4440,28 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
   // execute the 1st half of current subrange
   if (n_tsk0 > num_t_min)
     __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
-                         ext0, tc0, num_t_min,
+                         ext0, last_chunk0, tc0, num_t_min,
 #if OMPT_SUPPORT
                          codeptr_ra,
 #endif
                          task_dup);
   else
     __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
-                          gr_size0, ext0, tc0,
+                          gr_size0, ext0, last_chunk0, tc0,
 #if OMPT_SUPPORT
                           codeptr_ra,
 #endif
                           task_dup);
 
-  KA_TRACE(40, ("__kmpc_taskloop_recur(exit): T#%d\n", gtid));
+  KA_TRACE(40, ("__kmp_taskloop_recur(exit): T#%d\n", gtid));
 }
 
-/*!
- at ingroup TASKING
- at param loc       Source location information
- at param gtid      Global thread ID
- at param task      Task structure
- at param if_val    Value of the if clause
- at param lb        Pointer to loop lower bound in task structure
- at param ub        Pointer to loop upper bound in task structure
- at param st        Loop stride
- at param nogroup   Flag, 1 if no taskgroup needs to be added, 0 otherwise
- at param sched     Schedule specified 0/1/2 for none/grainsize/num_tasks
- at param grainsize Schedule value if specified
- at param task_dup  Tasks duplication routine
-
-Execute the taskloop construct.
-*/
-void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
-                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
-                     int sched, kmp_uint64 grainsize, void *task_dup) {
+static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+                           int nogroup, int sched, kmp_uint64 grainsize,
+                           int modifier, void *task_dup) {
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
   KMP_DEBUG_ASSERT(task != NULL);
-  __kmp_assert_valid_gtid(gtid);
   if (nogroup == 0) {
 #if OMPT_SUPPORT && OMPT_OPTIONAL
     OMPT_STORE_RETURN_ADDRESS(gtid);
@@ -4474,13 +4478,16 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
   kmp_uint64 upper = task_bounds.get_ub();
   kmp_uint64 ub_glob = upper; // global upper used to calc lastprivate flag
   kmp_uint64 num_tasks = 0, extras = 0;
+  kmp_int64 last_chunk =
+      0; // reduce grainsize of last task by last_chunk in strict mode
   kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_taskdata_t *current_task = thread->th.th_current_task;
 
-  KA_TRACE(20, ("__kmpc_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
-                "grain %llu(%d), dup %p\n",
-                gtid, taskdata, lower, upper, st, grainsize, sched, task_dup));
+  KA_TRACE(20, ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
+                "grain %llu(%d, %d), dup %p\n",
+                gtid, taskdata, lower, upper, st, grainsize, sched, modifier,
+                task_dup));
 
   // compute trip count
   if (st == 1) { // most common case
@@ -4491,7 +4498,7 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
     tc = (upper - lower) / st + 1;
   }
   if (tc == 0) {
-    KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
+    KA_TRACE(20, ("__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid));
     // free the pattern task and exit
     __kmp_task_start(gtid, task, current_task);
     // do not execute anything for zero-trip loop
@@ -4533,20 +4540,28 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
     break;
   case 1: // grainsize provided
     if (grainsize > tc) {
-      num_tasks = 1; // too big grainsize requested, adjust values
-      grainsize = tc;
+      num_tasks = 1;
+      grainsize = tc; // too big grainsize requested, adjust values
       extras = 0;
     } else {
-      num_tasks = tc / grainsize;
-      // adjust grainsize for balanced distribution of iterations
-      grainsize = tc / num_tasks;
-      extras = tc % num_tasks;
+      if (modifier) {
+        num_tasks = (tc + grainsize - 1) / grainsize;
+        last_chunk = tc - (num_tasks * grainsize);
+        extras = 0;
+      } else {
+        num_tasks = tc / grainsize;
+        // adjust grainsize for balanced distribution of iterations
+        grainsize = tc / num_tasks;
+        extras = tc % num_tasks;
+      }
     }
     break;
   default:
     KMP_ASSERT2(0, "unknown scheduling of taskloop");
   }
-  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
+
+  KMP_DEBUG_ASSERT(
+      tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras));
   KMP_DEBUG_ASSERT(num_tasks > extras);
   KMP_DEBUG_ASSERT(num_tasks > 0);
   // =========================================================================
@@ -4558,7 +4573,7 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
     taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
     // always start serial tasks linearly
     __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
-                          grainsize, extras, tc,
+                          grainsize, extras, last_chunk, tc,
 #if OMPT_SUPPORT
                           OMPT_GET_RETURN_ADDRESS(0),
 #endif
@@ -4566,21 +4581,23 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
     // !taskdata->td_flags.native => currently force linear spawning of tasks
     // for GOMP_taskloop
   } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
-    KA_TRACE(20, ("__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
-                  "(%lld), grain %llu, extras %llu\n",
-                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
+    KA_TRACE(20, ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
+                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
+                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
+                  last_chunk));
     __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
-                         grainsize, extras, tc, num_tasks_min,
+                         grainsize, extras, last_chunk, tc, num_tasks_min,
 #if OMPT_SUPPORT
                          OMPT_GET_RETURN_ADDRESS(0),
 #endif
                          task_dup);
   } else {
-    KA_TRACE(20, ("__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
-                  "(%lld), grain %llu, extras %llu\n",
-                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
+    KA_TRACE(20, ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
+                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
+                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
+                  last_chunk));
     __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
-                          grainsize, extras, tc,
+                          grainsize, extras, last_chunk, tc,
 #if OMPT_SUPPORT
                           OMPT_GET_RETURN_ADDRESS(0),
 #endif
@@ -4601,5 +4618,59 @@ void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
 #endif
     __kmpc_end_taskgroup(loc, gtid);
   }
+  KA_TRACE(20, ("__kmp_taskloop(exit): T#%d\n", gtid));
+}
+
+/*!
+ at ingroup TASKING
+ at param loc       Source location information
+ at param gtid      Global thread ID
+ at param task      Task structure
+ at param if_val    Value of the if clause
+ at param lb        Pointer to loop lower bound in task structure
+ at param ub        Pointer to loop upper bound in task structure
+ at param st        Loop stride
+ at param nogroup   Flag, 1 if nogroup clause specified, 0 otherwise
+ at param sched     Schedule specified 0/1/2 for none/grainsize/num_tasks
+ at param grainsize Schedule value if specified
+ at param task_dup  Tasks duplication routine
+
+Execute the taskloop construct.
+*/
+void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
+                     int sched, kmp_uint64 grainsize, void *task_dup) {
+  __kmp_assert_valid_gtid(gtid);
+  KA_TRACE(20, ("__kmpc_taskloop(enter): T#%d\n", gtid));
+  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
+                 0, task_dup);
   KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
 }
+
+/*!
+ at ingroup TASKING
+ at param loc       Source location information
+ at param gtid      Global thread ID
+ at param task      Task structure
+ at param if_val    Value of the if clause
+ at param lb        Pointer to loop lower bound in task structure
+ at param ub        Pointer to loop upper bound in task structure
+ at param st        Loop stride
+ at param nogroup   Flag, 1 if nogroup clause specified, 0 otherwise
+ at param sched     Schedule specified 0/1/2 for none/grainsize/num_tasks
+ at param grainsize Schedule value if specified
+ at param modifer   Modifier 'strict' for sched, 1 if present, 0 otherwise
+ at param task_dup  Tasks duplication routine
+
+Execute the taskloop construct.
+*/
+void __kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+                       kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+                       int nogroup, int sched, kmp_uint64 grainsize,
+                       int modifier, void *task_dup) {
+  __kmp_assert_valid_gtid(gtid);
+  KA_TRACE(20, ("__kmpc_taskloop_5(enter): T#%d\n", gtid));
+  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
+                 modifier, task_dup);
+  KA_TRACE(20, ("__kmpc_taskloop_5(exit): T#%d\n", gtid));
+}

diff  --git a/openmp/runtime/test/tasking/kmp_taskloop_5.c b/openmp/runtime/test/tasking/kmp_taskloop_5.c
new file mode 100644
index 000000000000..aca0e7565213
--- /dev/null
+++ b/openmp/runtime/test/tasking/kmp_taskloop_5.c
@@ -0,0 +1,167 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
+
+#include <stdio.h>
+#include <omp.h>
+#include "omp_my_sleep.h"
+
+#define N 4
+#define ST 3
+#define UB 118
+#define LB 0
+
+// globals
+int counter;
+int task_count;
+
+// Compiler-generated code (emulation)
+typedef struct ident {
+  void* dummy;
+} ident_t;
+
+typedef struct shar {
+  int *pcounter;
+  int *pj;
+  int *ptask_count;
+} *pshareds;
+
+typedef struct task {
+  pshareds shareds;
+  int(* routine)(int,struct task*);
+  int part_id;
+  unsigned long long lb; // library always uses ULONG
+  unsigned long long ub;
+  int st;
+  int last;
+  int i;
+  int j;
+  int th;
+} *ptask, kmp_task_t;
+
+typedef int(* task_entry_t)( int, ptask );
+
+void
+__task_dup_entry(ptask task_dst, ptask task_src, int lastpriv)
+{
+// setup lastprivate flag
+  task_dst->last = lastpriv;
+// could be constructor calls here...
+}
+
+// OpenMP RTL interfaces
+typedef unsigned long long kmp_uint64;
+typedef long long kmp_int64;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+void
+__kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+                  kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+                  int nogroup, int sched, kmp_int64 grainsize, int modifier,
+                  void *task_dup);
+ptask
+__kmpc_omp_task_alloc(ident_t *loc, int gtid, int flags,
+                      size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+                      task_entry_t task_entry);
+void __kmpc_atomic_fixed4_add(void *id_ref, int gtid, int * lhs, int rhs);
+int  __kmpc_global_thread_num(void *id_ref);
+#ifdef __cplusplus
+}
+#endif
+
+// User's code
+int task_entry(int gtid, ptask task)
+{
+  pshareds pshar = task->shareds;
+  __kmpc_atomic_fixed4_add(NULL, gtid, pshar->ptask_count, 1);
+
+  for (task->i = task->lb; task->i <= (int)task->ub; task->i += task->st) {
+    task->th = omp_get_thread_num();
+    __kmpc_atomic_fixed4_add(NULL,gtid,pshar->pcounter,1);
+    task->j = task->i;
+  }
+  my_sleep( 0.1 ); // sleep 100 ms in order to allow other threads to steal tasks
+  if (task->last) {
+    *(pshar->pj) = task->j; // lastprivate
+  }
+  return 0;
+}
+
+void task_loop(int sched_type, int sched_val, int modifier)
+{
+  int i, j, gtid = __kmpc_global_thread_num(NULL);
+  ptask task;
+  pshareds psh;
+  omp_set_dynamic(0);
+  counter = 0;
+  task_count = 0;
+  #pragma omp parallel num_threads(N)
+  {
+    #pragma omp master
+    {
+      int gtid = __kmpc_global_thread_num(NULL);
+      task = __kmpc_omp_task_alloc(NULL, gtid, 1, sizeof(struct task),
+                                   sizeof(struct shar), &task_entry);
+      psh = task->shareds;
+      psh->pcounter = &counter;
+      psh->ptask_count = &task_count;
+      psh->pj = &j;
+      task->lb = LB;
+      task->ub = UB;
+      task->st = ST;
+
+      __kmpc_taskloop_5(
+        NULL,             // location
+        gtid,             // gtid
+        task,             // task structure
+        1,                // if clause value
+        &task->lb,        // lower bound
+        &task->ub,        // upper bound
+        ST,               // loop increment
+        0,                // 1 if nogroup specified
+        sched_type,       // schedule type: 0-none, 1-grainsize, 2-num_tasks
+        sched_val,        // schedule value (ignored for type 0)
+        modifier,         // strict modifier
+        (void*)&__task_dup_entry // tasks duplication routine
+      );
+    } // end master
+  } // end parallel
+// check results
+  int tc;
+  if (ST == 1) { // most common case
+    tc = UB - LB + 1;
+  } else if (ST < 0) {
+    tc = (LB - UB) / (-ST) + 1;
+  } else { // ST > 0
+    tc = (UB - LB) / ST + 1;
+  }
+  int count;
+  if (sched_type == 1) {
+    count = (sched_val > tc) ? 1 : (tc + sched_val - 1) / sched_val;
+  } else {
+    count = (sched_val > tc) ? tc : sched_val;
+  }
+  if (j != LB + (tc - 1) * ST) {
+    printf("Error in lastprivate, %d != %d\n", j, LB + (tc - 1) * ST);
+    exit(1);
+  }
+  if (counter != tc) {
+    printf("Error, counter %d != %d\n", counter, tc);
+    exit(1);
+  }
+  if (task_count != count) {
+    printf("Error, task count %d != %d\n", task_count, count);
+    exit(1);
+  }
+}
+
+int main(int argc, char *argv[]) {
+  task_loop(1, 6, 1); // create 7 tasks
+  task_loop(2, 6, 1); // create 6 tasks
+  task_loop(1, 50, 1); // create 1 task
+  task_loop(2, 50, 1); // create 40 tasks
+
+  printf("Test passed\n");
+  return 0;
+}


        


More information about the llvm-branch-commits mailing list