[Openmp-commits] [openmp] r304724 - OpenMP 4.5: implemented support of schedule(simd:guided) and
Andrey Churbanov via Openmp-commits
openmp-commits at lists.llvm.org
Mon Jun 5 10:17:33 PDT 2017
Author: achurbanov
Date: Mon Jun 5 12:17:33 2017
New Revision: 304724
URL: http://llvm.org/viewvc/llvm-project?rev=304724&view=rev
Log:
OpenMP 4.5: implemented support of schedule(simd:guided) and
schedule(simd:runtime) - library part. Compiler generation should use newly
introduced scheduling kinds kmp_sch_guided_simd = 46, kmp_sch_runtime_simd = 47,
as parameters to __kmpc_dispatch_init_* entries.
Differential Revision: https://reviews.llvm.org/D31602
Added:
openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_guided.c (with props)
openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c (with props)
openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c (with props)
openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c (with props)
Modified:
openmp/trunk/runtime/src/kmp.h
openmp/trunk/runtime/src/kmp_dispatch.cpp
openmp/trunk/runtime/src/kmp_runtime.cpp
Modified: openmp/trunk/runtime/src/kmp.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp.h?rev=304724&r1=304723&r2=304724&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp.h (original)
+++ openmp/trunk/runtime/src/kmp.h Mon Jun 5 12:17:33 2017
@@ -334,10 +334,12 @@ enum sched_type {
#if OMP_45_ENABLED
/* static with chunk adjustment (e.g., simd) */
kmp_sch_static_balanced_chunked = 45,
+ kmp_sch_guided_simd = 46, /**< guided with chunk adjustment */
+ kmp_sch_runtime_simd = 47, /**< runtime with chunk adjustment */
#endif
/* accessible only through KMP_SCHEDULE environment variable */
- kmp_sch_upper = 46, /**< upper bound for unordered values */
+ kmp_sch_upper = 48, /**< upper bound for unordered values */
kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */
kmp_ord_static_chunked = 65,
Modified: openmp/trunk/runtime/src/kmp_dispatch.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_dispatch.cpp?rev=304724&r1=304723&r2=304724&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_dispatch.cpp (original)
+++ openmp/trunk/runtime/src/kmp_dispatch.cpp Mon Jun 5 12:17:33 2017
@@ -681,6 +681,35 @@ __kmp_dispatch_init(ident_t *loc, int gt
schedule = kmp_sch_guided_iterative_chunked;
KMP_WARNING(DispatchManyThreads);
}
+ if (schedule == kmp_sch_runtime_simd) {
+ // compiler provides simd_width in the chunk parameter
+ schedule = team->t.t_sched.r_sched_type;
+ // Detail the schedule if needed (global controls are differentiated
+ // appropriately)
+ if (schedule == kmp_sch_static || schedule == kmp_sch_auto ||
+ schedule == __kmp_static) {
+ schedule = kmp_sch_static_balanced_chunked;
+ } else {
+ if (schedule == kmp_sch_guided_chunked || schedule == __kmp_guided) {
+ schedule = kmp_sch_guided_simd;
+ }
+ chunk = team->t.t_sched.chunk * chunk;
+ }
+#if USE_ITT_BUILD
+ cur_chunk = chunk;
+#endif
+#ifdef KMP_DEBUG
+ {
+ const char *buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format("__kmp_dispatch_init: T#%%d new: schedule:%%d"
+ " chunk:%%%s\n",
+ traits_t<ST>::spec);
+ KD_TRACE(10, (buff, gtid, schedule, chunk));
+ __kmp_str_free(&buff);
+ }
+#endif
+ }
pr->u.p.parm1 = chunk;
}
KMP_ASSERT2((kmp_sch_lower < schedule && schedule < kmp_sch_upper),
@@ -878,7 +907,21 @@ __kmp_dispatch_init(ident_t *loc, int gt
}
break;
} // case
- case kmp_sch_guided_iterative_chunked: {
+ case kmp_sch_static_balanced_chunked: {
+ // similar to balanced, but chunk adjusted to multiple of simd width
+ T nth = th->th.th_team_nproc;
+ KD_TRACE(100, ("__kmp_dispatch_init: T#%d runtime(simd:static)"
+ " -> falling-through to static_greedy\n",
+ gtid));
+ schedule = kmp_sch_static_greedy;
+ if (nth > 1)
+ pr->u.p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1);
+ else
+ pr->u.p.parm1 = tc;
+ break;
+ } // case
+ case kmp_sch_guided_iterative_chunked:
+ case kmp_sch_guided_simd: {
T nproc = th->th.th_team_nproc;
KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked"
" case\n",
@@ -1140,6 +1183,7 @@ __kmp_dispatch_init(ident_t *loc, int gt
break;
case kmp_sch_guided_iterative_chunked:
case kmp_sch_guided_analytical_chunked:
+ case kmp_sch_guided_simd:
schedtype = 2;
break;
default:
@@ -1954,6 +1998,89 @@ static int __kmp_dispatch_next(ident_t *
if (compare_and_swap<ST>((ST *)&sh->u.s.iteration, (ST)init,
(ST)limit)) {
// CAS was successful, chunk obtained
+ status = 1;
+ --limit;
+ break;
+ } // if
+ } // while
+ if (status != 0) {
+ start = pr->u.p.lb;
+ incr = pr->u.p.st;
+ if (p_st != NULL)
+ *p_st = incr;
+ *p_lb = start + init * incr;
+ *p_ub = start + limit * incr;
+ if (pr->ordered) {
+ pr->u.p.ordered_lower = init;
+ pr->u.p.ordered_upper = limit;
+#ifdef KMP_DEBUG
+ {
+ const char *buff;
+ // create format specifiers before the debug output
+ buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
+ "ordered_lower:%%%s ordered_upper:%%%s\n",
+ traits_t<UT>::spec, traits_t<UT>::spec);
+ KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
+ pr->u.p.ordered_upper));
+ __kmp_str_free(&buff);
+ }
+#endif
+ } // if
+ } else {
+ *p_lb = 0;
+ *p_ub = 0;
+ if (p_st != NULL)
+ *p_st = 0;
+ } // if
+ } // case
+ break;
+
+ case kmp_sch_guided_simd: {
+ // same as iterative but curr-chunk adjusted to be multiple of given
+ // chunk
+ T chunk = pr->u.p.parm1;
+ KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_simd case\n",
+ gtid));
+ trip = pr->u.p.tc;
+ // Start atomic part of calculations
+ while (1) {
+ ST remaining; // signed, because can be < 0
+ init = sh->u.s.iteration; // shared value
+ remaining = trip - init;
+ if (remaining <= 0) { // AC: need to compare with 0 first
+ status = 0; // nothing to do, don't try atomic op
+ break;
+ }
+ KMP_DEBUG_ASSERT(init % chunk == 0);
+ // compare with K*nproc*(chunk+1), K=2 by default
+ if ((T)remaining < pr->u.p.parm2) {
+ // use dynamic-style shcedule
+ // atomically inrement iterations, get old value
+ init = test_then_add<ST>((ST *)&sh->u.s.iteration, (ST)chunk);
+ remaining = trip - init;
+ if (remaining <= 0) {
+ status = 0; // all iterations got by other threads
+ } else {
+ // got some iterations to work on
+ status = 1;
+ if ((T)remaining > chunk) {
+ limit = init + chunk - 1;
+ } else {
+ last = 1; // the last chunk
+ limit = init + remaining - 1;
+ } // if
+ } // if
+ break;
+ } // if
+ // divide by K*nproc
+ UT span = remaining * (*(double *)&pr->u.p.parm3);
+ UT rem = span % chunk;
+ if (rem) // adjust so that span%chunk == 0
+ span += chunk - rem;
+ limit = init + span;
+ if (compare_and_swap<ST>((ST *)&sh->u.s.iteration, (ST)init,
+ (ST)limit)) {
+ // CAS was successful, chunk obtained
status = 1;
--limit;
break;
Modified: openmp/trunk/runtime/src/kmp_runtime.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_runtime.cpp?rev=304724&r1=304723&r2=304724&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_runtime.cpp (original)
+++ openmp/trunk/runtime/src/kmp_runtime.cpp Mon Jun 5 12:17:33 2017
@@ -2744,7 +2744,7 @@ void __kmp_set_schedule(int gtid, kmp_sc
__kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
kmp_sched_lower - 2];
}
- if (kind == kmp_sched_auto) {
+ if (kind == kmp_sched_auto || chunk < 1) {
// ignore parameter chunk for schedule auto
thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
} else {
Added: openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_guided.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_guided.c?rev=304724&view=auto
==============================================================================
--- openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_guided.c (added)
+++ openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_guided.c Mon Jun 5 12:17:33 2017
@@ -0,0 +1,410 @@
+// RUN: %libomp-compile-and-run
+/*
+ Test for the 'schedule(simd:guided)' clause.
+ Compiler needs to generate a dynamic dispatching and pass the schedule
+ value 46 to the OpenMP RTL. Test uses numerous loop parameter combinations.
+*/
+#include <stdio.h>
+#include <omp.h>
+
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay() Sleep(1);
+#else
+#include <unistd.h>
+#define delay() usleep(10);
+#endif
+
+// uncomment for debug diagnostics:
+//#define DEBUG
+
+#define SIMD_LEN 4
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL
+enum sched {
+ kmp_sch_static_balanced_chunked = 45,
+ kmp_sch_guided_simd = 46,
+ kmp_sch_runtime_simd = 47,
+};
+typedef unsigned u32;
+typedef long long i64;
+typedef unsigned long long u64;
+typedef struct {
+ int reserved_1;
+ int flags;
+ int reserved_2;
+ int reserved_3;
+ char *psource;
+} id;
+
+extern int __kmpc_global_thread_num(id*);
+extern void __kmpc_barrier(id*, int gtid);
+extern void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
+extern void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
+extern int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
+extern int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+
+// ---------------------------------------------------------------------------
+// Dispatches [loop_lb, loop_ub] with kmp_sch_guided_simd: thread 0 takes and
+// checks every chunk, the other threads verify none is left. Returns #errors.
+int run_loop_64(i64 loop_lb, i64 loop_ub, i64 loop_st, int loop_chunk) {
+  int err = 0;
+  static int volatile loop_sync = 0; // Set by thread 0 once all chunks are taken
+  i64 lb, ub, st; // Bounds and stride of the chunk just dispatched
+  int rc;
+  int tid = omp_get_thread_num();
+  int gtid = tid;
+  int last;
+#ifdef DEBUG
+  printf("run_loop_<%d>(gtid=%d, tid=%d, lb=%d, ub=%d, st=%d, ch=%d)\n",
+         (int)sizeof(i64), gtid, tid,
+         (int)loop_lb, (int)loop_ub, (int)loop_st, loop_chunk);
+#endif
+  // Don't test degenerate cases that should have been discovered by codegen
+  if (loop_st == 0)
+    return 0;
+  if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+    return 0;
+
+  __kmpc_dispatch_init_8(&loc, gtid, kmp_sch_guided_simd,
+                         loop_lb, loop_ub, loop_st, loop_chunk);
+  if (tid == 0) {
+    // Let the master thread handle the chunks alone
+    int chunk; // No of current chunk
+    i64 next_lb; // Lower bound of the next chunk
+    i64 last_ub = loop_ub; // Last processed ub; defined even with no chunk
+    u64 cur; // Number of iterations in current chunk
+    u64 max; // Max allowed iterations for current chunk
+    int undersized = 0;
+
+    chunk = 0;
+    next_lb = loop_lb;
+    max = (loop_ub - loop_lb) / loop_st + 1;
+    // The first chunk can consume all iterations
+    while (__kmpc_dispatch_next_8(&loc, gtid, &last, &lb, &ub, &st)) {
+      ++ chunk;
+#ifdef DEBUG
+      printf("chunk=%d, lb=%d, ub=%d\n", chunk, (int)lb, (int)ub);
+#endif
+      // Check if previous chunk (it is not the final chunk) is undersized
+      if (undersized) {
+        printf("Error with chunk %d\n", chunk);
+        err++;
+      }
+      // Check lower and upper bounds
+      if (lb != next_lb) {
+        printf("Error with lb %d, %d, ch %d\n", (int)lb, (int)next_lb, chunk);
+        err++;
+      }
+      if (loop_st > 0) {
+        if (!(ub <= loop_ub)) {
+          printf("Error with ub %d, %d, ch %d\n", (int)ub, (int)loop_ub, chunk);
+          err++;
+        }
+        if (!(lb <= ub)) {
+          printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk);
+          err++;
+        }
+      } else {
+        if (!(ub >= loop_ub)) {
+          printf("Error with ub %d, %d, %d\n", (int)ub, (int)loop_ub, chunk);
+          err++;
+        }
+        if (!(lb >= ub)) {
+          printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk);
+          err++;
+        }
+      } // if
+      // Stride should not change
+      if (!(st == loop_st)) {
+        printf("Error with st %d, %d, ch %d\n", (int)st, (int)loop_st, chunk);
+        err++;
+      }
+      cur = (ub - lb) / loop_st + 1;
+      // Guided scheduling uses FP computations, so current chunk may
+      // be a bit bigger (+1) than allowed maximum
+      if (!(cur <= max + 1)) {
+        printf("Error with iter %d, %d\n", (int)cur, (int)max);
+        err++;
+      }
+      // Update maximum for the next chunk
+      if (cur < max)
+        max = cur;
+      next_lb = ub + loop_st;
+      last_ub = ub;
+      undersized = (cur < loop_chunk);
+    } // while
+    // Must have at least one chunk
+    if (!(chunk > 0)) {
+      printf("Error with chunk %d\n", chunk);
+      err++;
+    }
+    // Must have the right last iteration index
+    if (loop_st > 0) {
+      if (!(last_ub <= loop_ub)) {
+        printf("Error with last1 %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_ub, chunk);
+        err++;
+      }
+      if (!(last_ub + loop_st > loop_ub)) {
+        printf("Error with last2 %d, %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk);
+        err++;
+      }
+    } else {
+      if (!(last_ub >= loop_ub)) {
+        printf("Error with last1 %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_ub, chunk);
+        err++;
+      }
+      if (!(last_ub + loop_st < loop_ub)) {
+        printf("Error with last2 %d, %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk);
+        err++;
+      }
+    } // if
+    // Let non-master threads go
+    loop_sync = 1;
+  } else {
+    int i;
+    // Workers wait for master thread to finish, then call __kmpc_dispatch_next
+    for (i = 0; i < 1000000; ++ i) { // short spin before falling back to delay()
+      if (loop_sync != 0) {
+        break;
+      } // if
+    } // for i
+    while (loop_sync == 0) {
+      delay();
+    } // while
+    // At this moment we do not have any more chunks -- all the chunks already
+    // processed by master thread
+    rc = __kmpc_dispatch_next_8(&loc, gtid, &last, &lb, &ub, &st);
+    if (rc) {
+      printf("Error return value\n");
+      err++;
+    }
+  } // if
+
+  __kmpc_barrier(&loc, gtid);
+  if (tid == 0) {
+    loop_sync = 0; // Restore original state
+#ifdef DEBUG
+    printf("run_loop_64(): at the end\n");
+#endif
+  } // if
+  __kmpc_barrier(&loc, gtid);
+  return err;
+} // run_loop_64
+
+// ---------------------------------------------------------------------------
+// Dispatches [loop_lb, loop_ub] with kmp_sch_guided_simd: thread 0 takes and
+// checks every chunk, the other threads verify none is left. Returns #errors.
+int run_loop_32(int loop_lb, int loop_ub, int loop_st, int loop_chunk) {
+  int err = 0;
+  static int volatile loop_sync = 0; // Set by thread 0 once all chunks are taken
+  int lb, ub, st; // Bounds and stride of the chunk just dispatched
+  int rc;
+  int tid = omp_get_thread_num();
+  int gtid = tid;
+  int last;
+#ifdef DEBUG
+  printf("run_loop_<%d>(gtid=%d, tid=%d, lb=%d, ub=%d, st=%d, ch=%d)\n",
+         (int)sizeof(int), gtid, tid,
+         (int)loop_lb, (int)loop_ub, (int)loop_st, loop_chunk);
+#endif
+  // Don't test degenerate cases that should have been discovered by codegen
+  if (loop_st == 0)
+    return 0;
+  if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+    return 0;
+
+  __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_guided_simd,
+                         loop_lb, loop_ub, loop_st, loop_chunk);
+  if (tid == 0) {
+    // Let the master thread handle the chunks alone
+    int chunk; // No of current chunk
+    int next_lb; // Lower bound of the next chunk
+    int last_ub = loop_ub; // Last processed ub; defined even with no chunk
+    u64 cur; // Number of iterations in current chunk
+    u64 max; // Max allowed iterations for current chunk
+    int undersized = 0;
+
+    chunk = 0;
+    next_lb = loop_lb;
+    max = (loop_ub - loop_lb) / loop_st + 1;
+    // The first chunk can consume all iterations
+    while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
+      ++ chunk;
+#ifdef DEBUG
+      printf("chunk=%d, lb=%d, ub=%d\n", chunk, (int)lb, (int)ub);
+#endif
+      // Check if previous chunk (it is not the final chunk) is undersized
+      if (undersized) {
+        printf("Error with chunk %d\n", chunk);
+        err++;
+      }
+      // Check lower and upper bounds
+      if (lb != next_lb) {
+        printf("Error with lb %d, %d, ch %d\n", (int)lb, (int)next_lb, chunk);
+        err++;
+      }
+      if (loop_st > 0) {
+        if (!(ub <= loop_ub)) {
+          printf("Error with ub %d, %d, ch %d\n", (int)ub, (int)loop_ub, chunk);
+          err++;
+        }
+        if (!(lb <= ub)) {
+          printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk);
+          err++;
+        }
+      } else {
+        if (!(ub >= loop_ub)) {
+          printf("Error with ub %d, %d, %d\n", (int)ub, (int)loop_ub, chunk);
+          err++;
+        }
+        if (!(lb >= ub)) {
+          printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk);
+          err++;
+        }
+      } // if
+      // Stride should not change
+      if (!(st == loop_st)) {
+        printf("Error with st %d, %d, ch %d\n", (int)st, (int)loop_st, chunk);
+        err++;
+      }
+      cur = (ub - lb) / loop_st + 1;
+      // Guided scheduling uses FP computations, so current chunk may
+      // be a bit bigger (+1) than allowed maximum
+      if (!(cur <= max + 1)) {
+        printf("Error with iter %d, %d\n", (int)cur, (int)max);
+        err++;
+      }
+      // Update maximum for the next chunk
+      if (cur < max)
+        max = cur;
+      next_lb = ub + loop_st;
+      last_ub = ub;
+      undersized = (cur < loop_chunk);
+    } // while
+    // Must have at least one chunk
+    if (!(chunk > 0)) {
+      printf("Error with chunk %d\n", chunk);
+      err++;
+    }
+    // Must have the right last iteration index
+    if (loop_st > 0) {
+      if (!(last_ub <= loop_ub)) {
+        printf("Error with last1 %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_ub, chunk);
+        err++;
+      }
+      if (!(last_ub + loop_st > loop_ub)) {
+        printf("Error with last2 %d, %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk);
+        err++;
+      }
+    } else {
+      if (!(last_ub >= loop_ub)) {
+        printf("Error with last1 %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_ub, chunk);
+        err++;
+      }
+      if (!(last_ub + loop_st < loop_ub)) {
+        printf("Error with last2 %d, %d, %d, ch %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk);
+        err++;
+      }
+    } // if
+    // Let non-master threads go
+    loop_sync = 1;
+  } else {
+    int i;
+    // Workers wait for master thread to finish, then call __kmpc_dispatch_next
+    for (i = 0; i < 1000000; ++ i) { // short spin before falling back to delay()
+      if (loop_sync != 0) {
+        break;
+      } // if
+    } // for i
+    while (loop_sync == 0) {
+      delay();
+    } // while
+    // At this moment we do not have any more chunks -- all the chunks already
+    // processed by the master thread
+    rc = __kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st);
+    if (rc) {
+      printf("Error return value\n");
+      err++;
+    }
+  } // if
+
+  __kmpc_barrier(&loc, gtid);
+  if (tid == 0) {
+    loop_sync = 0; // Restore original state
+#ifdef DEBUG
+    printf("run_loop<>(): at the end\n");
+#endif
+  } // if
+  __kmpc_barrier(&loc, gtid);
+  return err;
+} // run_loop_32
+
+// ---------------------------------------------------------------------------
+int run_64(int num_th)
+{ // Runs run_loop_64() over every chunk/stride/bounds combination on num_th threads
+  int err = 0; // Sum of the per-thread failure counts (combined by the reduction)
+#pragma omp parallel num_threads(num_th) reduction(+:err)
+  {
+    int chunk;
+    i64 st, lb, ub;
+    for (chunk = SIMD_LEN; chunk <= 3*SIMD_LEN; chunk += SIMD_LEN) {
+      for (st = 1; st <= 3; ++ st) {
+        for (lb = -3 * num_th * st; lb <= 3 * num_th * st; ++ lb) {
+          for (ub = lb; ub < lb + num_th * (chunk+1) * st; ++ ub) {
+            err += run_loop_64(lb, ub, st, chunk);  // ascending loop
+            err += run_loop_64(ub, lb, -st, chunk); // same range, descending
+          } // for ub
+        } // for lb
+      } // for st
+    } // for chunk
+  }
+  return err;
+} // run_64
+
+int run_32(int num_th)
+{ // Runs run_loop_32() over every chunk/stride/bounds combination on num_th threads
+  int err = 0; // Sum of the per-thread failure counts (combined by the reduction)
+#pragma omp parallel num_threads(num_th) reduction(+:err)
+  {
+    int chunk, st, lb, ub;
+    for (chunk = SIMD_LEN; chunk <= 3*SIMD_LEN; chunk += SIMD_LEN) {
+      for (st = 1; st <= 3; ++ st) {
+        for (lb = -3 * num_th * st; lb <= 3 * num_th * st; ++ lb) {
+          for (ub = lb; ub < lb + num_th * (chunk+1) * st; ++ ub) {
+            err += run_loop_32(lb, ub, st, chunk);  // ascending loop
+            err += run_loop_32(ub, lb, -st, chunk); // same range, descending
+          } // for ub
+        } // for lb
+      } // for st
+    } // for chunk
+  }
+  return err;
+} // run_32
+
+// ---------------------------------------------------------------------------
+int main()
+{
+  int n, err = 0; // err: failed checks accumulated over all team sizes
+  for (n = 1; n <= 4; ++ n) { // repeat both sweeps with 1..4 threads
+    err += run_32(n);
+    err += run_64(n);
+  } // for n
+  if (err)
+    printf("failed with %d errors\n", err);
+  else
+    printf("passed\n");
+  return err != 0; // POSIX keeps only 8 bits of the status; never alias to 0
+}
Propchange: openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_guided.c
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_guided.c
------------------------------------------------------------------------------
svn:keywords = Author Date Id Rev URL
Propchange: openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_guided.c
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c?rev=304724&view=auto
==============================================================================
--- openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c (added)
+++ openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c Mon Jun 5 12:17:33 2017
@@ -0,0 +1,221 @@
+// RUN: %libomp-compile-and-run
+
+// The test checks schedule(simd:runtime)
+// in combination with omp_set_schedule()
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay() Sleep(1);
+#define seten(a,b,c) _putenv_s((a),(b))
+#else
+#include <unistd.h>
+#define delay() usleep(10);
+#define seten(a,b,c) setenv((a),(b),(c))
+#endif
+
+#define SIMD_LEN 4
+int err = 0;
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL.
+enum sched {
+ kmp_sch_static_balanced_chunked = 45,
+ kmp_sch_guided_simd = 46,
+ kmp_sch_runtime_simd = 47,
+};
+typedef unsigned u32;
+typedef long long i64;
+typedef unsigned long long u64;
+typedef struct {
+ int reserved_1;
+ int flags;
+ int reserved_2;
+ int reserved_3;
+ char *psource;
+} id;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ int __kmpc_global_thread_num(id*);
+ void __kmpc_barrier(id*, int gtid);
+ void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
+ void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
+ int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
+ int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+
+// ---------------------------------------------------------------------------
+// Dispatches [loop_lb, loop_ub] with kmp_sch_runtime_simd (chunk = SIMD_LEN) and
+// checks every chunk the calling thread receives against ch = lchunk * SIMD_LEN.
+// NOTE(review): each failure does ++err on the shared global without any sync.
+void
+run_loop(
+    int loop_lb,   // Loop lower bound.
+    int loop_ub,   // Loop upper bound.
+    int loop_st,   // Loop stride.
+    int lchunk     // Expected chunk in units of SIMD_LEN; 0 = no chunk given.
+) {
+  int lb;         // Chunk lower bound.
+  int ub;         // Chunk upper bound.
+  int st;         // Chunk stride.
+  int tid = omp_get_thread_num();
+  int gtid = __kmpc_global_thread_num(&loc);
+  int last = 0;   // Stays 0 if this thread is never handed a chunk.
+  int ch;         // Granularity every non-final chunk must be a multiple of.
+  int no_chunk = 0;
+  if (lchunk == 0) {
+    no_chunk = 1;
+    lchunk = 1;
+  }
+  ch = lchunk * SIMD_LEN;
+#if _DEBUG > 1
+  printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
+         gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
+#endif
+  // Don't test degenerate cases that should have been discovered by codegen.
+  if (loop_st == 0)
+    return;
+  if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+    return;
+  __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd,
+                         loop_lb, loop_ub, loop_st, SIMD_LEN);
+  {
+    // Every thread validates the chunks it is handed.
+    int chunk; // No of current chunk.
+    int last_ub; // Upper bound of the last processed chunk.
+    u64 cur; // Number of iterations in current chunk.
+    u64 max; // Max allowed iterations for current chunk.
+    int undersized = 0;
+    last_ub = loop_ub;
+    chunk = 0;
+    max = (loop_ub - loop_lb) / loop_st + 1;
+    // The first chunk can consume all iterations.
+    while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
+      ++ chunk;
+#if _DEBUG
+      printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
+             tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1));
+#endif
+      // Check if previous chunk (it is not the final chunk) is undersized.
+      if (undersized)
+        printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err);
+      if (loop_st > 0) {
+        if (!(ub <= loop_ub))
+          printf("Error with ub %d, %d, ch %d, err %d\n",
+                 (int)ub, (int)loop_ub, chunk, ++err);
+        if (!(lb <= ub))
+          printf("Error with bounds %d, %d, %d, err %d\n",
+                 (int)lb, (int)ub, chunk, ++err);
+      } else {
+        if (!(ub >= loop_ub))
+          printf("Error with ub %d, %d, %d, err %d\n",
+                 (int)ub, (int)loop_ub, chunk, ++err);
+        if (!(lb >= ub))
+          printf("Error with bounds %d, %d, %d, err %d\n",
+                 (int)lb, (int)ub, chunk, ++err);
+      } // if
+      // Stride should not change.
+      if (!(st == loop_st))
+        printf("Error with st %d, %d, ch %d, err %d\n",
+               (int)st, (int)loop_st, chunk, ++err);
+      cur = ( ub - lb ) / loop_st + 1;
+      // Guided scheduling uses FP computations, so current chunk may
+      // be a bit bigger (+1) than allowed maximum.
+      if (!( cur <= max + 1))
+        printf("Error with iter %d, %d, err %d\n", (int)cur, (int)max, ++err);
+      // Last chunk: at most ch; any other chunk: a multiple of ch.
+      if (last) {
+        if (!no_chunk && cur > ch)
+          printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
+                 (int)cur, ch, tid, ++err);
+      } else {
+        if (cur % ch)
+          printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n",
+                 chunk, (int)cur, ch, tid, ++err);
+      }
+      if (cur < max) // Update maximum for the next chunk.
+        max = cur;
+      last_ub = ub;
+      undersized = (cur < ch);
+#if _DEBUG > 1
+      if (last)
+        printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
+               undersized,(int)cur,ch,tid,ub,lb,loop_st);
+#endif
+    } // while
+    // Must have the right last iteration index.
+    if (loop_st > 0) {
+      if (!(last_ub <= loop_ub))
+        printf("Error with last1 %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_ub, chunk, ++err);
+      if (last && !(last_ub + loop_st > loop_ub))
+        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+    } else {
+      if (!(last_ub >= loop_ub))
+        printf("Error with last1 %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_ub, chunk, ++err);
+      if (last && !(last_ub + loop_st < loop_ub))
+        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+    } // if
+  }
+  __kmpc_barrier(&loc, gtid);
+} // run_loop
+
+int main(int argc, char *argv[])
+{
+  // Drives run_loop() through a series of omp_set_schedule() settings.
+  // Each scenario holds the arguments for omp_set_schedule() plus the chunk
+  // value handed to run_loop(): 0 where the schedule carries no chunk, 1 for
+  // the rest (including modifier 0 on dynamic/guided, see the entries below).
+
+  static const struct {
+    omp_sched_t kind; // schedule kind passed to omp_set_schedule()
+    int modifier;     // chunk size passed to omp_set_schedule()
+    int lchunk;       // chunk value passed to run_loop()
+  } scenarios[] = {
+    // static (no chunk)
+    {omp_sched_static, 0, 0},
+    // auto (chunk should be ignored)
+    {omp_sched_auto, 0, 0},
+    // static,1
+    {omp_sched_static, 1, 1},
+    // dynamic,1
+    {omp_sched_dynamic, 1, 1},
+    // guided,1
+    {omp_sched_guided, 1, 1},
+    // dynamic,0 - use default chunk size 1
+    {omp_sched_dynamic, 0, 1},
+    // guided,0 - use default chunk size 1
+    {omp_sched_guided, 0, 1},
+  };
+
+  const int total = (int)(sizeof(scenarios) / sizeof(scenarios[0]));
+  int idx;
+
+  for (idx = 0; idx < total; ++idx) {
+    // Select the schedule first, then let a default-sized team run the
+    // checker; the scenarios execute strictly one after another.
+    omp_set_schedule(scenarios[idx].kind, scenarios[idx].modifier);
+#pragma omp parallel
+    run_loop(0, 26, 1, scenarios[idx].lchunk);
+  }
+
+  // err accumulates the failures reported by run_loop() in every scenario.
+  if (err == 0) {
+    printf("passed\n");
+    return 0;
+  }
+  printf("failed, err = %d\n", err);
+  return 1;
+}
Propchange: openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
------------------------------------------------------------------------------
svn:keywords = Author Date Id Rev URL
Propchange: openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c?rev=304724&view=auto
==============================================================================
--- openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c (added)
+++ openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c Mon Jun 5 12:17:33 2017
@@ -0,0 +1,196 @@
+// RUN: %libomp-compile
+// RUN: env OMP_SCHEDULE=guided %libomp-run
+// RUN: env OMP_SCHEDULE=guided,1 %libomp-run 1
+// RUN: env OMP_SCHEDULE=guided,2 %libomp-run 2
+// RUN: env OMP_SCHEDULE=dynamic %libomp-run
+// RUN: env OMP_SCHEDULE=dynamic,1 %libomp-run 1
+// RUN: env OMP_SCHEDULE=dynamic,2 %libomp-run 2
+// RUN: env OMP_SCHEDULE=auto %libomp-run
+
+// The test checks schedule(simd:runtime)
+// in combination with OMP_SCHEDULE=guided[,chunk]
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay() Sleep(1);
+#define seten(a,b,c) _putenv_s((a),(b))
+#else
+#include <unistd.h>
+#define delay() usleep(10);
+#define seten(a,b,c) setenv((a),(b),(c))
+#endif
+
+#define UBOUND 100
+#define SIMD_LEN 4
+int err = 0;
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL.
+enum sched {
+ kmp_sch_static_balanced_chunked = 45,
+ kmp_sch_guided_simd = 46,
+ kmp_sch_runtime_simd = 47,
+};
+typedef unsigned u32;
+typedef long long i64;
+typedef unsigned long long u64;
+typedef struct {
+ int reserved_1;
+ int flags;
+ int reserved_2;
+ int reserved_3;
+ char *psource;
+} id;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ int __kmpc_global_thread_num(id*);
+ void __kmpc_barrier(id*, int gtid);
+ void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
+ void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
+ int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
+ int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+
+// ---------------------------------------------------------------------------
+// Dispatches [loop_lb, loop_ub] with kmp_sch_runtime_simd (chunk = SIMD_LEN) and
+// checks every chunk the calling thread receives against ch = lchunk * SIMD_LEN.
+// NOTE(review): each failure does ++err on the shared global without any sync.
+void
+run_loop(
+    int loop_lb,   // Loop lower bound.
+    int loop_ub,   // Loop upper bound.
+    int loop_st,   // Loop stride.
+    int lchunk     // Expected chunk in units of SIMD_LEN; 0 = no chunk given.
+) {
+  int lb;         // Chunk lower bound.
+  int ub;         // Chunk upper bound.
+  int st;         // Chunk stride.
+  int tid = omp_get_thread_num();
+  int gtid = __kmpc_global_thread_num(&loc);
+  int last = 0;   // Stays 0 if this thread is never handed a chunk.
+  int ch;         // Granularity every non-final chunk must be a multiple of.
+  int no_chunk = 0;
+  if (lchunk == 0) {
+    no_chunk = 1;
+    lchunk = 1;
+  }
+  ch = lchunk * SIMD_LEN;
+#if _DEBUG > 1
+  printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
+         gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
+#endif
+  // Don't test degenerate cases that should have been discovered by codegen.
+  if (loop_st == 0)
+    return;
+  if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+    return;
+  __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd,
+                         loop_lb, loop_ub, loop_st, SIMD_LEN);
+  {
+    // Every thread validates the chunks it is handed.
+    int chunk; // No of current chunk.
+    int last_ub; // Upper bound of the last processed chunk.
+    u64 cur; // Number of iterations in current chunk.
+    u64 max; // Max allowed iterations for current chunk.
+    int undersized = 0;
+    last_ub = loop_ub;
+    chunk = 0;
+    max = (loop_ub - loop_lb) / loop_st + 1;
+    // The first chunk can consume all iterations.
+    while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
+      ++ chunk;
+#if _DEBUG
+      printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
+             tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1));
+#endif
+      // Check if previous chunk (it is not the final chunk) is undersized.
+      if (undersized)
+        printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err);
+      if (loop_st > 0) {
+        if (!(ub <= loop_ub))
+          printf("Error with ub %d, %d, ch %d, err %d\n",
+                 (int)ub, (int)loop_ub, chunk, ++err);
+        if (!(lb <= ub))
+          printf("Error with bounds %d, %d, %d, err %d\n",
+                 (int)lb, (int)ub, chunk, ++err);
+      } else {
+        if (!(ub >= loop_ub))
+          printf("Error with ub %d, %d, %d, err %d\n",
+                 (int)ub, (int)loop_ub, chunk, ++err);
+        if (!(lb >= ub))
+          printf("Error with bounds %d, %d, %d, err %d\n",
+                 (int)lb, (int)ub, chunk, ++err);
+      } // if
+      // Stride should not change.
+      if (!(st == loop_st))
+        printf("Error with st %d, %d, ch %d, err %d\n",
+               (int)st, (int)loop_st, chunk, ++err);
+      cur = ( ub - lb ) / loop_st + 1;
+      // Guided scheduling uses FP computations, so current chunk may
+      // be a bit bigger (+1) than allowed maximum.
+      if (!( cur <= max + 1))
+        printf("Error with iter %d, %d, err %d\n", (int)cur, (int)max, ++err);
+      // Last chunk: at most ch; any other chunk: a multiple of ch.
+      if (!last && cur % ch)
+        printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n",
+               chunk, (int)cur, ch, tid, ++err);
+      if (last && !no_chunk && cur > ch)
+        printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
+               (int)cur, ch, tid, ++err);
+      if (cur < max) // Update maximum for the next chunk.
+        max = cur;
+      last_ub = ub;
+      undersized = (cur < ch);
+#if _DEBUG > 1
+      if (last)
+        printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
+               undersized,(int)cur,ch,tid,ub,lb,loop_st);
+#endif
+    } // while
+    // Must have the right last iteration index.
+    if (loop_st > 0) {
+      if (!(last_ub <= loop_ub))
+        printf("Error with last1 %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_ub, chunk, ++err);
+      if (last && !(last_ub + loop_st > loop_ub))
+        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+    } else {
+      if (!(last_ub >= loop_ub))
+        printf("Error with last1 %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_ub, chunk, ++err);
+      if (last && !(last_ub + loop_st < loop_ub))
+        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+    } // if
+  }
+  __kmpc_barrier(&loc, gtid);
+} // run_loop
+
+int main(int argc, char *argv[])
+{
+  // The optional first argument is the chunk size the test expects (the RUN
+  // lines pass the same value as in OMP_SCHEDULE); without it, 0 is used.
+  const int expected = (argc > 1) ? atoi(argv[1]) : 0;
+
+  // The checker runs on every thread of a default-sized team.
+#pragma omp parallel
+  run_loop(0, UBOUND, 1, expected);
+
+  if (err == 0) {
+    printf("passed\n");
+    return 0;
+  }
+  printf("failed, err = %d\n", err);
+  return 1;
+}
Propchange: openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
------------------------------------------------------------------------------
svn:keywords = Author Date Id Rev URL
Propchange: openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c?rev=304724&view=auto
==============================================================================
--- openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c (added)
+++ openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c Mon Jun 5 12:17:33 2017
@@ -0,0 +1,201 @@
+// RUN: %libomp-compile && %libomp-run
+// RUN: %libomp-run 1 && %libomp-run 2
+
+// The test checks schedule(simd:runtime)
+// in combination with OMP_SCHEDULE=static[,chunk]
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+#if defined(WIN32) || defined(_WIN32)
+#include <windows.h>
+#define delay() Sleep(1); // NOTE: the expansion already ends with ';'
+#define seten(a,b,c) _putenv_s((a),(b)) // third argument c is not forwarded here
+#else
+#include <unistd.h>
+#define delay() usleep(10); // NOTE: the expansion already ends with ';'
+#define seten(a,b,c) setenv((a),(b),(c)) // forwards all three arguments to setenv
+#endif
+
+#define SIMD_LEN 4
+int err = 0;
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL.
+enum sched { // schedule kinds passed to __kmpc_dispatch_init_*; values match kmp.h
+ kmp_sch_static_balanced_chunked = 45, // static with chunk adjustment (e.g., simd)
+ kmp_sch_guided_simd = 46, // guided with chunk adjustment
+ kmp_sch_runtime_simd = 47, // runtime with chunk adjustment
+};
+typedef unsigned u32; // short alias for unsigned
+typedef long long i64; // parameter type of the __kmpc_dispatch_init_8 prototype
+typedef unsigned long long u64; // used for the iteration counters in run_loop()
+typedef struct { // location descriptor handed to the __kmpc_* entry points
+ int reserved_1;
+ int flags; // the loc instance in this file sets it to 2; meaning not shown here
+ int reserved_2;
+ int reserved_3;
+ char *psource; // the loc instance in this file uses ";file;func;0;0;;"
+} id; // NOTE(review): presumably mirrors the runtime's own location struct - confirm against kmp.h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ int __kmpc_global_thread_num(id*); // result is used as the gtid argument of the calls below
+ void __kmpc_barrier(id*, int gtid); // called once at the end of run_loop()
+ void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int); // called as (loc, gtid, kind, lb, ub, stride, SIMD_LEN)
+ void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64); // i64 variant; not called in this file
+ int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*); // called as (loc, gtid, &last, &lb, &ub, &st); looped on while non-zero
+ int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*); // not called in this file
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"}; // single descriptor passed to every __kmpc_* call in this file
+
+// ---------------------------------------------------------------------------
+// Dispatches one loop with schedule kind kmp_sch_runtime_simd and validates
+// every chunk the calling thread receives from __kmpc_dispatch_next_4().
+// Each failed check prints a message and increments the global err counter.
+// main() calls this from inside a parallel region, so every thread of the
+// team runs it; it finishes with __kmpc_barrier().
+//
+//   loop_lb, loop_ub - inclusive loop bounds
+//   loop_st          - loop stride; 0 or an empty range returns immediately
+//   lchunk           - requested chunk size in units of SIMD_LEN iterations;
+//                      0 means "no chunk given" (treated as 1, and the size
+//                      of the last chunk is then not checked)
+//
+// NOTE(review): err is a plain global incremented by several threads without
+// synchronization; the count may be inexact when many checks fail at once.
+void
+run_loop(
+    int loop_lb, // Loop lower bound.
+    int loop_ub, // Loop upper bound.
+    int loop_st, // Loop stride.
+    int lchunk
+) {
+  int lb; // Chunk lower bound.
+  int ub; // Chunk upper bound.
+  int st; // Chunk stride.
+  int tid = omp_get_thread_num();
+  int gtid = __kmpc_global_thread_num(&loc);
+  // Zero-initialized so the checks after the loop never read an
+  // indeterminate value if no dispatch call stored a last-chunk flag.
+  int last = 0;
+  int ch;
+  int no_chunk = 0;
+  if (lchunk == 0) {
+    no_chunk = 1;
+    lchunk = 1;
+  }
+  ch = lchunk * SIMD_LEN;
+#if _DEBUG > 1
+  printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
+         gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
+#endif
+  // Don't test degenerate cases that should have been discovered by codegen.
+  // Nothing above divides by loop_st, so a zero stride is rejected safely.
+  if (loop_st == 0)
+    return;
+  if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
+    return;
+  __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd,
+                         loop_lb, loop_ub, loop_st, SIMD_LEN);
+  {
+    int chunk = 0;         // No of current chunk (per thread).
+    int last_ub = loop_ub; // Upper bound of the last processed chunk.
+    u64 cur;               // Number of iterations in current chunk.
+    u64 max;               // Max allowed iterations for current chunk.
+    int undersized = 0;    // Previous chunk was smaller than ch.
+    // The first chunk can consume all iterations.
+    max = (loop_ub - loop_lb) / loop_st + 1;
+    while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
+      ++chunk;
+#if _DEBUG
+      printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
+             tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1));
+#endif
+      // Only the final chunk may be undersized, so getting another chunk
+      // after an undersized one is an error.
+      if (undersized) {
+        printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err);
+      }
+      // Chunk bounds must be ordered and stay inside the loop range.
+      if (loop_st > 0) {
+        if (!(ub <= loop_ub)) {
+          printf("Error with ub %d, %d, ch %d, err %d\n",
+                 (int)ub, (int)loop_ub, chunk, ++err);
+        }
+        if (!(lb <= ub)) {
+          printf("Error with bounds %d, %d, %d, err %d\n",
+                 (int)lb, (int)ub, chunk, ++err);
+        }
+      } else {
+        if (!(ub >= loop_ub)) {
+          printf("Error with ub %d, %d, %d, err %d\n",
+                 (int)ub, (int)loop_ub, chunk, ++err);
+        }
+        if (!(lb >= ub)) {
+          printf("Error with bounds %d, %d, %d, err %d\n",
+                 (int)lb, (int)ub, chunk, ++err);
+        }
+      } // if
+      // Stride should not change.
+      if (!(st == loop_st)) {
+        printf("Error with st %d, %d, ch %d, err %d\n",
+               (int)st, (int)loop_st, chunk, ++err);
+      }
+      cur = ( ub - lb ) / loop_st + 1;
+      // Guided scheduling uses FP computations, so current chunk may
+      // be a bit bigger (+1) than allowed maximum.
+      if (!( cur <= max + 1)) {
+        // cur and max are u64; cast them to match the %d conversions.
+        printf("Error with iter %d, %d, err %d\n", (int)cur, (int)max, ++err);
+      }
+      if (last) {
+        // With an explicit chunk the last chunk must not exceed ch.
+        if (!no_chunk && cur > ch) {
+          printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
+                 (int)cur, ch, tid, ++err);
+        }
+      } else {
+        // Every non-final chunk must be a whole multiple of ch.
+        if (cur % ch) {
+          printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n",
+                 chunk, (int)cur, ch, tid, ++err);
+        }
+      }
+      // Update maximum for the next chunk.
+      if (cur < max)
+        max = cur;
+      last_ub = ub;
+      undersized = (cur < ch);
+#if _DEBUG > 1
+      if (last)
+        printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
+               undersized,(int)cur,ch,tid,ub,lb,loop_st);
+#endif
+    } // while
+    // Must have the right last iteration index.
+    if (loop_st > 0) {
+      if (!(last_ub <= loop_ub)) {
+        printf("Error with last1 %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_ub, chunk, ++err);
+      }
+      if (last && !(last_ub + loop_st > loop_ub)) {
+        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+      }
+    } else {
+      if (!(last_ub >= loop_ub)) {
+        printf("Error with last1 %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_ub, chunk, ++err);
+      }
+      if (last && !(last_ub + loop_st < loop_ub)) {
+        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
+               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
+      }
+    } // if
+  }
+  __kmpc_barrier(&loc, gtid);
+} // run_loop
+
+// Test driver for schedule(simd:runtime) combined with
+// OMP_SCHEDULE=static[,chunk]. The optional first command-line argument is
+// the chunk size: it is appended to "static," to form the OMP_SCHEDULE value
+// and is also passed to run_loop(). Every thread of the parallel region then
+// runs run_loop() over iterations 0..26 with stride 1.
+// Returns 0 and prints "passed" when no check failed; returns 1 otherwise,
+// including when the environment variable cannot be prepared.
+int main(int argc, char *argv[])
+{
+  int chunk = 0;
+  if (argc > 1) {
+    // Ask snprintf for the formatted length first so the buffer is always
+    // large enough; this needs only <stdio.h> and <stdlib.h>.
+    int len = snprintf(NULL, 0, "static,%s", argv[1]);
+    char *buf = (len < 0) ? NULL : malloc((size_t)len + 1);
+    if (buf == NULL) {
+      printf("failed, cannot build OMP_SCHEDULE value\n");
+      return 1;
+    }
+    snprintf(buf, (size_t)len + 1, "static,%s", argv[1]);
+    // expect chunk size as a parameter
+    chunk = atoi(argv[1]);
+    if (seten("OMP_SCHEDULE", buf, 1) != 0) {
+      printf("failed, cannot set OMP_SCHEDULE\n");
+      free(buf);
+      return 1;
+    }
+    printf("Testing schedule(simd:%s)\n", buf);
+    free(buf);
+  } else {
+    if (seten("OMP_SCHEDULE", "static", 1) != 0) {
+      printf("failed, cannot set OMP_SCHEDULE\n");
+      return 1;
+    }
+    printf("Testing schedule(simd:static)\n");
+  }
+#pragma omp parallel
+  run_loop(0, 26, 1, chunk);
+  if (err) {
+    printf("failed, err = %d\n", err);
+    return 1;
+  } else {
+    printf("passed\n");
+    return 0;
+  }
+}
Propchange: openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
------------------------------------------------------------------------------
svn:keywords = Author Date Id Rev URL
Propchange: openmp/trunk/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
------------------------------------------------------------------------------
svn:mime-type = text/plain
More information about the Openmp-commits
mailing list