[Openmp-commits] [openmp] r262532 - Add new OpenMP 4.5 doacross loop nest feature
Jonathan Peyton via Openmp-commits
openmp-commits at lists.llvm.org
Wed Mar 2 14:42:07 PST 2016
Author: jlpeyton
Date: Wed Mar 2 16:42:06 2016
New Revision: 262532
URL: http://llvm.org/viewvc/llvm-project?rev=262532&view=rev
Log:
Add new OpenMP 4.5 doacross loop nest feature
From the standard: A doacross loop nest is a loop nest that has cross-iteration
dependence. An iteration is dependent on one or more lexicographically earlier
iterations. The ordered clause parameter on a loop directive identifies the
loop(s) associated with the doacross loop nest.
The init/fini routines allocate/free doacross buffer(s) for each loop for each
thread. The wait routine waits for a flag designated by the dependence vector.
The post routine sets the flag designated by current iteration vector. We use
a similar technique of shared buffer indices that covers up to 7 nowait loops
executed simultaneously by different threads (number 7 has no real meaning,
just heuristic value). Also, the sizes of the structures are kept intact by
reducing the dummy arrays.
This needs to be put into the OpenMP runtime library in order for the compiler
team to develop the compiler side of the implementation.
Differential Revision: http://reviews.llvm.org/D17399
Modified:
openmp/trunk/runtime/src/dllexports
openmp/trunk/runtime/src/kmp.h
openmp/trunk/runtime/src/kmp_csupport.c
openmp/trunk/runtime/src/kmp_dispatch.cpp
openmp/trunk/runtime/src/kmp_runtime.c
Modified: openmp/trunk/runtime/src/dllexports
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/dllexports?rev=262532&r1=262531&r2=262532&view=diff
==============================================================================
--- openmp/trunk/runtime/src/dllexports (original)
+++ openmp/trunk/runtime/src/dllexports Wed Mar 2 16:42:06 2016
@@ -389,6 +389,10 @@ kmpc_set_defaults
%ifdef OMP_41
__kmpc_proxy_task_completed 259
__kmpc_proxy_task_completed_ooo 260
+ __kmpc_doacross_init 261
+ __kmpc_doacross_wait 262
+ __kmpc_doacross_post 263
+ __kmpc_doacross_fini 264
%endif
%endif
Modified: openmp/trunk/runtime/src/kmp.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp.h?rev=262532&r1=262531&r2=262532&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp.h (original)
+++ openmp/trunk/runtime/src/kmp.h Wed Mar 2 16:42:06 2016
@@ -1665,7 +1665,7 @@ typedef struct dispatch_shared_info64 {
volatile kmp_uint64 iteration;
volatile kmp_uint64 num_done;
volatile kmp_uint64 ordered_iteration;
- kmp_int64 ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size after making ordered_iteration scalar
+ kmp_int64 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making ordered_iteration scalar
} dispatch_shared_info64_t;
typedef struct dispatch_shared_info {
@@ -1673,8 +1673,12 @@ typedef struct dispatch_shared_info {
dispatch_shared_info32_t s32;
dispatch_shared_info64_t s64;
} u;
-/* volatile kmp_int32 dispatch_abort; depricated */
volatile kmp_uint32 buffer_index;
+#if OMP_41_ENABLED
+ volatile kmp_int32 doacross_buf_idx; // teamwise index
+ volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1)
+ kmp_int32 doacross_num_done; // count finished threads
+#endif
} dispatch_shared_info_t;
typedef struct kmp_disp {
@@ -1688,7 +1692,13 @@ typedef struct kmp_disp {
dispatch_private_info_t *th_disp_buffer;
kmp_int32 th_disp_index;
+#if OMP_41_ENABLED
+ kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index
+ volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags
+ kmp_int64 *th_doacross_info; // info on loop bounds
+#else
void* dummy_padding[2]; // make it 64 bytes on Intel(R) 64
+#endif
#if KMP_USE_INTERNODE_ALIGNMENT
char more_padding[INTERNODE_CACHE_LINE];
#endif
@@ -3543,7 +3553,17 @@ KMP_EXPORT void __kmpc_push_num_threads(
KMP_EXPORT void __kmpc_push_proc_bind( ident_t *loc, kmp_int32 global_tid, int proc_bind );
KMP_EXPORT void __kmpc_push_num_teams( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads );
KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...);
-
+#endif
+#if OMP_41_ENABLED
+struct kmp_dim { // loop bounds info casted to kmp_int64
+ kmp_int64 lo; // lower
+ kmp_int64 up; // upper
+ kmp_int64 st; // stride
+};
+KMP_EXPORT void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 num_dims, struct kmp_dim * dims);
+KMP_EXPORT void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 *vec);
+KMP_EXPORT void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 *vec);
+KMP_EXPORT void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
#endif
KMP_EXPORT void*
Modified: openmp/trunk/runtime/src/kmp_csupport.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_csupport.c?rev=262532&r1=262531&r2=262532&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_csupport.c (original)
+++ openmp/trunk/runtime/src/kmp_csupport.c Wed Mar 2 16:42:06 2016
@@ -3049,5 +3049,294 @@ void __kmpc_place_threads(int nS, int sO
__kmp_place_num_threads_per_core = nT;
}
+#if OMP_41_ENABLED
+/*!
+@ingroup WORK_SHARING
+@param loc source location information.
+@param gtid global thread number.
+@param num_dims number of associated doacross loops.
+@param dims info on loops bounds.
+
+Initialize doacross loop information.
+Expect compiler send us inclusive bounds,
+e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
+*/
+void
+__kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, struct kmp_dim * dims)
+{
+ int j, idx;
+ kmp_int64 last, trace_count;
+ kmp_info_t *th = __kmp_threads[gtid];
+ kmp_team_t *team = th->th.th_team;
+ kmp_uint32 *flags;
+ kmp_disp_t *pr_buf = th->th.th_dispatch;
+ dispatch_shared_info_t *sh_buf;
+
+ KA_TRACE(20,("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
+ gtid, num_dims, !team->t.t_serialized));
+ KMP_DEBUG_ASSERT(dims != NULL);
+ KMP_DEBUG_ASSERT(num_dims > 0);
+
+ if( team->t.t_serialized ) {
+ KA_TRACE(20,("__kmpc_doacross_init() exit: serialized team\n"));
+ return; // no dependencies if team is serialized
+ }
+ KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
+ idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for the next loop
+ sh_buf = &team->t.t_disp_buffer[idx % KMP_MAX_DISP_BUF];
+
+ // Save bounds info into allocated private buffer
+ KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
+ pr_buf->th_doacross_info =
+ (kmp_int64*)__kmp_thread_malloc(th, sizeof(kmp_int64)*(4 * num_dims + 1));
+ KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
+ pr_buf->th_doacross_info[0] = (kmp_int64)num_dims; // first element is number of dimensions
+ // Save also address of num_done in order to access it later without knowing the buffer index
+ pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
+ pr_buf->th_doacross_info[2] = dims[0].lo;
+ pr_buf->th_doacross_info[3] = dims[0].up;
+ pr_buf->th_doacross_info[4] = dims[0].st;
+ last = 5;
+ for( j = 1; j < num_dims; ++j ) {
+ kmp_int64 range_length; // To keep ranges of all dimensions but the first dims[0]
+ if( dims[j].st == 1 ) { // most common case
+ // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
+ range_length = dims[j].up - dims[j].lo + 1;
+ } else {
+ if( dims[j].st > 0 ) {
+ KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
+ range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
+ } else { // negative increment
+ KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
+ range_length = (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
+ }
+ }
+ pr_buf->th_doacross_info[last++] = range_length;
+ pr_buf->th_doacross_info[last++] = dims[j].lo;
+ pr_buf->th_doacross_info[last++] = dims[j].up;
+ pr_buf->th_doacross_info[last++] = dims[j].st;
+ }
+
+ // Compute total trip count.
+ // Start with range of dims[0] which we don't need to keep in the buffer.
+ if( dims[0].st == 1 ) { // most common case
+ trace_count = dims[0].up - dims[0].lo + 1;
+ } else if( dims[0].st > 0 ) {
+ KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
+ trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
+ } else { // negative increment
+ KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
+ trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
+ }
+ for( j = 1; j < num_dims; ++j ) {
+ trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
+ }
+ KMP_DEBUG_ASSERT(trace_count > 0);
+
+ // Check if shared buffer is not occupied by other loop (idx - KMP_MAX_DISP_BUF)
+ if( idx != sh_buf->doacross_buf_idx ) {
+ // Shared buffer is occupied, wait for it to be free
+ __kmp_wait_yield_4( (kmp_uint32*)&sh_buf->doacross_buf_idx, idx, __kmp_eq_4, NULL );
+ }
+ // Check if we are the first thread. After the CAS the first thread gets 0,
+ // others get 1 if initialization is in progress, allocated pointer otherwise.
+ flags = (kmp_uint32*)KMP_COMPARE_AND_STORE_RET64(
+ (kmp_int64*)&sh_buf->doacross_flags,NULL,(kmp_int64)1);
+ if( flags == NULL ) {
+ // we are the first thread, allocate the array of flags
+ kmp_int64 size = trace_count / 8 + 8; // in bytes, use single bit per iteration
+ sh_buf->doacross_flags = (kmp_uint32*)__kmp_thread_calloc(th, size, 1);
+ } else if( (kmp_int64)flags == 1 ) {
+ // initialization is still in progress, need to wait
+ while( (volatile kmp_int64)sh_buf->doacross_flags == 1 ) {
+ KMP_YIELD(TRUE);
+ }
+ }
+ KMP_DEBUG_ASSERT((kmp_int64)sh_buf->doacross_flags > 1); // check value of pointer
+ pr_buf->th_doacross_flags = sh_buf->doacross_flags; // save private copy in order to not
+ // touch shared buffer on each iteration
+ KA_TRACE(20,("__kmpc_doacross_init() exit: T#%d\n", gtid));
+}
+
+void
+__kmpc_doacross_wait(ident_t *loc, int gtid, long long *vec)
+{
+ kmp_int32 shft, num_dims, i;
+ kmp_uint32 flag;
+ kmp_int64 iter_number; // iteration number of "collapsed" loop nest
+ kmp_info_t *th = __kmp_threads[gtid];
+ kmp_team_t *team = th->th.th_team;
+ kmp_disp_t *pr_buf;
+ kmp_int64 lo, up, st;
+
+ KA_TRACE(20,("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
+ if( team->t.t_serialized ) {
+ KA_TRACE(20,("__kmpc_doacross_wait() exit: serialized team\n"));
+ return; // no dependencies if team is serialized
+ }
+
+ // calculate sequential iteration number and check out-of-bounds condition
+ pr_buf = th->th.th_dispatch;
+ KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
+ num_dims = pr_buf->th_doacross_info[0];
+ lo = pr_buf->th_doacross_info[2];
+ up = pr_buf->th_doacross_info[3];
+ st = pr_buf->th_doacross_info[4];
+ if( st == 1 ) { // most common case
+ if( vec[0] < lo || vec[0] > up ) {
+ KA_TRACE(20,(
+ "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
+ gtid, vec[0], lo, up));
+ return;
+ }
+ iter_number = vec[0] - lo;
+ } else if( st > 0 ) {
+ if( vec[0] < lo || vec[0] > up ) {
+ KA_TRACE(20,(
+ "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
+ gtid, vec[0], lo, up));
+ return;
+ }
+ iter_number = (kmp_uint64)(vec[0] - lo) / st;
+ } else { // negative increment
+ if( vec[0] > lo || vec[0] < up ) {
+ KA_TRACE(20,(
+ "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
+ gtid, vec[0], lo, up));
+ return;
+ }
+ iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
+ }
+ for( i = 1; i < num_dims; ++i ) {
+ kmp_int64 iter, ln;
+ kmp_int32 j = i * 4;
+ ln = pr_buf->th_doacross_info[j + 1];
+ lo = pr_buf->th_doacross_info[j + 2];
+ up = pr_buf->th_doacross_info[j + 3];
+ st = pr_buf->th_doacross_info[j + 4];
+ if( st == 1 ) {
+ if( vec[i] < lo || vec[i] > up ) {
+ KA_TRACE(20,(
+ "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
+ gtid, vec[i], lo, up));
+ return;
+ }
+ iter = vec[i] - lo;
+ } else if( st > 0 ) {
+ if( vec[i] < lo || vec[i] > up ) {
+ KA_TRACE(20,(
+ "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
+ gtid, vec[i], lo, up));
+ return;
+ }
+ iter = (kmp_uint64)(vec[i] - lo) / st;
+ } else { // st < 0
+ if( vec[i] > lo || vec[i] < up ) {
+ KA_TRACE(20,(
+ "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
+ gtid, vec[i], lo, up));
+ return;
+ }
+ iter = (kmp_uint64)(lo - vec[i]) / (-st);
+ }
+ iter_number = iter + ln * iter_number;
+ }
+ shft = iter_number % 32; // use 32-bit granularity
+ iter_number >>= 5; // divided by 32
+ flag = 1 << shft;
+ while( (flag & pr_buf->th_doacross_flags[iter_number]) == 0 ) {
+ KMP_YIELD(TRUE);
+ }
+ KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
+ gtid, (iter_number<<5)+shft));
+}
+
+void
+__kmpc_doacross_post(ident_t *loc, int gtid, long long *vec)
+{
+ kmp_int32 shft, num_dims, i;
+ kmp_uint32 flag;
+ kmp_int64 iter_number; // iteration number of "collapsed" loop nest
+ kmp_info_t *th = __kmp_threads[gtid];
+ kmp_team_t *team = th->th.th_team;
+ kmp_disp_t *pr_buf;
+ kmp_int64 lo, st;
+
+ KA_TRACE(20,("__kmpc_doacross_post() enter: called T#%d\n", gtid));
+ if( team->t.t_serialized ) {
+ KA_TRACE(20,("__kmpc_doacross_post() exit: serialized team\n"));
+ return; // no dependencies if team is serialized
+ }
+
+ // calculate sequential iteration number (same as in "wait" but no out-of-bounds checks)
+ pr_buf = th->th.th_dispatch;
+ KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
+ num_dims = pr_buf->th_doacross_info[0];
+ lo = pr_buf->th_doacross_info[2];
+ st = pr_buf->th_doacross_info[4];
+ if( st == 1 ) { // most common case
+ iter_number = vec[0] - lo;
+ } else if( st > 0 ) {
+ iter_number = (kmp_uint64)(vec[0] - lo) / st;
+ } else { // negative increment
+ iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
+ }
+ for( i = 1; i < num_dims; ++i ) {
+ kmp_int64 iter, ln;
+ kmp_int32 j = i * 4;
+ ln = pr_buf->th_doacross_info[j + 1];
+ lo = pr_buf->th_doacross_info[j + 2];
+ st = pr_buf->th_doacross_info[j + 4];
+ if( st == 1 ) {
+ iter = vec[i] - lo;
+ } else if( st > 0 ) {
+ iter = (kmp_uint64)(vec[i] - lo) / st;
+ } else { // st < 0
+ iter = (kmp_uint64)(lo - vec[i]) / (-st);
+ }
+ iter_number = iter + ln * iter_number;
+ }
+ shft = iter_number % 32; // use 32-bit granularity
+ iter_number >>= 5; // divided by 32
+ flag = 1 << shft;
+ if( (flag & pr_buf->th_doacross_flags[iter_number]) == 0 )
+ KMP_TEST_THEN_OR32( (kmp_int32*)&pr_buf->th_doacross_flags[iter_number], (kmp_int32)flag );
+ KA_TRACE(20,("__kmpc_doacross_post() exit: T#%d iter %lld posted\n",
+ gtid, (iter_number<<5)+shft));
+}
+
+void
+__kmpc_doacross_fini(ident_t *loc, int gtid)
+{
+ kmp_int64 num_done;
+ kmp_info_t *th = __kmp_threads[gtid];
+ kmp_team_t *team = th->th.th_team;
+ kmp_disp_t *pr_buf = th->th.th_dispatch;
+
+ KA_TRACE(20,("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
+ if( team->t.t_serialized ) {
+ KA_TRACE(20,("__kmpc_doacross_fini() exit: serialized team %p\n", team));
+ return; // nothing to do
+ }
+ num_done = KMP_TEST_THEN_INC64((kmp_int64*)pr_buf->th_doacross_info[1]) + 1;
+ if( num_done == th->th.th_team_nproc ) {
+ // we are the last thread, need to free shared resources
+ int idx = pr_buf->th_doacross_buf_idx - 1;
+ dispatch_shared_info_t *sh_buf = &team->t.t_disp_buffer[idx % KMP_MAX_DISP_BUF];
+ KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == (kmp_int64)&sh_buf->doacross_num_done);
+ KMP_DEBUG_ASSERT(num_done == (kmp_int64)sh_buf->doacross_num_done);
+ KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
+ __kmp_thread_free(th, (void*)sh_buf->doacross_flags);
+ sh_buf->doacross_flags = NULL;
+ sh_buf->doacross_num_done = 0;
+ sh_buf->doacross_buf_idx += KMP_MAX_DISP_BUF; // free buffer for future re-use
+ }
+ // free private resources (need to keep buffer index forever)
+ __kmp_thread_free(th, (void*)pr_buf->th_doacross_info);
+ pr_buf->th_doacross_info = NULL;
+ KA_TRACE(20,("__kmpc_doacross_fini() exit: T#%d\n", gtid));
+}
+#endif
+
// end of file //
Modified: openmp/trunk/runtime/src/kmp_dispatch.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_dispatch.cpp?rev=262532&r1=262531&r2=262532&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_dispatch.cpp (original)
+++ openmp/trunk/runtime/src/kmp_dispatch.cpp Wed Mar 2 16:42:06 2016
@@ -163,7 +163,7 @@ struct dispatch_shared_infoXX_template {
volatile UT iteration;
volatile UT num_done;
volatile UT ordered_iteration;
- UT ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size making ordered_iteration scalar
+ UT ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size making ordered_iteration scalar
};
// replaces dispatch_shared_info structure and dispatch_shared_info_t type
@@ -175,6 +175,11 @@ struct dispatch_shared_info_template {
dispatch_shared_info64_t s64;
} u;
volatile kmp_uint32 buffer_index;
+#if OMP_41_ENABLED
+ volatile kmp_int32 doacross_buf_idx; // teamwise index
+ kmp_uint32 *doacross_flags; // array of iteration flags (0/1)
+ kmp_int32 doacross_num_done; // count finished threads
+#endif
};
/* ------------------------------------------------------------------------ */
Modified: openmp/trunk/runtime/src/kmp_runtime.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_runtime.c?rev=262532&r1=262531&r2=262532&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_runtime.c (original)
+++ openmp/trunk/runtime/src/kmp_runtime.c Wed Mar 2 16:42:06 2016
@@ -3046,8 +3046,12 @@ __kmp_allocate_team_arrays(kmp_team_t *t
team->t.t_max_nproc = max_nth;
/* setup dispatch buffers */
- for(i = 0 ; i < num_disp_buff; ++i)
+ for(i = 0 ; i < num_disp_buff; ++i) {
team->t.t_disp_buffer[i].buffer_index = i;
+#if OMP_41_ENABLED
+ team->t.t_disp_buffer[i].doacross_buf_idx = i;
+#endif
+ }
}
static void
@@ -4121,7 +4125,9 @@ __kmp_initialize_info( kmp_info_t *this_
KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
dispatch->th_disp_index = 0;
-
+#if OMP_41_ENABLED
+ dispatch->th_doacross_buf_idx = 0;
+#endif
if( ! dispatch->th_disp_buffer ) {
dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
@@ -6813,7 +6819,9 @@ __kmp_run_before_invoked_task( int gtid,
//KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] );
dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
-
+#if OMP_41_ENABLED
+ dispatch->th_doacross_buf_idx = 0; /* reset the doacross dispatch buffer counter */
+#endif
if( __kmp_env_consistency_check )
__kmp_push_parallel( gtid, team->t.t_ident );
@@ -7050,10 +7058,17 @@ __kmp_internal_fork( ident_t *id, int gt
KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
if ( team->t.t_max_nproc > 1 ) {
int i;
- for (i = 0; i < KMP_MAX_DISP_BUF; ++i)
+ for (i = 0; i < KMP_MAX_DISP_BUF; ++i) {
team->t.t_disp_buffer[ i ].buffer_index = i;
+#if OMP_41_ENABLED
+ team->t.t_disp_buffer[i].doacross_buf_idx = i;
+#endif
+ }
} else {
team->t.t_disp_buffer[ 0 ].buffer_index = 0;
+#if OMP_41_ENABLED
+ team->t.t_disp_buffer[0].doacross_buf_idx = 0;
+#endif
}
KMP_MB(); /* Flush all pending memory write invalidates. */
More information about the Openmp-commits
mailing list