[Openmp-commits] [openmp] 43d5c4d - [OpenMP] add 4 custom APIs supporting MSVC OMP codegen
Jonathan Peyton via Openmp-commits
openmp-commits at lists.llvm.org
Tue Jul 5 15:26:35 PDT 2022
Author: Vadim Paretsky
Date: 2022-07-05T17:26:18-05:00
New Revision: 43d5c4d5394e522be87a9a1dfda24f5ce0e3a855
URL: https://github.com/llvm/llvm-project/commit/43d5c4d5394e522be87a9a1dfda24f5ce0e3a855
DIFF: https://github.com/llvm/llvm-project/commit/43d5c4d5394e522be87a9a1dfda24f5ce0e3a855.diff
LOG: [OpenMP] add 4 custom APIs supporting MSVC OMP codegen
This check-in adds 4 APIs to support MSVC, specifically:
* 3 APIs (__kmpc_sections_init, __kmpc_next_section,
__kmpc_end_sections) to support the dynamic scheduling of OMP sections.
* 1 API (__kmpc_copyprivate_light, a lightweight version of
__kmpc_copyprivate) to support the OMP single copyprivate clause.
Differential Revision: https://reviews.llvm.org/D128403
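As an illustration of the intended use, here is a sketch of how a compiler
might lower a three-section construct onto the new entry points. This code is
not part of the commit: the section bodies s0/s1/s2, the NUM_SECTIONS count,
and the trailing __kmpc_barrier call are assumptions about what the generated
code would look like for a construct without nowait.

// Assumes the runtime-internal declarations from kmp.h.
extern void s0(void), s1(void), s2(void); // hypothetical section bodies

/* Hypothetical lowering of:
 *   #pragma omp sections
 *   { #pragma omp section  s0();
 *     #pragma omp section  s1();
 *     #pragma omp section  s2(); }
 */
void lowered_sections(ident_t *loc, kmp_int32 gtid) {
  enum { NUM_SECTIONS = 3 };
  if (__kmpc_sections_init(loc, gtid)) {
    // Parallel region is active: claim section ids dynamically until
    // __kmpc_next_section hands back an id outside [0, NUM_SECTIONS).
    kmp_int32 id;
    while ((id = __kmpc_next_section(loc, gtid, NUM_SECTIONS)) >= 0 &&
           id < NUM_SECTIONS) {
      switch (id) {
      case 0: s0(); break;
      case 1: s1(); break;
      case 2: s2(); break;
      }
    }
  } else {
    // Serialized: this thread executes every section itself.
    s0(); s1(); s2();
  }
  __kmpc_end_sections(loc, gtid);
  // None of the sections calls contain an implicit barrier; the compiler
  // emits one explicitly unless the construct is nowait.
  __kmpc_barrier(loc, gtid);
}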
Added:
Modified:
openmp/runtime/src/dllexports
openmp/runtime/src/kmp.h
openmp/runtime/src/kmp_csupport.cpp
openmp/runtime/src/kmp_dispatch.cpp
Removed:
################################################################################
diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index 87989fe630929..92f9d24171593 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -397,6 +397,13 @@ kmpc_set_disp_num_buffers 267
__kmpc_end_scope 287
%endif
+%ifndef stub
+ __kmpc_copyprivate_light 288
+ __kmpc_sections_init 289
+ __kmpc_next_section 290
+ __kmpc_end_sections 291
+%endif
+
# User API entry points that have both lower- and upper- case versions for Fortran.
# Number for lowercase version is indicated. Number for uppercase is obtained by adding 1000.
# User API entry points are entry points that start with 'kmp_' or 'omp_'.
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index ac680e32d390e..61ec737f93943 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -3890,6 +3890,11 @@ KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait(ident_t *,
KMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
KMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
+KMP_EXPORT kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 global_tid);
+KMP_EXPORT kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 global_tid,
+ kmp_int32 numberOfSections);
+KMP_EXPORT void __kmpc_end_sections(ident_t *loc, kmp_int32 global_tid);
+
KMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid,
kmp_int32 schedtype, kmp_int32 *plastiter,
kmp_int *plower, kmp_int *pupper,
@@ -3903,6 +3908,9 @@ KMP_EXPORT void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
void (*cpy_func)(void *, void *),
kmp_int32 didit);
+KMP_EXPORT void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid,
+ void *cpy_data);
+
extern void KMPC_SET_NUM_THREADS(int arg);
extern void KMPC_SET_DYNAMIC(int flag);
extern void KMPC_SET_NESTED(int flag);
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 863153bd5526c..b7bcc4c94148e 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -2224,6 +2224,61 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
}
}
+/* --------------------------------------------------------------------------*/
+/*!
+@ingroup THREADPRIVATE
+@param loc source location information
+@param gtid global thread number
+@param cpy_data pointer to the data to be saved/copied, or 0
+@return the saved pointer to the data
+
+__kmpc_copyprivate_light is a lighter-weight version of __kmpc_copyprivate:
+it only saves the pointer it is given (when that pointer is non-zero, i.e.
+when the caller is the thread that executed the single region) and returns
+the saved pointer to every caller (the executing thread does not need it).
+It performs no actual data copying; the copying has to be done elsewhere,
+e.g. inline in the generated code. Consequently, unlike __kmpc_copyprivate,
+this function has no barrier at the end, so the generated code must issue a
+barrier after all the data has been copied.
+*/
+void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
+ void **data_ptr;
+
+ KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
+
+ KMP_MB();
+
+ data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
+
+ if (__kmp_env_consistency_check) {
+ if (loc == 0) {
+ KMP_WARNING(ConstructIdentInvalid);
+ }
+ }
+
+ // ToDo: Optimize the following barrier
+
+ if (cpy_data)
+ *data_ptr = cpy_data;
+
+#if OMPT_SUPPORT
+ ompt_frame_t *ompt_frame;
+ if (ompt_enabled.enabled) {
+ __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+ if (ompt_frame->enter_frame.ptr == NULL)
+ ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
+ OMPT_STORE_RETURN_ADDRESS(gtid);
+ }
+#endif
+/* This barrier is not a barrier region boundary */
+#if USE_ITT_NOTIFY
+ __kmp_threads[gtid]->th.th_ident = loc;
+#endif
+ __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
+
+ return *data_ptr;
+}
+
/* -------------------------------------------------------------------------- */
#define INIT_LOCK __kmp_init_user_lock_with_checks
@@ -4348,7 +4403,7 @@ void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
omp_allocator_handle_t free_allocator) {
return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
- free_allocator);
+ free_allocator);
}
void omp_free(void *ptr, omp_allocator_handle_t allocator) {
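For context, a sketch of how generated code might drive
__kmpc_copyprivate_light for "#pragma omp single copyprivate(x)". This is not
part of the commit: the variable x, the helper compute_value, and the trailing
__kmpc_barrier call are illustrative assumptions.

// Assumes the runtime-internal declarations from kmp.h.
extern int compute_value(void); // hypothetical single-region body

void lowered_single_copyprivate(ident_t *loc, kmp_int32 gtid, int *x) {
  kmp_int32 did_single = __kmpc_single(loc, gtid);
  if (did_single) {
    *x = compute_value(); // body of the single region
    __kmpc_end_single(loc, gtid);
  }
  // The executing thread publishes its data pointer; every other thread
  // passes NULL and receives the published pointer back. The barrier inside
  // __kmpc_copyprivate_light makes the stored pointer visible to all.
  int *src = (int *)__kmpc_copyprivate_light(loc, gtid, did_single ? x : NULL);
  if (!did_single)
    *x = *src; // the copy itself is inline; the runtime copies nothing
  // There is no trailing barrier inside __kmpc_copyprivate_light, so one is
  // emitted here, after all copies have completed.
  __kmpc_barrier(loc, gtid);
}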
diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp
index 624fbb04d7a88..e7d28c6587b15 100644
--- a/openmp/runtime/src/kmp_dispatch.cpp
+++ b/openmp/runtime/src/kmp_dispatch.cpp
@@ -2285,6 +2285,219 @@ static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
return status;
}
+/*!
+@ingroup WORK_SHARING
+@param loc source location information
+@param gtid global thread number
+@return Zero if the parallel region is not active and this thread should
+execute all sections, non-zero otherwise.
+
+Beginning of the sections construct.
+There are no implicit barriers in the "sections" calls; the compiler should
+introduce an explicit barrier if one is required.
+
+This implementation is based on __kmp_dispatch_init and reuses the same
+shared data structures. (Sections cannot be nested directly inside an omp
+for loop; a parallel region must sit in between, so the dispatch buffers
+are free for reuse.)
+*/
+kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 gtid) {
+
+ int active;
+ kmp_info_t *th;
+ kmp_team_t *team;
+ kmp_uint32 my_buffer_index;
+ dispatch_shared_info_template<kmp_int32> volatile *sh;
+
+ KMP_DEBUG_ASSERT(__kmp_init_serial);
+
+ if (!TCR_4(__kmp_init_parallel))
+ __kmp_parallel_initialize();
+ __kmp_resume_if_soft_paused();
+
+ /* setup data */
+ th = __kmp_threads[gtid];
+ team = th->th.th_team;
+ active = !team->t.t_serialized;
+ th->th.th_ident = loc;
+
+ KMP_COUNT_BLOCK(OMP_SECTIONS);
+ KD_TRACE(10, ("__kmpc_sections_init: called by T#%d\n", gtid));
+
+ if (active) {
+ // Setup sections in the same way as dynamic scheduled loops.
+ // We need one shared data: which section is to execute next.
+ // (in case parallel is not active, all sections will be executed on the
+ // same thread)
+ KMP_DEBUG_ASSERT(th->th.th_dispatch ==
+ &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
+
+ my_buffer_index = th->th.th_dispatch->th_disp_index++;
+
+ // reuse shared data structures from dynamic sched loops:
+ sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
+ &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
+ KD_TRACE(10, ("__kmpc_sections_init: T#%d my_buffer_index:%d\n", gtid,
+ my_buffer_index));
+
+ th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
+ th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
+
+ KD_TRACE(100, ("__kmpc_sections_init: T#%d before wait: my_buffer_index:%d "
+ "sh->buffer_index:%d\n",
+ gtid, my_buffer_index, sh->buffer_index));
+ __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
+ __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
+ // Note: KMP_WAIT() cannot be used here: buffer index and
+ // my_buffer_index are *always* 32-bit integers.
+ KMP_MB();
+ KD_TRACE(100, ("__kmpc_sections_init: T#%d after wait: my_buffer_index:%d "
+ "sh->buffer_index:%d\n",
+ gtid, my_buffer_index, sh->buffer_index));
+
+ th->th.th_dispatch->th_dispatch_pr_current =
+ nullptr; // sections construct doesn't need private data
+ th->th.th_dispatch->th_dispatch_sh_current =
+ CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh);
+ }
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ if (ompt_enabled.ompt_callback_work) {
+ ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+ ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+ ompt_callbacks.ompt_callback(ompt_callback_work)(
+ ompt_work_sections, ompt_scope_begin, &(team_info->parallel_data),
+ &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
+ }
+#endif
+ KMP_PUSH_PARTITIONED_TIMER(OMP_sections);
+
+ return active;
+}
+
+/*!
+@ingroup WORK_SHARING
+@param loc source location information
+@param gtid global thread number
+@param numberOfSections number of sections in the 'sections' construct
+@return A section id in [0, numberOfSections) that this thread should
+execute next, or a value outside that range when there is nothing left for
+this thread to execute.
+*/
+kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 gtid,
+ kmp_int32 numberOfSections) {
+
+ KMP_TIME_PARTITIONED_BLOCK(OMP_sections);
+
+ kmp_info_t *th = __kmp_threads[gtid];
+#ifdef KMP_DEBUG
+ kmp_team_t *team = th->th.th_team;
+#endif
+
+ KD_TRACE(1000, ("__kmpc_next_section: T#%d; number of sections:%d\n", gtid,
+ numberOfSections));
+
+ // For serialized case we should not call this function:
+ KMP_DEBUG_ASSERT(!team->t.t_serialized);
+
+ dispatch_shared_info_template<kmp_int32> volatile *sh;
+
+ KMP_DEBUG_ASSERT(th->th.th_dispatch ==
+ &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
+
+ KMP_DEBUG_ASSERT(!(th->th.th_dispatch->th_dispatch_pr_current));
+ sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
+ th->th.th_dispatch->th_dispatch_sh_current);
+ KMP_DEBUG_ASSERT(sh);
+
+ kmp_int32 sectionIndex = 0;
+ bool moreSectionsToExecute = true;
+
+ // Find section to execute:
+ sectionIndex = test_then_inc<kmp_int32>((kmp_int32 *)&sh->u.s.iteration);
+ if (sectionIndex >= numberOfSections) {
+ moreSectionsToExecute = false;
+ }
+
+ // No more sections for this thread to execute;
+ // OMPTODO: __kmpc_end_sections could be bypassed?
+ if (!moreSectionsToExecute) {
+ kmp_int32 num_done;
+
+ num_done = test_then_inc<kmp_int32>((kmp_int32 *)(&sh->u.s.num_done));
+
+ if (num_done == th->th.th_team_nproc - 1) {
+ /* NOTE: release this buffer to be reused */
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ sh->u.s.num_done = 0;
+ sh->u.s.iteration = 0;
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ sh->buffer_index += __kmp_dispatch_num_buffers;
+ KD_TRACE(100, ("__kmpc_next_section: T#%d change buffer_index:%d\n", gtid,
+ sh->buffer_index));
+
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+
+ } // if
+
+ th->th.th_dispatch->th_deo_fcn = NULL;
+ th->th.th_dispatch->th_dxo_fcn = NULL;
+ th->th.th_dispatch->th_dispatch_sh_current = NULL;
+ th->th.th_dispatch->th_dispatch_pr_current = NULL;
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ if (ompt_enabled.ompt_callback_dispatch) {
+ ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+ ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+ ompt_data_t instance = ompt_data_none;
+ instance.ptr = OMPT_GET_RETURN_ADDRESS(0);
+ ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
+ &(team_info->parallel_data), &(task_info->task_data),
+ ompt_dispatch_section, instance);
+ }
+#endif
+ KMP_POP_PARTITIONED_TIMER();
+ }
+
+ return sectionIndex;
+}
+
+/*!
+@ingroup WORK_SHARING
+@param loc source location information
+@param gtid global thread number
+
+End of the "sections" construct.
+There is no need to wait here: a barrier is added separately when needed.
+*/
+void __kmpc_end_sections(ident_t *loc, kmp_int32 gtid) {
+
+ kmp_info_t *th = __kmp_threads[gtid];
+ int active = !th->th.th_team->t.t_serialized;
+
+ KD_TRACE(100, ("__kmpc_end_sections: T#%d called\n", gtid));
+
+ if (!active) {
+ // In the active case, finalization was already done in __kmpc_next_section
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ if (ompt_enabled.ompt_callback_work) {
+ ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+ ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+ ompt_callbacks.ompt_callback(ompt_callback_work)(
+ ompt_work_sections, ompt_scope_end, &(team_info->parallel_data),
+ &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
+ }
+#endif
+ KMP_POP_PARTITIONED_TIMER();
+ }
+
+ KD_TRACE(100, ("__kmpc_end_sections: T#%d returned\n", gtid));
+}
+
template <typename T>
static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid,
kmp_int32 *plastiter, T *plower, T *pupper,
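The handout logic at the core of __kmpc_next_section is a single atomic
fetch-and-increment on the shared iteration counter. A minimal standalone
model of that step, using C11 atomics as a stand-in for the runtime's
test_then_inc (the names here are not from the commit):

#include <stdatomic.h>

static _Atomic int shared_iteration; // models sh->u.s.iteration

// Each caller atomically claims the next section id; an id at or beyond
// the section count means there is nothing left for this thread.
int next_section_model(int number_of_sections) {
  int id = atomic_fetch_add(&shared_iteration, 1);
  return id < number_of_sections ? id : number_of_sections;
}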