[Openmp-commits] [openmp] r326950 - [OpenMP] Remove implicit data sharing using device shared memory from libomptarget
Gheorghe-Teodor Bercea via Openmp-commits
openmp-commits at lists.llvm.org
Wed Mar 7 14:10:10 PST 2018
Author: gbercea
Date: Wed Mar 7 14:10:10 2018
New Revision: 326950
URL: http://llvm.org/viewvc/llvm-project?rev=326950&view=rev
Log:
[OpenMP] Remove implicit data sharing using device shared memory from libomptarget
Summary:
This patch reverts the changes to libomptarget that were coupled with the changes to Clang code gen for data sharing using shared memory. A similar patch exists for Clang: D43625
Shared memory is meant to be used as an optimization on top of a more general scheme. So far we didn't have a global memory implementation ready, so shared memory was a solution that applied to the current level of OpenMP complexity supported by trunk on GPU devices (due to the missing NVPTX backend patch, this functionality has never been exercised). Now that we have a global memory solution, this patch is "in the way" and needs to be removed (for now). This patch (or an equivalent version of it) will be put out for review once the global memory scheme is in place.
Reviewers: ABataev, grokos, carlo.bertolli, caomhin
Reviewed By: grokos
Subscribers: Hahnfeld, guansong, openmp-commits
Differential Revision: https://reviews.llvm.org/D43626
Modified:
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/interface.h
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/option.h
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/interface.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/interface.h?rev=326950&r1=326949&r2=326950&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/interface.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/interface.h Wed Mar 7 14:10:10 2018
@@ -470,9 +470,8 @@ EXTERN void __kmpc_spmd_kernel_init(int
int16_t RequiresDataSharing);
EXTERN void __kmpc_spmd_kernel_deinit();
EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
- void ***SharedArgs, int32_t nArgs,
int16_t IsOMPRuntimeInitialized);
-EXTERN bool __kmpc_kernel_parallel(void **WorkFn, void ***SharedArgs,
+EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
int16_t IsOMPRuntimeInitialized);
EXTERN void __kmpc_kernel_end_parallel();
EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask,
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu?rev=326950&r1=326949&r2=326950&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu Wed Mar 7 14:10:10 2018
@@ -46,8 +46,3 @@ __device__ __shared__ DataSharingStateTy
// Scratchpad for teams reduction.
////////////////////////////////////////////////////////////////////////////////
__device__ __shared__ void *ReductionScratchpadPtr;
-
-////////////////////////////////////////////////////////////////////////////////
-// Data sharing related variables.
-////////////////////////////////////////////////////////////////////////////////
-__device__ __shared__ omptarget_nvptx_SharedArgs omptarget_nvptx_sharedArgs;
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu?rev=326950&r1=326949&r2=326950&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu Wed Mar 7 14:10:10 2018
@@ -54,9 +54,6 @@ EXTERN void __kmpc_kernel_init(int Threa
PRINT(LD_IO, "call to __kmpc_kernel_init with version %f\n",
OMPTARGET_NVPTX_VERSION);
- // init parallel work arguments
- omptarget_nvptx_sharedArgs.Init();
-
if (!RequiresOMPRuntime) {
// If OMP runtime is not required don't initialize OMP state.
setExecutionParameters(Generic, RuntimeUninitialized);
@@ -110,9 +107,6 @@ EXTERN void __kmpc_kernel_deinit(int16_t
}
// Done with work. Kill the workers.
omptarget_nvptx_workFn = 0;
-
- // Deinit parallel work arguments
- omptarget_nvptx_sharedArgs.DeInit();
}
EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h?rev=326950&r1=326949&r2=326950&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h Wed Mar 7 14:10:10 2018
@@ -62,46 +62,6 @@
#define __ACTIVEMASK() __ballot(1)
#endif
-// arguments needed for L0 parallelism only.
-class omptarget_nvptx_SharedArgs {
-public:
- // All these methods must be called by the master thread only.
- INLINE void Init() {
- args = buffer;
- nArgs = MAX_SHARED_ARGS;
- }
- INLINE void DeInit() {
- // Free any memory allocated for outlined parallel function with a large
- // number of arguments.
- if (nArgs > MAX_SHARED_ARGS) {
- SafeFree(args, (char *)"new extended args");
- Init();
- }
- }
- INLINE void EnsureSize(int size) {
- if (size > nArgs) {
- if (nArgs > MAX_SHARED_ARGS) {
- SafeFree(args, (char *)"new extended args");
- }
- args = (void **) SafeMalloc(size * sizeof(void *),
- (char *)"new extended args");
- nArgs = size;
- }
- }
- // Called by all threads.
- INLINE void **GetArgs() { return args; };
-private:
- // buffer of pre-allocated arguments.
- void *buffer[MAX_SHARED_ARGS];
- // pointer to arguments buffer.
- // starts off as a pointer to 'buffer' but can be dynamically allocated.
- void **args;
- // starts off as MAX_SHARED_ARGS but can increase in size.
- uint32_t nArgs;
-};
-
-extern __device__ __shared__ omptarget_nvptx_SharedArgs omptarget_nvptx_sharedArgs;
-
// Data sharing related quantities, need to match what is used in the compiler.
enum DATA_SHARING_SIZES {
// The maximum number of workers in a kernel.
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/option.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/option.h?rev=326950&r1=326949&r2=326950&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/option.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/option.h Wed Mar 7 14:10:10 2018
@@ -28,10 +28,6 @@
// region to synchronize with each other.
#define L1_BARRIER (1)
-// Maximum number of preallocated arguments to an outlined parallel/simd function.
-// Anything more requires dynamic memory allocation.
-#define MAX_SHARED_ARGS 20
-
// Maximum number of omp state objects per SM allocated statically in global
// memory.
#if __CUDA_ARCH__ >= 600
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu?rev=326950&r1=326949&r2=326950&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu Wed Mar 7 14:10:10 2018
@@ -214,16 +214,10 @@ EXTERN void __kmpc_kernel_end_convergent
//
// This routine is always called by the team master..
EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
- void ***SharedArgs, int32_t nArgs,
int16_t IsOMPRuntimeInitialized) {
PRINT0(LD_IO, "call to __kmpc_kernel_prepare_parallel\n");
omptarget_nvptx_workFn = WorkFn;
- if (nArgs > 0) {
- omptarget_nvptx_sharedArgs.EnsureSize(nArgs);
- *SharedArgs = omptarget_nvptx_sharedArgs.GetArgs();
- }
-
if (!IsOMPRuntimeInitialized)
return;
@@ -323,13 +317,11 @@ EXTERN void __kmpc_kernel_prepare_parall
//
// Only the worker threads call this routine.
EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
- void ***SharedArgs,
int16_t IsOMPRuntimeInitialized) {
PRINT0(LD_IO | LD_PAR, "call to __kmpc_kernel_parallel\n");
// Work function and arguments for L1 parallel region.
*WorkFn = omptarget_nvptx_workFn;
- *SharedArgs = omptarget_nvptx_sharedArgs.GetArgs();
if (!IsOMPRuntimeInitialized)
return true;
More information about the Openmp-commits
mailing list