[Openmp-commits] [openmp] 1ab1f04 - [OpenMP] Simplify variable sharing and increase shared memory size
Johannes Doerfert via Openmp-commits
openmp-commits at lists.llvm.org
Sun Jul 11 17:18:35 PDT 2021
Author: Johannes Doerfert
Date: 2021-07-11T19:18:03-05:00
New Revision: 1ab1f04a2be34bea2fb34df0f5ff0bd75bdc7aa0
URL: https://github.com/llvm/llvm-project/commit/1ab1f04a2be34bea2fb34df0f5ff0bd75bdc7aa0
DIFF: https://github.com/llvm/llvm-project/commit/1ab1f04a2be34bea2fb34df0f5ff0bd75bdc7aa0.diff
LOG: [OpenMP] Simplify variable sharing and increase shared memory size
In order to avoid malloc/free, up to NUM_SHARED_VARIABLES_IN_SHARED_MEM
(=64) variables are now communicated in dedicated shared memory instead.
This simplification removes the need for an "init" step and requires a
"deinit" only if we ever communicate more than
NUM_SHARED_VARIABLES_IN_SHARED_MEM variables.
Differential Revision: https://reviews.llvm.org/D105767
Added:
Modified:
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
openmp/libomptarget/deviceRTLs/common/omptarget.h
openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu
openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
Removed:
################################################################################
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index 6af40a7d507c..615335df5488 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -40,10 +40,6 @@
#define WARPSIZE 64
-// Maximum number of preallocated arguments to an outlined parallel/simd
-// function. Anything more requires dynamic memory allocation.
-#define MAX_SHARED_ARGS 20
-
// Maximum number of omp state objects per SM allocated statically in global
// memory.
#define OMP_STATE_COUNT 32
diff --git a/openmp/libomptarget/deviceRTLs/common/omptarget.h b/openmp/libomptarget/deviceRTLs/common/omptarget.h
index 47d2848ec7b0..d8ea6a396697 100644
--- a/openmp/libomptarget/deviceRTLs/common/omptarget.h
+++ b/openmp/libomptarget/deviceRTLs/common/omptarget.h
@@ -35,46 +35,6 @@
#define BARRIER_COUNTER 0
#define ORDERED_COUNTER 1
-// arguments needed for L0 parallelism only.
-class omptarget_nvptx_SharedArgs {
-public:
- // All these methods must be called by the master thread only.
- INLINE void Init() {
- args = buffer;
- nArgs = MAX_SHARED_ARGS;
- }
- INLINE void DeInit() {
- // Free any memory allocated for outlined parallel function with a large
- // number of arguments.
- if (nArgs > MAX_SHARED_ARGS) {
- SafeFree(args, "new extended args");
- Init();
- }
- }
- INLINE void EnsureSize(size_t size) {
- if (size > nArgs) {
- if (nArgs > MAX_SHARED_ARGS) {
- SafeFree(args, "new extended args");
- }
- args = (void **)SafeMalloc(size * sizeof(void *), "new extended args");
- nArgs = size;
- }
- }
- // Called by all threads.
- INLINE void **GetArgs() const { return args; };
-
-private:
- // buffer of pre-allocated arguments.
- void *buffer[MAX_SHARED_ARGS];
- // pointer to arguments buffer.
- // starts off as a pointer to 'buffer' but can be dynamically allocated.
- void **args;
- // starts off as MAX_SHARED_ARGS but can increase in size.
- uint32_t nArgs;
-};
-
-extern omptarget_nvptx_SharedArgs EXTERN_SHARED(omptarget_nvptx_globalArgs);
-
// Worker slot type which is initialized with the default worker slot
// size of 4*32 bytes.
struct __kmpc_data_sharing_slot {
diff --git a/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu
index 3b95ca88aad2..65db27b63bdf 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu
@@ -135,14 +135,32 @@ EXTERN void __kmpc_data_sharing_init_stack() {
}
}
+/// Allocate storage in shared memory to communicate arguments from the main
+/// thread to the workers in generic mode. If we exceed
+/// NUM_SHARED_VARIABLES_IN_SHARED_MEM we will malloc space for communication.
+#define NUM_SHARED_VARIABLES_IN_SHARED_MEM 64
+
+[[clang::loader_uninitialized]] static void
+ *SharedMemVariableSharingSpace[NUM_SHARED_VARIABLES_IN_SHARED_MEM];
+#pragma omp allocate(SharedMemVariableSharingSpace) \
+ allocator(omp_pteam_mem_alloc)
+[[clang::loader_uninitialized]] static void **SharedMemVariableSharingSpacePtr;
+#pragma omp allocate(SharedMemVariableSharingSpacePtr) \
+ allocator(omp_pteam_mem_alloc)
+
// Begin a data sharing context. Maintain a list of references to shared
// variables. This list of references to shared variables will be passed
// to one or more threads.
// In L0 data sharing this is called by master thread.
// In L1 data sharing this is called by active warp master thread.
EXTERN void __kmpc_begin_sharing_variables(void ***GlobalArgs, size_t nArgs) {
- omptarget_nvptx_globalArgs.EnsureSize(nArgs);
- *GlobalArgs = omptarget_nvptx_globalArgs.GetArgs();
+ if (nArgs <= NUM_SHARED_VARIABLES_IN_SHARED_MEM) {
+ SharedMemVariableSharingSpacePtr = &SharedMemVariableSharingSpace[0];
+ } else {
+ SharedMemVariableSharingSpacePtr =
+ (void **)SafeMalloc(nArgs * sizeof(void *), "new extended args");
+ }
+ *GlobalArgs = SharedMemVariableSharingSpacePtr;
}
// End a data sharing context. There is no need to have a list of refs
@@ -152,7 +170,8 @@ EXTERN void __kmpc_begin_sharing_variables(void ***GlobalArgs, size_t nArgs) {
// In L0 data sharing this is called by master thread.
// In L1 data sharing this is called by active warp master thread.
EXTERN void __kmpc_end_sharing_variables() {
- omptarget_nvptx_globalArgs.DeInit();
+ if (SharedMemVariableSharingSpacePtr != &SharedMemVariableSharingSpace[0])
+ SafeFree(SharedMemVariableSharingSpacePtr, "new extended args");
}
// This function will return a list of references to global variables. This
@@ -161,7 +180,7 @@ EXTERN void __kmpc_end_sharing_variables() {
// preserving the order.
// Called by all workers.
EXTERN void __kmpc_get_shared_variables(void ***GlobalArgs) {
- *GlobalArgs = omptarget_nvptx_globalArgs.GetArgs();
+ *GlobalArgs = SharedMemVariableSharingSpacePtr;
}
// This function is used to init static memory manager. This manager is used to
diff --git a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
index d46571f650f4..876ec7bb3a43 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
@@ -62,9 +62,4 @@ uint32_t SHARED(execution_param);
////////////////////////////////////////////////////////////////////////////////
void *SHARED(ReductionScratchpadPtr);
-////////////////////////////////////////////////////////////////////////////////
-// Data sharing related variables.
-////////////////////////////////////////////////////////////////////////////////
-omptarget_nvptx_SharedArgs SHARED(omptarget_nvptx_globalArgs);
-
#pragma omp end declare target
diff --git a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
index 34af243fab54..2a5cc312376a 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
@@ -68,8 +68,6 @@ static void __kmpc_generic_kernel_init() {
nThreads = GetNumberOfWorkersInTeam();
threadLimit = nThreads;
- omptarget_nvptx_globalArgs.Init();
-
__kmpc_data_sharing_init_stack();
__kmpc_impl_target_init();
}
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index d0d7127aac7d..9e69f6016ea5 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -33,10 +33,6 @@
#define WARPSIZE 32
-// Maximum number of preallocated arguments to an outlined parallel/simd
-// function. Anything more requires dynamic memory allocation.
-#define MAX_SHARED_ARGS 20
-
// Maximum number of omp state objects per SM allocated statically in global
// memory.
#if __CUDA_ARCH__ >= 600
More information about the Openmp-commits
mailing list