[Openmp-commits] [openmp] r350747 - [OpenMP][libomptarget] Use shared memory variable for tracking parallel level
Gheorghe-Teodor Bercea via Openmp-commits
openmp-commits at lists.llvm.org
Wed Jan 9 10:30:14 PST 2019
Author: gbercea
Date: Wed Jan 9 10:30:14 2019
New Revision: 350747
URL: http://llvm.org/viewvc/llvm-project?rev=350747&view=rev
Log:
[OpenMP][libomptarget] Use shared memory variable for tracking parallel level
Summary: Replace existing infrastructure for tracking parallel level using global memory with a per-team shared memory variable. This minimizes the impact of the overhead of tracking the parallel level for non-nested cases.
Reviewers: ABataev, caomhin
Reviewed By: ABataev
Subscribers: guansong, openmp-commits
Differential Revision: https://reviews.llvm.org/D55773
Modified:
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu
openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu?rev=350747&r1=350746&r2=350747&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu Wed Jan 9 10:30:14 2019
@@ -165,7 +165,7 @@ EXTERN int omp_get_level(void) {
if (isRuntimeUninitialized()) {
ASSERT0(LT_FUSSY, isSPMDMode(),
"Expected SPMD mode only with uninitialized runtime.");
- return omptarget_nvptx_simpleThreadPrivateContext->GetParallelLevel();
+ return parallelLevel;
}
int level = 0;
omptarget_nvptx_TaskDescr *currTaskDescr =
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu?rev=350747&r1=350746&r2=350747&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu Wed Jan 9 10:30:14 2019
@@ -27,22 +27,17 @@ __device__
omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
omptarget_nvptx_device_State[MAX_SM];
-__device__ omptarget_nvptx_Queue<omptarget_nvptx_SimpleThreadPrivateContext,
- OMP_STATE_COUNT>
- omptarget_nvptx_device_simpleState[MAX_SM];
-
__device__ omptarget_nvptx_SimpleMemoryManager
omptarget_nvptx_simpleMemoryManager;
__device__ __shared__ uint32_t usedMemIdx;
__device__ __shared__ uint32_t usedSlotIdx;
+__device__ __shared__ uint8_t parallelLevel;
+
// Pointer to this team's OpenMP state object
__device__ __shared__
omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;
-__device__ __shared__ omptarget_nvptx_SimpleThreadPrivateContext
- *omptarget_nvptx_simpleThreadPrivateContext;
-
////////////////////////////////////////////////////////////////////////////////
// The team master sets the outlined parallel function in this variable to
// communicate with the workers. Since it is in shared memory, there is one
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu?rev=350747&r1=350746&r2=350747&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu Wed Jan 9 10:30:14 2019
@@ -21,10 +21,6 @@ extern __device__
omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
omptarget_nvptx_device_State[MAX_SM];
-extern __device__ omptarget_nvptx_Queue<
- omptarget_nvptx_SimpleThreadPrivateContext, OMP_STATE_COUNT>
- omptarget_nvptx_device_simpleState[MAX_SM];
-
////////////////////////////////////////////////////////////////////////////////
// init entry points
////////////////////////////////////////////////////////////////////////////////
@@ -100,14 +96,10 @@ EXTERN void __kmpc_spmd_kernel_init(int
// If OMP runtime is not required don't initialize OMP state.
setExecutionParameters(Spmd, RuntimeUninitialized);
if (GetThreadIdInBlock() == 0) {
- int slot = smid() % MAX_SM;
- usedSlotIdx = slot;
- omptarget_nvptx_simpleThreadPrivateContext =
- omptarget_nvptx_device_simpleState[slot].Dequeue();
+ parallelLevel = 0;
+ usedSlotIdx = smid() % MAX_SM;
}
- // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
__SYNCTHREADS();
- omptarget_nvptx_simpleThreadPrivateContext->Init();
return;
}
setExecutionParameters(Spmd, RuntimeInitialized);
@@ -172,18 +164,12 @@ EXTERN __attribute__((deprecated)) void
EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) {
// We're not going to pop the task descr stack of each thread since
// there are no more parallel regions in SPMD mode.
+ if (!RequiresOMPRuntime)
+ return;
+
// FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
__SYNCTHREADS();
int threadId = GetThreadIdInBlock();
- if (!RequiresOMPRuntime) {
- if (threadId == 0) {
- // Enqueue omp state object for use by another team.
- int slot = usedSlotIdx;
- omptarget_nvptx_device_simpleState[slot].Enqueue(
- omptarget_nvptx_simpleThreadPrivateContext);
- }
- return;
- }
if (threadId == 0) {
// Enqueue omp state object for use by another team.
int slot = usedSlotIdx;
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h?rev=350747&r1=350746&r2=350747&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h Wed Jan 9 10:30:14 2019
@@ -391,39 +391,6 @@ public:
INLINE const void *Acquire(const void *buf, size_t size);
};
-class omptarget_nvptx_SimpleThreadPrivateContext {
- uint16_t par_level[MAX_THREADS_PER_TEAM];
-
-public:
- INLINE void Init() {
- ASSERT0(LT_FUSSY, isSPMDMode() && isRuntimeUninitialized(),
- "Expected SPMD + uninitialized runtime modes.");
- par_level[GetThreadIdInBlock()] = 0;
- }
- INLINE void IncParLevel() {
- ASSERT0(LT_FUSSY, isSPMDMode() && isRuntimeUninitialized(),
- "Expected SPMD + uninitialized runtime modes.");
- ++par_level[GetThreadIdInBlock()];
- }
- INLINE void DecParLevel() {
- ASSERT0(LT_FUSSY, isSPMDMode() && isRuntimeUninitialized(),
- "Expected SPMD + uninitialized runtime modes.");
- ASSERT0(LT_FUSSY, par_level[GetThreadIdInBlock()] > 0,
- "Expected parallel level >0.");
- --par_level[GetThreadIdInBlock()];
- }
- INLINE bool InL2OrHigherParallelRegion() const {
- ASSERT0(LT_FUSSY, isSPMDMode() && isRuntimeUninitialized(),
- "Expected SPMD + uninitialized runtime modes.");
- return par_level[GetThreadIdInBlock()] > 0;
- }
- INLINE uint16_t GetParallelLevel() const {
- ASSERT0(LT_FUSSY, isSPMDMode() && isRuntimeUninitialized(),
- "Expected SPMD + uninitialized runtime modes.");
- return par_level[GetThreadIdInBlock()] + 1;
- }
-};
-
////////////////////////////////////////////////////////////////////////////////
// global device envrionment
////////////////////////////////////////////////////////////////////////////////
@@ -440,10 +407,9 @@ extern __device__ omptarget_nvptx_Simple
omptarget_nvptx_simpleMemoryManager;
extern __device__ __shared__ uint32_t usedMemIdx;
extern __device__ __shared__ uint32_t usedSlotIdx;
+extern __device__ __shared__ uint8_t parallelLevel;
extern __device__ __shared__
omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;
-extern __device__ __shared__ omptarget_nvptx_SimpleThreadPrivateContext
- *omptarget_nvptx_simpleThreadPrivateContext;
extern __device__ __shared__ uint32_t execution_param;
extern __device__ __shared__ void *ReductionScratchpadPtr;
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu?rev=350747&r1=350746&r2=350747&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu Wed Jan 9 10:30:14 2019
@@ -340,7 +340,11 @@ EXTERN void __kmpc_serialized_parallel(k
if (checkRuntimeUninitialized(loc)) {
ASSERT0(LT_FUSSY, checkSPMDMode(loc),
"Expected SPMD mode with uninitialized runtime.");
- omptarget_nvptx_simpleThreadPrivateContext->IncParLevel();
+ __SYNCTHREADS();
+ if (GetThreadIdInBlock() == 0)
+ ++parallelLevel;
+ __SYNCTHREADS();
+
return;
}
@@ -379,7 +383,10 @@ EXTERN void __kmpc_end_serialized_parall
if (checkRuntimeUninitialized(loc)) {
ASSERT0(LT_FUSSY, checkSPMDMode(loc),
"Expected SPMD mode with uninitialized runtime.");
- omptarget_nvptx_simpleThreadPrivateContext->DecParLevel();
+ __SYNCTHREADS();
+ if (GetThreadIdInBlock() == 0)
+ --parallelLevel;
+ __SYNCTHREADS();
return;
}
@@ -401,7 +408,7 @@ EXTERN uint16_t __kmpc_parallel_level(km
if (checkRuntimeUninitialized(loc)) {
ASSERT0(LT_FUSSY, checkSPMDMode(loc),
"Expected SPMD mode with uninitialized runtime.");
- return omptarget_nvptx_simpleThreadPrivateContext->GetParallelLevel();
+ return parallelLevel;
}
int threadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h?rev=350747&r1=350746&r2=350747&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h Wed Jan 9 10:30:14 2019
@@ -155,8 +155,7 @@ INLINE int GetOmpThreadId(int threadId,
ASSERT0(LT_FUSSY, isSPMDExecutionMode,
"Uninitialized runtime with non-SPMD mode.");
// For level 2 parallelism all parallel regions are executed sequentially.
- if (omptarget_nvptx_simpleThreadPrivateContext
- ->InL2OrHigherParallelRegion())
+ if (parallelLevel > 0)
rc = 0;
else
rc = GetThreadIdInBlock();
@@ -177,8 +176,7 @@ INLINE int GetNumberOfOmpThreads(int thr
ASSERT0(LT_FUSSY, isSPMDExecutionMode,
"Uninitialized runtime with non-SPMD mode.");
// For level 2 parallelism all parallel regions are executed sequentially.
- if (omptarget_nvptx_simpleThreadPrivateContext
- ->InL2OrHigherParallelRegion())
+ if (parallelLevel > 0)
rc = 1;
else
rc = GetNumberOfThreadsInBlock();
More information about the Openmp-commits
mailing list