[Openmp-commits] [openmp] r350747 - [OpenMP][libomptarget] Use shared memory variable for tracking parallel level

Gheorghe-Teodor Bercea via Openmp-commits openmp-commits at lists.llvm.org
Wed Jan 9 10:30:14 PST 2019


Author: gbercea
Date: Wed Jan  9 10:30:14 2019
New Revision: 350747

URL: http://llvm.org/viewvc/llvm-project?rev=350747&view=rev
Log:
[OpenMP][libomptarget] Use shared memory variable for tracking parallel level

Summary: Replace existing infrastructure for tracking parallel level using global memory with a per-team shared memory variable. This minimizes the impact of the overhead of tracking the parallel level for non-nested cases.

Reviewers: ABataev, caomhin

Reviewed By: ABataev

Subscribers: guansong, openmp-commits

Differential Revision: https://reviews.llvm.org/D55773

Modified:
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu?rev=350747&r1=350746&r2=350747&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu Wed Jan  9 10:30:14 2019
@@ -165,7 +165,7 @@ EXTERN int omp_get_level(void) {
   if (isRuntimeUninitialized()) {
     ASSERT0(LT_FUSSY, isSPMDMode(),
             "Expected SPMD mode only with uninitialized runtime.");
-    return omptarget_nvptx_simpleThreadPrivateContext->GetParallelLevel();
+    return parallelLevel;
   }
   int level = 0;
   omptarget_nvptx_TaskDescr *currTaskDescr =

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu?rev=350747&r1=350746&r2=350747&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omp_data.cu Wed Jan  9 10:30:14 2019
@@ -27,22 +27,17 @@ __device__
     omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
         omptarget_nvptx_device_State[MAX_SM];
 
-__device__ omptarget_nvptx_Queue<omptarget_nvptx_SimpleThreadPrivateContext,
-                                 OMP_STATE_COUNT>
-    omptarget_nvptx_device_simpleState[MAX_SM];
-
 __device__ omptarget_nvptx_SimpleMemoryManager
     omptarget_nvptx_simpleMemoryManager;
 __device__ __shared__ uint32_t usedMemIdx;
 __device__ __shared__ uint32_t usedSlotIdx;
 
+__device__ __shared__ uint8_t parallelLevel;
+
 // Pointer to this team's OpenMP state object
 __device__ __shared__
     omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;
 
-__device__ __shared__ omptarget_nvptx_SimpleThreadPrivateContext
-    *omptarget_nvptx_simpleThreadPrivateContext;
-
 ////////////////////////////////////////////////////////////////////////////////
 // The team master sets the outlined parallel function in this variable to
 // communicate with the workers.  Since it is in shared memory, there is one

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu?rev=350747&r1=350746&r2=350747&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu Wed Jan  9 10:30:14 2019
@@ -21,10 +21,6 @@ extern __device__
     omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
         omptarget_nvptx_device_State[MAX_SM];
 
-extern __device__ omptarget_nvptx_Queue<
-    omptarget_nvptx_SimpleThreadPrivateContext, OMP_STATE_COUNT>
-    omptarget_nvptx_device_simpleState[MAX_SM];
-
 ////////////////////////////////////////////////////////////////////////////////
 // init entry points
 ////////////////////////////////////////////////////////////////////////////////
@@ -100,14 +96,10 @@ EXTERN void __kmpc_spmd_kernel_init(int
     // If OMP runtime is not required don't initialize OMP state.
     setExecutionParameters(Spmd, RuntimeUninitialized);
     if (GetThreadIdInBlock() == 0) {
-      int slot = smid() % MAX_SM;
-      usedSlotIdx = slot;
-      omptarget_nvptx_simpleThreadPrivateContext =
-          omptarget_nvptx_device_simpleState[slot].Dequeue();
+      parallelLevel = 0;
+      usedSlotIdx = smid() % MAX_SM;
     }
-    // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
     __SYNCTHREADS();
-    omptarget_nvptx_simpleThreadPrivateContext->Init();
     return;
   }
   setExecutionParameters(Spmd, RuntimeInitialized);
@@ -172,18 +164,12 @@ EXTERN __attribute__((deprecated)) void
 EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) {
   // We're not going to pop the task descr stack of each thread since
   // there are no more parallel regions in SPMD mode.
+  if (!RequiresOMPRuntime)
+    return;
+
   // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
   __SYNCTHREADS();
   int threadId = GetThreadIdInBlock();
-  if (!RequiresOMPRuntime) {
-    if (threadId == 0) {
-      // Enqueue omp state object for use by another team.
-      int slot = usedSlotIdx;
-      omptarget_nvptx_device_simpleState[slot].Enqueue(
-          omptarget_nvptx_simpleThreadPrivateContext);
-    }
-    return;
-  }
   if (threadId == 0) {
     // Enqueue omp state object for use by another team.
     int slot = usedSlotIdx;

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h?rev=350747&r1=350746&r2=350747&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h Wed Jan  9 10:30:14 2019
@@ -391,39 +391,6 @@ public:
   INLINE const void *Acquire(const void *buf, size_t size);
 };
 
-class omptarget_nvptx_SimpleThreadPrivateContext {
-  uint16_t par_level[MAX_THREADS_PER_TEAM];
-
-public:
-  INLINE void Init() {
-    ASSERT0(LT_FUSSY, isSPMDMode() && isRuntimeUninitialized(),
-            "Expected SPMD + uninitialized runtime modes.");
-    par_level[GetThreadIdInBlock()] = 0;
-  }
-  INLINE void IncParLevel() {
-    ASSERT0(LT_FUSSY, isSPMDMode() && isRuntimeUninitialized(),
-            "Expected SPMD + uninitialized runtime modes.");
-    ++par_level[GetThreadIdInBlock()];
-  }
-  INLINE void DecParLevel() {
-    ASSERT0(LT_FUSSY, isSPMDMode() && isRuntimeUninitialized(),
-            "Expected SPMD + uninitialized runtime modes.");
-    ASSERT0(LT_FUSSY, par_level[GetThreadIdInBlock()] > 0,
-            "Expected parallel level >0.");
-    --par_level[GetThreadIdInBlock()];
-  }
-  INLINE bool InL2OrHigherParallelRegion() const {
-    ASSERT0(LT_FUSSY, isSPMDMode() && isRuntimeUninitialized(),
-            "Expected SPMD + uninitialized runtime modes.");
-    return par_level[GetThreadIdInBlock()] > 0;
-  }
-  INLINE uint16_t GetParallelLevel() const {
-    ASSERT0(LT_FUSSY, isSPMDMode() && isRuntimeUninitialized(),
-            "Expected SPMD + uninitialized runtime modes.");
-    return par_level[GetThreadIdInBlock()] + 1;
-  }
-};
-
 ////////////////////////////////////////////////////////////////////////////////
 // global device envrionment
 ////////////////////////////////////////////////////////////////////////////////
@@ -440,10 +407,9 @@ extern __device__ omptarget_nvptx_Simple
     omptarget_nvptx_simpleMemoryManager;
 extern __device__ __shared__ uint32_t usedMemIdx;
 extern __device__ __shared__ uint32_t usedSlotIdx;
+extern __device__ __shared__ uint8_t parallelLevel;
 extern __device__ __shared__
     omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;
-extern __device__ __shared__ omptarget_nvptx_SimpleThreadPrivateContext
-    *omptarget_nvptx_simpleThreadPrivateContext;
 
 extern __device__ __shared__ uint32_t execution_param;
 extern __device__ __shared__ void *ReductionScratchpadPtr;

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu?rev=350747&r1=350746&r2=350747&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu Wed Jan  9 10:30:14 2019
@@ -340,7 +340,11 @@ EXTERN void __kmpc_serialized_parallel(k
   if (checkRuntimeUninitialized(loc)) {
     ASSERT0(LT_FUSSY, checkSPMDMode(loc),
             "Expected SPMD mode with uninitialized runtime.");
-    omptarget_nvptx_simpleThreadPrivateContext->IncParLevel();
+    __SYNCTHREADS();
+    if (GetThreadIdInBlock() == 0)
+      ++parallelLevel;
+    __SYNCTHREADS();
+
     return;
   }
 
@@ -379,7 +383,10 @@ EXTERN void __kmpc_end_serialized_parall
   if (checkRuntimeUninitialized(loc)) {
     ASSERT0(LT_FUSSY, checkSPMDMode(loc),
             "Expected SPMD mode with uninitialized runtime.");
-    omptarget_nvptx_simpleThreadPrivateContext->DecParLevel();
+    __SYNCTHREADS();
+    if (GetThreadIdInBlock() == 0)
+      --parallelLevel;
+    __SYNCTHREADS();
     return;
   }
 
@@ -401,7 +408,7 @@ EXTERN uint16_t __kmpc_parallel_level(km
   if (checkRuntimeUninitialized(loc)) {
     ASSERT0(LT_FUSSY, checkSPMDMode(loc),
             "Expected SPMD mode with uninitialized runtime.");
-    return omptarget_nvptx_simpleThreadPrivateContext->GetParallelLevel();
+    return parallelLevel;
   }
 
   int threadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc));

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h?rev=350747&r1=350746&r2=350747&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h Wed Jan  9 10:30:14 2019
@@ -155,8 +155,7 @@ INLINE int GetOmpThreadId(int threadId,
     ASSERT0(LT_FUSSY, isSPMDExecutionMode,
             "Uninitialized runtime with non-SPMD mode.");
     // For level 2 parallelism all parallel regions are executed sequentially.
-    if (omptarget_nvptx_simpleThreadPrivateContext
-            ->InL2OrHigherParallelRegion())
+    if (parallelLevel > 0)
       rc = 0;
     else
       rc = GetThreadIdInBlock();
@@ -177,8 +176,7 @@ INLINE int GetNumberOfOmpThreads(int thr
     ASSERT0(LT_FUSSY, isSPMDExecutionMode,
             "Uninitialized runtime with non-SPMD mode.");
     // For level 2 parallelism all parallel regions are executed sequentially.
-    if (omptarget_nvptx_simpleThreadPrivateContext
-            ->InL2OrHigherParallelRegion())
+    if (parallelLevel > 0)
       rc = 1;
     else
       rc = GetNumberOfThreadsInBlock();




More information about the Openmp-commits mailing list