[Openmp-commits] [openmp] r337691 - [OPNEMP, NVPTX] Fixed sychronization construct + code cleanup.

Alexey Bataev via Openmp-commits openmp-commits at lists.llvm.org
Mon Jul 23 06:52:12 PDT 2018


Author: abataev
Date: Mon Jul 23 06:52:12 2018
New Revision: 337691

URL: http://llvm.org/viewvc/llvm-project?rev=337691&view=rev
Log:
[OPNEMP, NVPTX] Fixed sychronization construct + code cleanup.

Summary:
1. Fixed internal problem in `__kmpc_barrier` function: SPMD mode
synchronization function should be called only in L1 parallel level.
2. Removed some extra code for synchronization inside of the code, used
`__kmpc_barrier` instead.
3. Some code cleanup.

Reviewers: gtbercea, grokos

Subscribers: openmp-commits

Differential Revision: https://reviews.llvm.org/D49564

Modified:
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu?rev=337691&r1=337690&r2=337691&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu Mon Jul 23 06:52:12 2018
@@ -240,12 +240,8 @@ public:
 
     // Process schedule.
     if (tnum == 1 || tripCount <= 1 || OrderedSchedule(schedule)) {
-      if (OrderedSchedule(schedule)) {
-        if (isSPMDMode())
-          __syncthreads();
-        else
-          __kmpc_barrier(loc, threadId);
-      }
+      if (OrderedSchedule(schedule))
+        __kmpc_barrier(loc, threadId);
       PRINT(LD_LOOP,
             "go sequential as tnum=%ld, trip count %lld, ordered sched=%d\n",
             (long)tnum, P64(tripCount), schedule);
@@ -338,10 +334,7 @@ public:
             omptarget_nvptx_threadPrivateContext->Stride(tid));
 
     } else if (schedule == kmp_sched_dynamic || schedule == kmp_sched_guided) {
-      if (isSPMDMode())
-        __syncthreads();
-      else
-        __kmpc_barrier(loc, threadId);
+      __kmpc_barrier(loc, threadId);
       // save sched state
       int teamId = GetOmpTeamId();
       omptarget_nvptx_threadPrivateContext->ScheduleType(tid) = schedule;
@@ -352,10 +345,7 @@ public:
         omptarget_nvptx_threadPrivateContext->LoopUpperBound(teamId) = ub;
         omptarget_nvptx_threadPrivateContext->NextLowerBound(teamId) = lb;
       }
-      if (isSPMDMode())
-        __syncthreads();
-      else
-        __kmpc_barrier(loc, threadId);
+      __kmpc_barrier(loc, threadId);
       PRINT(LD_LOOP,
             "dispatch init (dyn) : num threads = %d, lb = %llu, ub = %" PRId64
             ", chunk %" PRIu64 "\n",

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu?rev=337691&r1=337690&r2=337691&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu Mon Jul 23 06:52:12 2018
@@ -25,9 +25,8 @@ int32_t __gpu_block_reduce() {
   if (nt != blockDim.x)
     return 0;
   unsigned tnum = __ACTIVEMASK();
-  if (tnum != (~0x0)) { // assume swapSize is 32
+  if (tnum != (~0x0)) // assume swapSize is 32
     return 0;
-  }
   return 1;
 }
 
@@ -48,32 +47,21 @@ int32_t __kmpc_reduce_gpu(kmp_Indent *lo
 
   if (numthread == 1)
     return 1;
-  else if (!__gpu_block_reduce())
+  if (!__gpu_block_reduce())
     return 2;
-  else {
-    if (threadIdx.x == 0)
-      return 1;
-    else
-      return 0;
-  }
+  if (threadIdx.x == 0)
+    return 1;
+  return 0;
 }
 
 EXTERN
 int32_t __kmpc_reduce_combined(kmp_Indent *loc) {
-  if (threadIdx.x == 0) {
-    return 2;
-  } else {
-    return 0;
-  }
+  return threadIdx.x == 0 ? 2 : 0;
 }
 
 EXTERN
 int32_t __kmpc_reduce_simd(kmp_Indent *loc) {
-  if (threadIdx.x % 32 == 0) {
-    return 1;
-  } else {
-    return 0;
-  }
+  return (threadIdx.x % 32 == 0) ? 1 : 0;
 }
 
 EXTERN

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h?rev=337691&r1=337690&r2=337691&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h Mon Jul 23 06:52:12 2018
@@ -155,8 +155,7 @@ INLINE int IsTeamMaster(int ompThreadId)
 INLINE int GetNumberOfProcsInDevice() {
   if (isGenericMode())
     return GetNumberOfWorkersInTeam();
-  else
-    return GetNumberOfThreadsInBlock();
+  return GetNumberOfThreadsInBlock();
 }
 
 INLINE int GetNumberOfProcsInTeam() { return GetNumberOfProcsInDevice(); }

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu?rev=337691&r1=337690&r2=337691&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu Mon Jul 23 06:52:12 2018
@@ -41,25 +41,21 @@ EXTERN int32_t __kmpc_cancel_barrier(kmp
 }
 
 EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid) {
-  if (isSPMDMode()) {
-    __kmpc_barrier_simple_spmd(loc_ref, tid);
-  } else if (isRuntimeUninitialized()) {
-    __kmpc_barrier_simple_generic(loc_ref, tid);
+  if (isRuntimeUninitialized()) {
+    if (isSPMDMode())
+      __kmpc_barrier_simple_spmd(loc_ref, tid);
+    else
+      __kmpc_barrier_simple_generic(loc_ref, tid);
   } else {
     tid = GetLogicalThreadIdInBlock();
     omptarget_nvptx_TaskDescr *currTaskDescr =
         omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(tid);
-    if (!currTaskDescr->InL2OrHigherParallelRegion()) {
-      int numberOfActiveOMPThreads =
-          GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized());
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
-      // On Volta and newer architectures we require that all lanes in
-      // a warp (at least, all present for the kernel launch) participate in the
-      // barrier.  This is enforced when launching the parallel region.  An
-      // exception is when there are < WARPSIZE workers.  In this case only 1
-      // worker is started, so we don't need a barrier.
-      if (numberOfActiveOMPThreads > 1) {
-#endif
+    int numberOfActiveOMPThreads = GetNumberOfOmpThreads(
+        tid, isSPMDMode(), /*isRuntimeUninitialized=*/false);
+    if (numberOfActiveOMPThreads > 1) {
+      if (isSPMDMode()) {
+        __kmpc_barrier_simple_spmd(loc_ref, tid);
+      } else {
         // The #threads parameter must be rounded up to the WARPSIZE.
         int threads =
             WARPSIZE * ((numberOfActiveOMPThreads + WARPSIZE - 1) / WARPSIZE);
@@ -69,10 +65,8 @@ EXTERN void __kmpc_barrier(kmp_Indent *l
               numberOfActiveOMPThreads, threads);
         // Barrier #1 is for synchronization among active threads.
         named_sync(L1_BARRIER, threads);
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
-      } // numberOfActiveOMPThreads > 1
-#endif
-    }
+      }
+    } // numberOfActiveOMPThreads > 1
     PRINT0(LD_SYNC, "completed kmpc_barrier\n");
   }
 }




More information about the Openmp-commits mailing list