[Openmp-commits] [openmp] 8548e2f - [nfc][libomptarget] Move named_sync() into target_impl

via Openmp-commits openmp-commits at lists.llvm.org
Wed Oct 30 09:25:14 PDT 2019


Author: Jon Chesterfield
Date: 2019-10-30T16:25:05Z
New Revision: 8548e2f543ce74854da3ef0e3ae7d02b3e71b785

URL: https://github.com/llvm/llvm-project/commit/8548e2f543ce74854da3ef0e3ae7d02b3e71b785
DIFF: https://github.com/llvm/llvm-project/commit/8548e2f543ce74854da3ef0e3ae7d02b3e71b785.diff

LOG: [nfc][libomptarget] Move named_sync() into target_impl

Summary: [nfc][libomptarget] Move named_sync() into target_impl

Reviewers: ABataev, jdoerfert, grokos

Reviewed By: ABataev

Subscribers: openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D69487

Added: 
    

Modified: 
    openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
    openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
    openmp/libomptarget/deviceRTLs/nvptx/src/support.h
    openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
    openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
    openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
index 39dfebd92fef..3eff3a15bd46 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
@@ -765,7 +765,7 @@ INLINE void syncWorkersInGenericMode(uint32_t NumThreads) {
   // is started, so we don't need a barrier.
   if (NumThreads > 1) {
 #endif
-    named_sync(L1_BARRIER, WARPSIZE * NumWarps);
+    __kmpc_impl_named_sync(L1_BARRIER, WARPSIZE * NumWarps);
 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
   }
 #endif

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
index cee3e5d6dd3b..da7204df4122 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -256,7 +256,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars,
   // If we guard this barrier as follows it leads to deadlock, probably
   // because of a compiler bug: if (!IsGenericMode()) __syncthreads();
   uint16_t SyncWarps = (NumThreads + WARPSIZE - 1) / WARPSIZE;
-  named_sync(L1_BARRIER, SyncWarps * WARPSIZE);
+  __kmpc_impl_named_sync(L1_BARRIER, SyncWarps * WARPSIZE);
 
   // If this team is not the last, quit.
   if (/* Volatile read by all threads */ !IsLastTeam)

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/support.h b/openmp/libomptarget/deviceRTLs/nvptx/src/support.h
index e10f2a19d324..de685b894509 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/support.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/support.h
@@ -83,11 +83,6 @@ INLINE unsigned long PadBytes(unsigned long size, unsigned long alignment);
 #define SUB_BYTES(_addr, _bytes)                                               \
   ((void *)((char *)((void *)(_addr)) - (_bytes)))
 
-////////////////////////////////////////////////////////////////////////////////
-// Named Barrier Routines
-////////////////////////////////////////////////////////////////////////////////
-INLINE void named_sync(const int barrier, const int num_threads);
-
 ////////////////////////////////////////////////////////////////////////////////
 // Teams Reduction Scratchpad Helpers
 ////////////////////////////////////////////////////////////////////////////////

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h b/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
index d4da6ad73fa2..6fa857899905 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
@@ -268,17 +268,6 @@ INLINE void *SafeFree(void *ptr, const char *msg) {
   return NULL;
 }
 
-////////////////////////////////////////////////////////////////////////////////
-// Named Barrier Routines
-////////////////////////////////////////////////////////////////////////////////
-
-INLINE void named_sync(const int barrier, const int num_threads) {
-  asm volatile("bar.sync %0, %1;"
-               :
-               : "r"(barrier), "r"(num_threads)
-               : "memory");
-}
-
 ////////////////////////////////////////////////////////////////////////////////
 // Teams Reduction Scratchpad Helpers
 ////////////////////////////////////////////////////////////////////////////////

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
index 28a541901c3d..f36e877bff1c 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -61,7 +61,7 @@ EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
               "call kmpc_barrier with %d omp threads, sync parameter %d\n",
               (int)numberOfActiveOMPThreads, (int)threads);
         // Barrier #1 is for synchronization among active threads.
-        named_sync(L1_BARRIER, threads);
+        __kmpc_impl_named_sync(L1_BARRIER, threads);
       }
     } else {
       // Still need to flush the memory per the standard.
@@ -92,7 +92,7 @@ EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid) {
         "%d\n",
         (int)numberOfActiveOMPThreads, (int)threads);
   // Barrier #1 is for synchronization among active threads.
-  named_sync(L1_BARRIER, threads);
+  __kmpc_impl_named_sync(L1_BARRIER, threads);
   PRINT0(LD_SYNC, "completed kmpc_barrier_simple_generic\n");
 }
 

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index bbce9f1c5119..95fe2ad3d3d5 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -153,4 +153,11 @@ INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask) {
 #endif // CUDA_VERSION
 }
 
+INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
+  asm volatile("bar.sync %0, %1;"
+               :
+               : "r"(barrier), "r"(num_threads)
+               : "memory");
+}
+
 #endif


        


More information about the Openmp-commits mailing list