[Openmp-commits] [openmp] 0dd62c5 - [libomptarget][nfc] Move cuda threadfence functions behind kmpc_impl

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Fri Dec 6 07:41:58 PST 2019


Author: JonChesterfield
Date: 2019-12-06T15:41:18Z
New Revision: 0dd62c5c2ec854997ca45f810175c5d1426b474e

URL: https://github.com/llvm/llvm-project/commit/0dd62c5c2ec854997ca45f810175c5d1426b474e
DIFF: https://github.com/llvm/llvm-project/commit/0dd62c5c2ec854997ca45f810175c5d1426b474e.diff

LOG: [libomptarget][nfc] Move cuda threadfence functions behind kmpc_impl

Summary:
[libomptarget][nfc] Move cuda threadfence functions behind kmpc_impl

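This is part of the work to build the code under common/ without requiring a
CUDA compiler: the sources under common/ now call the
__kmpc_impl_threadfence, __kmpc_impl_threadfence_block and
__kmpc_impl_threadfence_system wrappers declared in each target's
target_impl.h instead of calling the CUDA __threadfence* functions directly.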

Reviewers: ABataev, jdoerfert, grokos

Reviewed By: ABataev

Subscribers: jvesely, jfb, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D71102

Added: 
    

Modified: 
    openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
    openmp/libomptarget/deviceRTLs/common/src/loop.cu
    openmp/libomptarget/deviceRTLs/common/src/support.cu
    openmp/libomptarget/deviceRTLs/common/src/sync.cu
    openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
    openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
    openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index 475851ac9af3..b10f34a16642 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -124,6 +124,10 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
   __builtin_amdgcn_s_barrier();
 }
 
+EXTERN void __kmpc_impl_threadfence(void);
+EXTERN void __kmpc_impl_threadfence_block(void);
+EXTERN void __kmpc_impl_threadfence_system(void);
+
 // DEVICE versions of part of libc
 extern "C" {
 DEVICE __attribute__((noreturn)) void

diff  --git a/openmp/libomptarget/deviceRTLs/common/src/loop.cu b/openmp/libomptarget/deviceRTLs/common/src/loop.cu
index 45bf8f40a929..59970a6db41c 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/loop.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/loop.cu
@@ -363,7 +363,7 @@ public:
       __kmpc_barrier(loc, threadId);
       if (tid == 0) {
         omptarget_nvptx_threadPrivateContext->Cnt() = 0;
-        __threadfence_block();
+        __kmpc_impl_threadfence_block();
       }
       __kmpc_barrier(loc, threadId);
       PRINT(LD_LOOP,

diff  --git a/openmp/libomptarget/deviceRTLs/common/src/support.cu b/openmp/libomptarget/deviceRTLs/common/src/support.cu
index 44a42e172f21..2f992f2778e3 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/support.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/support.cu
@@ -212,7 +212,7 @@ DEVICE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
   if (Rank == 0) {
     parallelLevel[GetWarpId()] +=
         (1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0));
-    __threadfence();
+    __kmpc_impl_threadfence();
   }
   __kmpc_impl_syncwarp(Mask);
 }
@@ -224,7 +224,7 @@ DEVICE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
   if (Rank == 0) {
     parallelLevel[GetWarpId()] -=
         (1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0));
-    __threadfence();
+    __kmpc_impl_threadfence();
   }
   __kmpc_impl_syncwarp(Mask);
 }

diff  --git a/openmp/libomptarget/deviceRTLs/common/src/sync.cu b/openmp/libomptarget/deviceRTLs/common/src/sync.cu
index 0ee29bf316b3..691e3436a382 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/sync.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/sync.cu
@@ -133,7 +133,7 @@ EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid) {
 
 EXTERN void __kmpc_flush(kmp_Ident *loc) {
   PRINT0(LD_IO, "call kmpc_flush\n");
-  __threadfence();
+  __kmpc_impl_threadfence();
 }
 
 ////////////////////////////////////////////////////////////////////////////////

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index 75068c7814ac..6549d76def7c 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -211,7 +211,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
   }
 
   // FIXME: Need to see the impact of doing it here.
-  __threadfence_block();
+  __kmpc_impl_threadfence_block();
 
   DSPRINT0(DSFLAG, "Exiting __kmpc_data_sharing_environment_begin\n");
 
@@ -289,7 +289,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
   }
 
   // FIXME: Need to see the impact of doing it here.
-  __threadfence_block();
+  __kmpc_impl_threadfence_block();
 
   DSPRINT0(DSFLAG, "Exiting __kmpc_data_sharing_environment_end\n");
   return;
@@ -357,7 +357,7 @@ EXTERN void __kmpc_data_sharing_init_stack_spmd() {
   if (GetThreadIdInBlock() == 0)
     data_sharing_init_stack_common();
 
-  __threadfence_block();
+  __kmpc_impl_threadfence_block();
 }
 
 INLINE static void* data_sharing_push_stack_common(size_t PushSize) {
@@ -474,7 +474,7 @@ EXTERN void *__kmpc_data_sharing_push_stack(size_t DataSize,
 EXTERN void __kmpc_data_sharing_pop_stack(void *FrameStart) {
   ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Expected initialized runtime.");
 
-  __threadfence_block();
+  __kmpc_impl_threadfence_block();
 
   if (GetThreadIdInBlock() % WARPSIZE == 0) {
     unsigned WID = GetWarpId();
@@ -555,7 +555,7 @@ EXTERN void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
   ASSERT0(LT_FUSSY, GetThreadIdInBlock() == GetMasterThreadID(),
           "Must be called only in the target master thread.");
   *frame = omptarget_nvptx_simpleMemoryManager.Acquire(buf, size);
-  __threadfence();
+  __kmpc_impl_threadfence();
 }
 
 EXTERN void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
@@ -569,7 +569,7 @@ EXTERN void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
     }
     return;
   }
-  __threadfence();
+  __kmpc_impl_threadfence();
   ASSERT0(LT_FUSSY, GetThreadIdInBlock() == GetMasterThreadID(),
           "Must be called only in the target master thread.");
   omptarget_nvptx_simpleMemoryManager.Release();

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
index dfa7c4db1a66..cfccf78c377a 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -241,7 +241,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars,
     char *scratchpad = GetTeamsReductionScratchpad();
 
     scratchFct(reduce_data, scratchpad, TeamId, NumTeams);
-    __threadfence();
+    __kmpc_impl_threadfence();
 
     // atomicInc increments 'timestamp' and has a range [0, NumTeams-1].
     // It resets 'timestamp' back to 0 once the last team increments
@@ -389,7 +389,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple(kmp_Ident *loc,
 EXTERN void
 __kmpc_nvptx_teams_end_reduce_nowait_simple(kmp_Ident *loc, int32_t global_tid,
                                             kmp_CriticalName *crit) {
-  __threadfence_system();
+  __kmpc_impl_threadfence_system();
   (void)atomicExch((uint32_t *)crit, 0);
 }
 
@@ -446,7 +446,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
       lgcpyFct(global_buffer, ModBockId, reduce_data);
     else
       lgredFct(global_buffer, ModBockId, reduce_data);
-    __threadfence_system();
+    __kmpc_impl_threadfence_system();
 
     // Increment team counter.
     // This counter is incremented by all teams in the current

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index 5daeb5ca8291..fe36a46c5cdd 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -163,4 +163,8 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
                : "memory");
 }
 
+INLINE void __kmpc_impl_threadfence(void) { __threadfence(); }
+INLINE void __kmpc_impl_threadfence_block(void) { __threadfence_block(); }
+INLINE void __kmpc_impl_threadfence_system(void) { __threadfence_system(); }
+
 #endif


        


More information about the Openmp-commits mailing list