[Openmp-commits] [openmp] 0dd62c5 - [libomptarget][nfc] Move cuda threadfence functions behind kmpc_impl
Jon Chesterfield via Openmp-commits
openmp-commits at lists.llvm.org
Fri Dec 6 07:41:58 PST 2019
Author: JonChesterfield
Date: 2019-12-06T15:41:18Z
New Revision: 0dd62c5c2ec854997ca45f810175c5d1426b474e
URL: https://github.com/llvm/llvm-project/commit/0dd62c5c2ec854997ca45f810175c5d1426b474e
DIFF: https://github.com/llvm/llvm-project/commit/0dd62c5c2ec854997ca45f810175c5d1426b474e.diff
LOG: [libomptarget][nfc] Move cuda threadfence functions behind kmpc_impl
Summary:
[libomptarget][nfc] Move cuda threadfence functions behind kmpc_impl
This is part of the effort to build the code under common/ without requiring a CUDA compiler.
Reviewers: ABataev, jdoerfert, grokos
Reviewed By: ABataev
Subscribers: jvesely, jfb, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D71102
Added:
Modified:
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
openmp/libomptarget/deviceRTLs/common/src/loop.cu
openmp/libomptarget/deviceRTLs/common/src/support.cu
openmp/libomptarget/deviceRTLs/common/src/sync.cu
openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
Removed:
################################################################################
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index 475851ac9af3..b10f34a16642 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -124,6 +124,10 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
__builtin_amdgcn_s_barrier();
}
+EXTERN void __kmpc_impl_threadfence(void);
+EXTERN void __kmpc_impl_threadfence_block(void);
+EXTERN void __kmpc_impl_threadfence_system(void);
+
// DEVICE versions of part of libc
extern "C" {
DEVICE __attribute__((noreturn)) void
diff --git a/openmp/libomptarget/deviceRTLs/common/src/loop.cu b/openmp/libomptarget/deviceRTLs/common/src/loop.cu
index 45bf8f40a929..59970a6db41c 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/loop.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/loop.cu
@@ -363,7 +363,7 @@ public:
__kmpc_barrier(loc, threadId);
if (tid == 0) {
omptarget_nvptx_threadPrivateContext->Cnt() = 0;
- __threadfence_block();
+ __kmpc_impl_threadfence_block();
}
__kmpc_barrier(loc, threadId);
PRINT(LD_LOOP,
diff --git a/openmp/libomptarget/deviceRTLs/common/src/support.cu b/openmp/libomptarget/deviceRTLs/common/src/support.cu
index 44a42e172f21..2f992f2778e3 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/support.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/support.cu
@@ -212,7 +212,7 @@ DEVICE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
if (Rank == 0) {
parallelLevel[GetWarpId()] +=
(1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0));
- __threadfence();
+ __kmpc_impl_threadfence();
}
__kmpc_impl_syncwarp(Mask);
}
@@ -224,7 +224,7 @@ DEVICE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
if (Rank == 0) {
parallelLevel[GetWarpId()] -=
(1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0));
- __threadfence();
+ __kmpc_impl_threadfence();
}
__kmpc_impl_syncwarp(Mask);
}
diff --git a/openmp/libomptarget/deviceRTLs/common/src/sync.cu b/openmp/libomptarget/deviceRTLs/common/src/sync.cu
index 0ee29bf316b3..691e3436a382 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/sync.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/sync.cu
@@ -133,7 +133,7 @@ EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid) {
EXTERN void __kmpc_flush(kmp_Ident *loc) {
PRINT0(LD_IO, "call kmpc_flush\n");
- __threadfence();
+ __kmpc_impl_threadfence();
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index 75068c7814ac..6549d76def7c 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -211,7 +211,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
}
// FIXME: Need to see the impact of doing it here.
- __threadfence_block();
+ __kmpc_impl_threadfence_block();
DSPRINT0(DSFLAG, "Exiting __kmpc_data_sharing_environment_begin\n");
@@ -289,7 +289,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
}
// FIXME: Need to see the impact of doing it here.
- __threadfence_block();
+ __kmpc_impl_threadfence_block();
DSPRINT0(DSFLAG, "Exiting __kmpc_data_sharing_environment_end\n");
return;
@@ -357,7 +357,7 @@ EXTERN void __kmpc_data_sharing_init_stack_spmd() {
if (GetThreadIdInBlock() == 0)
data_sharing_init_stack_common();
- __threadfence_block();
+ __kmpc_impl_threadfence_block();
}
INLINE static void* data_sharing_push_stack_common(size_t PushSize) {
@@ -474,7 +474,7 @@ EXTERN void *__kmpc_data_sharing_push_stack(size_t DataSize,
EXTERN void __kmpc_data_sharing_pop_stack(void *FrameStart) {
ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Expected initialized runtime.");
- __threadfence_block();
+ __kmpc_impl_threadfence_block();
if (GetThreadIdInBlock() % WARPSIZE == 0) {
unsigned WID = GetWarpId();
@@ -555,7 +555,7 @@ EXTERN void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
ASSERT0(LT_FUSSY, GetThreadIdInBlock() == GetMasterThreadID(),
"Must be called only in the target master thread.");
*frame = omptarget_nvptx_simpleMemoryManager.Acquire(buf, size);
- __threadfence();
+ __kmpc_impl_threadfence();
}
EXTERN void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
@@ -569,7 +569,7 @@ EXTERN void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
}
return;
}
- __threadfence();
+ __kmpc_impl_threadfence();
ASSERT0(LT_FUSSY, GetThreadIdInBlock() == GetMasterThreadID(),
"Must be called only in the target master thread.");
omptarget_nvptx_simpleMemoryManager.Release();
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
index dfa7c4db1a66..cfccf78c377a 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -241,7 +241,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars,
char *scratchpad = GetTeamsReductionScratchpad();
scratchFct(reduce_data, scratchpad, TeamId, NumTeams);
- __threadfence();
+ __kmpc_impl_threadfence();
// atomicInc increments 'timestamp' and has a range [0, NumTeams-1].
// It resets 'timestamp' back to 0 once the last team increments
@@ -389,7 +389,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple(kmp_Ident *loc,
EXTERN void
__kmpc_nvptx_teams_end_reduce_nowait_simple(kmp_Ident *loc, int32_t global_tid,
kmp_CriticalName *crit) {
- __threadfence_system();
+ __kmpc_impl_threadfence_system();
(void)atomicExch((uint32_t *)crit, 0);
}
@@ -446,7 +446,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
lgcpyFct(global_buffer, ModBockId, reduce_data);
else
lgredFct(global_buffer, ModBockId, reduce_data);
- __threadfence_system();
+ __kmpc_impl_threadfence_system();
// Increment team counter.
// This counter is incremented by all teams in the current
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index 5daeb5ca8291..fe36a46c5cdd 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -163,4 +163,8 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
: "memory");
}
+INLINE void __kmpc_impl_threadfence(void) { __threadfence(); }
+INLINE void __kmpc_impl_threadfence_block(void) { __threadfence_block(); }
+INLINE void __kmpc_impl_threadfence_system(void) { __threadfence_system(); }
+
#endif
More information about the Openmp-commits mailing list