[Openmp-commits] [openmp] 2caeaf2 - [libomptarget][nfc] Introduce atomic wrapper function
Jon Chesterfield via Openmp-commits
openmp-commits at lists.llvm.org
Wed Dec 18 12:07:09 PST 2019
Author: Jon Chesterfield
Date: 2019-12-18T20:06:17Z
New Revision: 2caeaf2f455db468cc5a5505d90b4919ae37c915
URL: https://github.com/llvm/llvm-project/commit/2caeaf2f455db468cc5a5505d90b4919ae37c915
DIFF: https://github.com/llvm/llvm-project/commit/2caeaf2f455db468cc5a5505d90b4919ae37c915.diff
LOG: [libomptarget][nfc] Introduce atomic wrapper function
Summary:
[libomptarget][nfc] Introduce atomic wrapper function
Wraps the atomic functions in templates prefixed with __kmpc_atomic that
dispatch to the CUDA or HIP atomic functions. Intended to be easily extended
to dispatch to OpenCL or C++ atomics for a third target.
Reviewers: ABataev, jdoerfert, grokos
Reviewed By: jdoerfert
Subscribers: Anastasia, jvesely, mgrang, dexonsmith, llvm-commits, mgorny, jfb, openmp-commits
Tags: #openmp, #llvm
Differential Revision: https://reviews.llvm.org/D71404
Added:
openmp/libomptarget/deviceRTLs/common/target_atomic.h
Modified:
openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
openmp/libomptarget/deviceRTLs/common/omptargeti.h
openmp/libomptarget/deviceRTLs/common/src/libcall.cu
openmp/libomptarget/deviceRTLs/common/src/loop.cu
openmp/libomptarget/deviceRTLs/common/src/reduction.cu
openmp/libomptarget/deviceRTLs/common/state-queuei.h
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
Removed:
################################################################################
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
index ebea0a049b6e..d3df65b734df 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
@@ -76,6 +76,7 @@ set(h_files
${devicertl_base_directory}/common/omptarget.h
${devicertl_base_directory}/common/omptargeti.h
${devicertl_base_directory}/common/state-queue.h
+ ${devicertl_base_directory}/common/target_atomic.h
${devicertl_base_directory}/common/state-queuei.h
${devicertl_base_directory}/common/support.h)
diff --git a/openmp/libomptarget/deviceRTLs/common/omptargeti.h b/openmp/libomptarget/deviceRTLs/common/omptargeti.h
index b952a8dc484a..379a870bfe36 100644
--- a/openmp/libomptarget/deviceRTLs/common/omptargeti.h
+++ b/openmp/libomptarget/deviceRTLs/common/omptargeti.h
@@ -11,6 +11,8 @@
//
//===----------------------------------------------------------------------===//
+#include "common/target_atomic.h"
+
////////////////////////////////////////////////////////////////////////////////
// Task Descriptor
////////////////////////////////////////////////////////////////////////////////
@@ -207,7 +209,7 @@ INLINE void omptarget_nvptx_SimpleMemoryManager::Release() {
ASSERT0(LT_FUSSY, usedMemIdx < OMP_STATE_COUNT,
"MemIdx is too big or uninitialized.");
MemDataTy &MD = MemData[usedSlotIdx];
- atomicExch((unsigned *)&MD.keys[usedMemIdx], 0);
+ __kmpc_atomic_exchange((unsigned *)&MD.keys[usedMemIdx], 0u);
}
INLINE const void *omptarget_nvptx_SimpleMemoryManager::Acquire(const void *buf,
@@ -217,7 +219,7 @@ INLINE const void *omptarget_nvptx_SimpleMemoryManager::Acquire(const void *buf,
const unsigned sm = usedSlotIdx;
MemDataTy &MD = MemData[sm];
unsigned i = hash(GetBlockIdInKernel());
- while (atomicCAS((unsigned *)&MD.keys[i], 0, 1) != 0) {
+ while (__kmpc_atomic_cas((unsigned *)&MD.keys[i], 0u, 1u) != 0) {
i = hash(i + 1);
}
usedSlotIdx = sm;
diff --git a/openmp/libomptarget/deviceRTLs/common/src/libcall.cu b/openmp/libomptarget/deviceRTLs/common/src/libcall.cu
index 00eec92d71ef..c125d82372f7 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/libcall.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/libcall.cu
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "common/omptarget.h"
+#include "common/target_atomic.h"
#include "target_impl.h"
EXTERN double omp_get_wtick(void) {
diff --git a/openmp/libomptarget/deviceRTLs/common/src/loop.cu b/openmp/libomptarget/deviceRTLs/common/src/loop.cu
index 59970a6db41c..017af67ba1f2 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/loop.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/loop.cu
@@ -14,6 +14,7 @@
#include "common/omptarget.h"
#include "target_impl.h"
+#include "common/target_atomic.h"
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
@@ -397,9 +398,9 @@ public:
unsigned int rank = __kmpc_impl_popc(active & lane_mask_lt);
uint64_t warp_res;
if (rank == 0) {
- warp_res = atomicAdd(
+ warp_res = __kmpc_atomic_add(
(unsigned long long *)&omptarget_nvptx_threadPrivateContext->Cnt(),
- change);
+ (unsigned long long)change);
}
warp_res = Shuffle(active, warp_res, leader);
return warp_res + rank;
@@ -792,8 +793,8 @@ EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Ident *loc, int32_t gtid,
// Atomic max of iterations.
uint64_t *varArray = (uint64_t *)array;
uint64_t elem = varArray[i];
- (void)atomicMax((unsigned long long int *)Buffer,
- (unsigned long long int)elem);
+ (void)__kmpc_atomic_max((unsigned long long int *)Buffer,
+ (unsigned long long int)elem);
// Barrier.
syncWorkersInGenericMode(NumThreads);
diff --git a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu
index 5db194866ea5..7f6ee2e39c7d 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "common/omptarget.h"
+#include "common/target_atomic.h"
#include "target_impl.h"
EXTERN
@@ -242,7 +243,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars,
// atomicInc increments 'timestamp' and has a range [0, NumTeams-1].
// It resets 'timestamp' back to 0 once the last team increments
// this counter.
- unsigned val = atomicInc(timestamp, NumTeams - 1);
+ unsigned val = __kmpc_atomic_inc(timestamp, NumTeams - 1);
IsLastTeam = val == NumTeams - 1;
}
@@ -377,7 +378,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple(kmp_Ident *loc,
if (checkSPMDMode(loc) && GetThreadIdInBlock() != 0)
return 0;
// The master thread of the team actually does the reduction.
- while (atomicCAS((uint32_t *)crit, 0, 1))
+ while (__kmpc_atomic_cas((uint32_t *)crit, 0u, 1u))
;
return 1;
}
@@ -386,7 +387,7 @@ EXTERN void
__kmpc_nvptx_teams_end_reduce_nowait_simple(kmp_Ident *loc, int32_t global_tid,
kmp_CriticalName *crit) {
__kmpc_impl_threadfence_system();
- (void)atomicExch((uint32_t *)crit, 0);
+ (void)__kmpc_atomic_exchange((uint32_t *)crit, 0u);
}
INLINE static bool isMaster(kmp_Ident *loc, uint32_t ThreadId) {
@@ -431,7 +432,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
bool IsMaster = isMaster(loc, ThreadId);
while (IsMaster) {
// Atomic read
- Bound = atomicAdd((uint32_t *)&IterCnt, 0);
+ Bound = __kmpc_atomic_add((uint32_t *)&IterCnt, 0u);
if (TeamId < Bound + num_of_records)
break;
}
@@ -447,7 +448,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
// Increment team counter.
// This counter is incremented by all teams in the current
// BUFFER_SIZE chunk.
- ChunkTeamCount = atomicInc((uint32_t *)&Cnt, num_of_records - 1);
+ ChunkTeamCount = __kmpc_atomic_inc((uint32_t *)&Cnt, num_of_records - 1u);
}
// Synchronize
if (checkSPMDMode(loc))
@@ -522,7 +523,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
if (IsMaster && ChunkTeamCount == num_of_records - 1) {
// Allow SIZE number of teams to proceed writing their
// intermediate results to the global buffer.
- atomicAdd((uint32_t *)&IterCnt, num_of_records);
+ __kmpc_atomic_add((uint32_t *)&IterCnt, uint32_t(num_of_records));
}
return 0;
diff --git a/openmp/libomptarget/deviceRTLs/common/state-queuei.h b/openmp/libomptarget/deviceRTLs/common/state-queuei.h
index 3c3be113e733..1bd261f2826a 100644
--- a/openmp/libomptarget/deviceRTLs/common/state-queuei.h
+++ b/openmp/libomptarget/deviceRTLs/common/state-queuei.h
@@ -1,4 +1,4 @@
-//===------- state-queue.cu - NVPTX OpenMP GPU State Queue ------- CUDA -*-===//
+//===------- state-queuei.h - OpenMP GPU State Queue ------------- CUDA -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -17,15 +17,16 @@
//===----------------------------------------------------------------------===//
#include "state-queue.h"
+#include "common/target_atomic.h"
template <typename ElementType, uint32_t SIZE>
INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::ENQUEUE_TICKET() {
- return atomicAdd((unsigned int *)&tail, 1);
+ return __kmpc_atomic_add((unsigned int *)&tail, 1u);
}
template <typename ElementType, uint32_t SIZE>
INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::DEQUEUE_TICKET() {
- return atomicAdd((unsigned int *)&head, 1);
+ return __kmpc_atomic_add((unsigned int *)&head, 1u);
}
template <typename ElementType, uint32_t SIZE>
@@ -37,28 +38,28 @@ omptarget_nvptx_Queue<ElementType, SIZE>::ID(uint32_t ticket) {
template <typename ElementType, uint32_t SIZE>
INLINE bool omptarget_nvptx_Queue<ElementType, SIZE>::IsServing(uint32_t slot,
uint32_t id) {
- return atomicAdd((unsigned int *)&ids[slot], 0) == id;
+ return __kmpc_atomic_add((unsigned int *)&ids[slot], 0u) == id;
}
template <typename ElementType, uint32_t SIZE>
INLINE void
omptarget_nvptx_Queue<ElementType, SIZE>::PushElement(uint32_t slot,
ElementType *element) {
- atomicExch((unsigned long long *)&elementQueue[slot],
- (unsigned long long)element);
+ __kmpc_atomic_exchange((unsigned long long *)&elementQueue[slot],
+ (unsigned long long)element);
}
template <typename ElementType, uint32_t SIZE>
INLINE ElementType *
omptarget_nvptx_Queue<ElementType, SIZE>::PopElement(uint32_t slot) {
- return (ElementType *)atomicAdd((unsigned long long *)&elementQueue[slot],
- (unsigned long long)0);
+ return (ElementType *)__kmpc_atomic_add(
+ (unsigned long long *)&elementQueue[slot], (unsigned long long)0);
}
template <typename ElementType, uint32_t SIZE>
INLINE void omptarget_nvptx_Queue<ElementType, SIZE>::DoneServing(uint32_t slot,
uint32_t id) {
- atomicExch((unsigned int *)&ids[slot], (id + 1) % MAX_ID);
+ __kmpc_atomic_exchange((unsigned int *)&ids[slot], (id + 1) % MAX_ID);
}
template <typename ElementType, uint32_t SIZE>
diff --git a/openmp/libomptarget/deviceRTLs/common/target_atomic.h b/openmp/libomptarget/deviceRTLs/common/target_atomic.h
new file mode 100644
index 000000000000..3c905d3cbbf2
--- /dev/null
+++ b/openmp/libomptarget/deviceRTLs/common/target_atomic.h
@@ -0,0 +1,38 @@
+//===---- target_atomic.h - OpenMP GPU target atomic functions ---- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Declarations of atomic functions provided by each target
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OMPTARGET_TARGET_ATOMIC_H
+#define OMPTARGET_TARGET_ATOMIC_H
+
+#include "target_impl.h"
+
+template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) {
+ return atomicAdd(address, val);
+}
+
+template <typename T> INLINE T __kmpc_atomic_inc(T *address, T val) {
+ return atomicInc(address, val);
+}
+
+template <typename T> INLINE T __kmpc_atomic_max(T *address, T val) {
+ return atomicMax(address, val);
+}
+
+template <typename T> INLINE T __kmpc_atomic_exchange(T *address, T val) {
+ return atomicExch(address, val);
+}
+
+template <typename T> INLINE T __kmpc_atomic_cas(T *address, T compare, T val) {
+ return atomicCAS(address, compare, val);
+}
+
+#endif
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
index 11f60e65173a..97a5ce34962c 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
@@ -12,10 +12,11 @@
#include "target_impl.h"
#include "common/debug.h"
+#include "common/target_atomic.h"
#define __OMP_SPIN 1000
-#define UNSET 0
-#define SET 1
+#define UNSET 0u
+#define SET 1u
EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock) {
omp_unset_lock(lock);
@@ -30,7 +31,7 @@ EXTERN void __kmpc_impl_set_lock(omp_lock_t *lock) {
// (old == compare ? val : old)
// TODO: not sure spinning is a good idea here..
- while (atomicCAS(lock, UNSET, SET) != UNSET) {
+ while (__kmpc_atomic_cas(lock, UNSET, SET) != UNSET) {
clock_t start = clock();
clock_t now;
for (;;) {
@@ -44,7 +45,7 @@ EXTERN void __kmpc_impl_set_lock(omp_lock_t *lock) {
}
EXTERN void __kmpc_impl_unset_lock(omp_lock_t *lock) {
- (void)atomicExch(lock, UNSET);
+ (void)__kmpc_atomic_exchange(lock, UNSET);
}
EXTERN int __kmpc_impl_test_lock(omp_lock_t *lock) {
More information about the Openmp-commits mailing list