[Openmp-commits] [openmp] f85c1f3 - [OpenMP] Replace __ATOMIC_XYZ with atomic::xyz for style
Johannes Doerfert via Openmp-commits
openmp-commits at lists.llvm.org
Tue Oct 4 19:45:11 PDT 2022
Author: Johannes Doerfert
Date: 2022-10-04T19:43:30-07:00
New Revision: f85c1f3b7c0bda64aef12201e2f5bbad6028582d
URL: https://github.com/llvm/llvm-project/commit/f85c1f3b7c0bda64aef12201e2f5bbad6028582d
DIFF: https://github.com/llvm/llvm-project/commit/f85c1f3b7c0bda64aef12201e2f5bbad6028582d.diff
LOG: [OpenMP] Replace __ATOMIC_XYZ with atomic::xyz for style
Also fixes one case where the ordering argument was not used.
Differential Revision: https://reviews.llvm.org/D135035
Added:
Modified:
openmp/libomptarget/DeviceRTL/include/Synchronization.h
openmp/libomptarget/DeviceRTL/src/Reduction.cpp
openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
openmp/libomptarget/DeviceRTL/src/Workshare.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/DeviceRTL/include/Synchronization.h b/openmp/libomptarget/DeviceRTL/include/Synchronization.h
index e33f37a659af..438b0221dd08 100644
--- a/openmp/libomptarget/DeviceRTL/include/Synchronization.h
+++ b/openmp/libomptarget/DeviceRTL/include/Synchronization.h
@@ -44,38 +44,46 @@ __attribute__((noinline)) void threadsAligned();
} // namespace synchronize
-namespace fence {
-
-/// Memory fence with \p Ordering semantics for the team.
-void team(int Ordering);
-
-/// Memory fence with \p Ordering semantics for the contention group.
-void kernel(int Ordering);
-
-/// Memory fence with \p Ordering semantics for the system.
-void system(int Ordering);
-
-} // namespace fence
-
namespace atomic {
+enum OrderingTy {
+ relaxed = __ATOMIC_RELAXED,
+ acquire = __ATOMIC_ACQUIRE,
+ release = __ATOMIC_RELEASE,
+ acq_rel = __ATOMIC_ACQ_REL,
+ seq_cst = __ATOMIC_SEQ_CST,
+};
+
/// Atomically load \p Addr with \p Ordering semantics.
-uint32_t load(uint32_t *Addr, int Ordering);
+uint32_t load(uint32_t *Addr, atomic::OrderingTy Ordering);
/// Atomically store \p V to \p Addr with \p Ordering semantics.
-void store(uint32_t *Addr, uint32_t V, int Ordering);
+void store(uint32_t *Addr, uint32_t V, atomic::OrderingTy Ordering);
/// Atomically increment \p *Addr and wrap at \p V with \p Ordering semantics.
-uint32_t inc(uint32_t *Addr, uint32_t V, int Ordering);
+uint32_t inc(uint32_t *Addr, uint32_t V, atomic::OrderingTy Ordering);
/// Atomically add \p V to \p *Addr with \p Ordering semantics.
-uint32_t add(uint32_t *Addr, uint32_t V, int Ordering);
+uint32_t add(uint32_t *Addr, uint32_t V, atomic::OrderingTy Ordering);
/// Atomically add \p V to \p *Addr with \p Ordering semantics.
-uint64_t add(uint64_t *Addr, uint64_t V, int Ordering);
+uint64_t add(uint64_t *Addr, uint64_t V, atomic::OrderingTy Ordering);
} // namespace atomic
+namespace fence {
+
+/// Memory fence with \p Ordering semantics for the team.
+void team(atomic::OrderingTy Ordering);
+
+/// Memory fence with \p Ordering semantics for the contention group.
+void kernel(atomic::OrderingTy Ordering);
+
+/// Memory fence with \p Ordering semantics for the system.
+void system(atomic::OrderingTy Ordering);
+
+} // namespace fence
+
} // namespace _OMP
#endif
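The new atomic::OrderingTy enumerators share the integer values of the
corresponding __ATOMIC_* macros, so they can be handed directly to the
GCC/Clang __atomic builtins while call sites gain a checked type instead of
a bare int. A minimal standalone sketch of that pattern (the add() wrapper
mirrors the patch; the main() driver is illustrative only):

#include <cstdint>
#include <cstdio>

namespace atomic {
enum OrderingTy {
  relaxed = __ATOMIC_RELAXED,
  acquire = __ATOMIC_ACQUIRE,
  release = __ATOMIC_RELEASE,
  acq_rel = __ATOMIC_ACQ_REL,
  seq_cst = __ATOMIC_SEQ_CST,
};

// Because the enumerators reuse the macros' values, the enum converts
// implicitly where the builtins expect an int memory order.
uint32_t add(uint32_t *Addr, uint32_t V, OrderingTy Ordering) {
  return __atomic_fetch_add(Addr, V, Ordering);
}
} // namespace atomic

int main() {
  uint32_t Counter = 0;
  atomic::add(&Counter, 1, atomic::seq_cst); // typed ordering at the call site
  printf("%u\n", Counter);                   // prints 1
}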
diff --git a/openmp/libomptarget/DeviceRTL/src/Reduction.cpp b/openmp/libomptarget/DeviceRTL/src/Reduction.cpp
index e65cdd3d250a..523a4e451d18 100644
--- a/openmp/libomptarget/DeviceRTL/src/Reduction.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Reduction.cpp
@@ -211,7 +211,7 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
// to the number of slots in the buffer.
bool IsMaster = (ThreadId == 0);
while (IsMaster) {
- Bound = atomic::load(&IterCnt, __ATOMIC_SEQ_CST);
+ Bound = atomic::load(&IterCnt, atomic::seq_cst);
if (TeamId < Bound + num_of_records)
break;
}
@@ -223,12 +223,12 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
} else
lgredFct(GlobalBuffer, ModBockId, reduce_data);
- fence::system(__ATOMIC_SEQ_CST);
+ fence::system(atomic::seq_cst);
// Increment team counter.
// This counter is incremented by all teams in the current
// BUFFER_SIZE chunk.
- ChunkTeamCount = atomic::inc(&Cnt, num_of_records - 1u, __ATOMIC_SEQ_CST);
+ ChunkTeamCount = atomic::inc(&Cnt, num_of_records - 1u, atomic::seq_cst);
}
// Synchronize
if (mapping::isSPMDMode())
@@ -304,7 +304,7 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
if (IsMaster && ChunkTeamCount == num_of_records - 1) {
// Allow SIZE number of teams to proceed writing their
// intermediate results to the global buffer.
- atomic::add(&IterCnt, uint32_t(num_of_records), __ATOMIC_SEQ_CST);
+ atomic::add(&IterCnt, uint32_t(num_of_records), atomic::seq_cst);
}
return 0;
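atomic::inc above relies on the wrapping semantics documented in
Synchronization.h: it returns the old value and resets the counter to zero
once it reaches the bound, so inc(&Cnt, num_of_records - 1u, ...) cycles Cnt
through 0 .. num_of_records - 1 as successive teams arrive. A sequential
scalar model of those semantics (illustrative only; the device version is a
single atomic operation):

#include <cstdint>
#include <cstdio>

// Model of the GPU-style wrapping increment: return the old value and
// store (Old >= Val) ? 0 : Old + 1.
static uint32_t atomicIncModel(uint32_t *Addr, uint32_t Val) {
  uint32_t Old = *Addr;
  *Addr = (Old >= Val) ? 0 : Old + 1;
  return Old;
}

int main() {
  uint32_t Cnt = 0;
  const uint32_t NumRecords = 3;
  for (int I = 0; I < 7; ++I)
    printf("%u ", atomicIncModel(&Cnt, NumRecords - 1u));
  printf("\n"); // prints: 0 1 2 0 1 2 0
}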
diff --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
index 43278715be8d..a1556410d5ff 100644
--- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
@@ -29,47 +29,52 @@ namespace impl {
///
///{
/// NOTE: This function needs to be implemented by every target.
-uint32_t atomicInc(uint32_t *Address, uint32_t Val, int Ordering);
+uint32_t atomicInc(uint32_t *Address, uint32_t Val,
+ atomic::OrderingTy Ordering);
-uint32_t atomicLoad(uint32_t *Address, int Ordering) {
- return __atomic_fetch_add(Address, 0U, __ATOMIC_SEQ_CST);
+uint32_t atomicLoad(uint32_t *Address, atomic::OrderingTy Ordering) {
+ return __atomic_fetch_add(Address, 0U, Ordering);
}
-void atomicStore(uint32_t *Address, uint32_t Val, int Ordering) {
+void atomicStore(uint32_t *Address, uint32_t Val, atomic::OrderingTy Ordering) {
__atomic_store_n(Address, Val, Ordering);
}
-uint32_t atomicAdd(uint32_t *Address, uint32_t Val, int Ordering) {
+uint32_t atomicAdd(uint32_t *Address, uint32_t Val,
+ atomic::OrderingTy Ordering) {
return __atomic_fetch_add(Address, Val, Ordering);
}
-uint32_t atomicMax(uint32_t *Address, uint32_t Val, int Ordering) {
+uint32_t atomicMax(uint32_t *Address, uint32_t Val,
+ atomic::OrderingTy Ordering) {
return __atomic_fetch_max(Address, Val, Ordering);
}
-uint32_t atomicExchange(uint32_t *Address, uint32_t Val, int Ordering) {
+uint32_t atomicExchange(uint32_t *Address, uint32_t Val,
+ atomic::OrderingTy Ordering) {
uint32_t R;
__atomic_exchange(Address, &Val, &R, Ordering);
return R;
}
uint32_t atomicCAS(uint32_t *Address, uint32_t Compare, uint32_t Val,
- int Ordering) {
+ atomic::OrderingTy Ordering) {
(void)__atomic_compare_exchange(Address, &Compare, &Val, false, Ordering,
Ordering);
return Compare;
}
-uint64_t atomicAdd(uint64_t *Address, uint64_t Val, int Ordering) {
+uint64_t atomicAdd(uint64_t *Address, uint64_t Val,
+ atomic::OrderingTy Ordering) {
return __atomic_fetch_add(Address, Val, Ordering);
}
///}
// Forward declarations to be defined for AMDGCN and NVPTX.
-uint32_t atomicInc(uint32_t *A, uint32_t V, int Ordering);
+uint32_t atomicInc(uint32_t *A, uint32_t V, atomic::OrderingTy Ordering);
void namedBarrierInit();
void namedBarrier();
-void fenceTeam(int Ordering);
-void fenceKernel(int Ordering);
-void fenceSystem(int Ordering);
+void fenceTeam(atomic::OrderingTy Ordering);
+void fenceKernel(atomic::OrderingTy Ordering);
+void fenceSystem(atomic::OrderingTy Ordering);
void syncWarp(__kmpc_impl_lanemask_t);
void syncThreads();
void syncThreadsAligned() { syncThreads(); }
@@ -84,22 +89,22 @@ void setLock(omp_lock_t *);
///{
#pragma omp begin declare variant match(device = {arch(amdgcn)})
-uint32_t atomicInc(uint32_t *A, uint32_t V, int Ordering) {
+uint32_t atomicInc(uint32_t *A, uint32_t V, atomic::OrderingTy Ordering) {
// builtin_amdgcn_atomic_inc32 should expand to this switch when
// passed a runtime value, but does not do so yet. Workaround here.
switch (Ordering) {
default:
__builtin_unreachable();
- case __ATOMIC_RELAXED:
- return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELAXED, "");
- case __ATOMIC_ACQUIRE:
- return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQUIRE, "");
- case __ATOMIC_RELEASE:
- return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELEASE, "");
- case __ATOMIC_ACQ_REL:
- return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQ_REL, "");
- case __ATOMIC_SEQ_CST:
- return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_SEQ_CST, "");
+ case atomic::relaxed:
+ return __builtin_amdgcn_atomic_inc32(A, V, atomic::relaxed, "");
+ case atomic::acquire:
+ return __builtin_amdgcn_atomic_inc32(A, V, atomic::acquire, "");
+ case atomic::release:
+ return __builtin_amdgcn_atomic_inc32(A, V, atomic::release, "");
+ case atomic::acq_rel:
+ return __builtin_amdgcn_atomic_inc32(A, V, atomic::acq_rel, "");
+ case atomic::seq_cst:
+ return __builtin_amdgcn_atomic_inc32(A, V, atomic::seq_cst, "");
}
}
@@ -107,7 +112,7 @@ uint32_t SHARED(namedBarrierTracker);
void namedBarrierInit() {
// Don't have global ctors, and shared memory is not zero init
- atomic::store(&namedBarrierTracker, 0u, __ATOMIC_RELEASE);
+ atomic::store(&namedBarrierTracker, 0u, atomic::release);
}
void namedBarrier() {
@@ -117,7 +122,7 @@ void namedBarrier() {
uint32_t WarpSize = mapping::getWarpSize();
uint32_t NumWaves = NumThreads / WarpSize;
- fence::team(__ATOMIC_ACQUIRE);
+ fence::team(atomic::acquire);
// named barrier implementation for amdgcn.
// Uses two 16 bit unsigned counters. One for the number of waves to have
@@ -133,7 +138,7 @@ void namedBarrier() {
// Increment the low 16 bits once, using the lowest active thread.
if (mapping::isLeaderInWarp()) {
uint32_t load = atomic::add(&namedBarrierTracker, 1,
- __ATOMIC_RELAXED); // commutative
+ atomic::relaxed); // commutative
// Record the number of times the barrier has been passed
uint32_t generation = load & 0xffff0000u;
@@ -145,61 +150,61 @@ void namedBarrier() {
load &= 0xffff0000u; // because bits zeroed second
// Reset the wave counter and release the waiting waves
- atomic::store(&namedBarrierTracker, load, __ATOMIC_RELAXED);
+ atomic::store(&namedBarrierTracker, load, atomic::relaxed);
} else {
// more waves still to go, spin until generation counter changes
do {
__builtin_amdgcn_s_sleep(0);
- load = atomic::load(&namedBarrierTracker, __ATOMIC_RELAXED);
+ load = atomic::load(&namedBarrierTracker, atomic::relaxed);
} while ((load & 0xffff0000u) == generation);
}
}
- fence::team(__ATOMIC_RELEASE);
+ fence::team(atomic::release);
}
// sema checking of amdgcn_fence is aggressive. Intention is to patch clang
// so that it is usable within a template environment and so that a runtime
// value of the memory order is expanded to this switch within clang/llvm.
-void fenceTeam(int Ordering) {
+void fenceTeam(atomic::OrderingTy Ordering) {
switch (Ordering) {
default:
__builtin_unreachable();
- case __ATOMIC_ACQUIRE:
- return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
- case __ATOMIC_RELEASE:
- return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
- case __ATOMIC_ACQ_REL:
- return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "workgroup");
- case __ATOMIC_SEQ_CST:
- return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
+ case atomic::acquire:
+ return __builtin_amdgcn_fence(atomic::acquire, "workgroup");
+ case atomic::release:
+ return __builtin_amdgcn_fence(atomic::release, "workgroup");
+ case atomic::acq_rel:
+ return __builtin_amdgcn_fence(atomic::acq_rel, "workgroup");
+ case atomic::seq_cst:
+ return __builtin_amdgcn_fence(atomic::seq_cst, "workgroup");
}
}
-void fenceKernel(int Ordering) {
+void fenceKernel(atomic::OrderingTy Ordering) {
switch (Ordering) {
default:
__builtin_unreachable();
- case __ATOMIC_ACQUIRE:
- return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
- case __ATOMIC_RELEASE:
- return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent");
- case __ATOMIC_ACQ_REL:
- return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "agent");
- case __ATOMIC_SEQ_CST:
- return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
+ case atomic::acquire:
+ return __builtin_amdgcn_fence(atomic::acquire, "agent");
+ case atomic::release:
+ return __builtin_amdgcn_fence(atomic::release, "agent");
+ case atomic::acq_rel:
+ return __builtin_amdgcn_fence(atomic::acq_rel, "agent");
+ case atomic::seq_cst:
+ return __builtin_amdgcn_fence(atomic::seq_cst, "agent");
}
}
-void fenceSystem(int Ordering) {
+void fenceSystem(atomic::OrderingTy Ordering) {
switch (Ordering) {
default:
__builtin_unreachable();
- case __ATOMIC_ACQUIRE:
- return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "");
- case __ATOMIC_RELEASE:
- return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "");
- case __ATOMIC_ACQ_REL:
- return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "");
- case __ATOMIC_SEQ_CST:
- return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
+ case atomic::acquire:
+ return __builtin_amdgcn_fence(atomic::acquire, "");
+ case atomic::release:
+ return __builtin_amdgcn_fence(atomic::release, "");
+ case atomic::acq_rel:
+ return __builtin_amdgcn_fence(atomic::acq_rel, "");
+ case atomic::seq_cst:
+ return __builtin_amdgcn_fence(atomic::seq_cst, "");
}
}
@@ -226,7 +231,8 @@ void setLock(omp_lock_t *) { __builtin_trap(); }
#pragma omp begin declare variant match( \
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
-uint32_t atomicInc(uint32_t *Address, uint32_t Val, int Ordering) {
+uint32_t atomicInc(uint32_t *Address, uint32_t Val,
+ atomic::OrderingTy Ordering) {
return __nvvm_atom_inc_gen_ui(Address, Val);
}
@@ -268,11 +274,11 @@ constexpr uint32_t SET = 1;
// called before it is defined
// here the overload won't happen. Investigate later!
void unsetLock(omp_lock_t *Lock) {
- (void)atomicExchange((uint32_t *)Lock, UNSET, __ATOMIC_SEQ_CST);
+ (void)atomicExchange((uint32_t *)Lock, UNSET, atomic::seq_cst);
}
int testLock(omp_lock_t *Lock) {
- return atomicAdd((uint32_t *)Lock, 0u, __ATOMIC_SEQ_CST);
+ return atomicAdd((uint32_t *)Lock, 0u, atomic::seq_cst);
}
void initLock(omp_lock_t *Lock) { unsetLock(Lock); }
@@ -281,7 +287,7 @@ void destroyLock(omp_lock_t *Lock) { unsetLock(Lock); }
void setLock(omp_lock_t *Lock) {
// TODO: not sure spinning is a good idea here...
- while (atomicCAS((uint32_t *)Lock, UNSET, SET, __ATOMIC_SEQ_CST) != UNSET) {
+ while (atomicCAS((uint32_t *)Lock, UNSET, SET, atomic::seq_cst) != UNSET) {
int32_t start = __nvvm_read_ptx_sreg_clock();
int32_t now;
for (;;) {
@@ -310,29 +316,29 @@ void synchronize::threads() { impl::syncThreads(); }
void synchronize::threadsAligned() { impl::syncThreadsAligned(); }
-void fence::team(int Ordering) { impl::fenceTeam(Ordering); }
+void fence::team(atomic::OrderingTy Ordering) { impl::fenceTeam(Ordering); }
-void fence::kernel(int Ordering) { impl::fenceKernel(Ordering); }
+void fence::kernel(atomic::OrderingTy Ordering) { impl::fenceKernel(Ordering); }
-void fence::system(int Ordering) { impl::fenceSystem(Ordering); }
+void fence::system(atomic::OrderingTy Ordering) { impl::fenceSystem(Ordering); }
-uint32_t atomic::load(uint32_t *Addr, int Ordering) {
+uint32_t atomic::load(uint32_t *Addr, atomic::OrderingTy Ordering) {
return impl::atomicLoad(Addr, Ordering);
}
-void atomic::store(uint32_t *Addr, uint32_t V, int Ordering) {
+void atomic::store(uint32_t *Addr, uint32_t V, atomic::OrderingTy Ordering) {
impl::atomicStore(Addr, V, Ordering);
}
-uint32_t atomic::inc(uint32_t *Addr, uint32_t V, int Ordering) {
+uint32_t atomic::inc(uint32_t *Addr, uint32_t V, atomic::OrderingTy Ordering) {
return impl::atomicInc(Addr, V, Ordering);
}
-uint32_t atomic::add(uint32_t *Addr, uint32_t V, int Ordering) {
+uint32_t atomic::add(uint32_t *Addr, uint32_t V, atomic::OrderingTy Ordering) {
return impl::atomicAdd(Addr, V, Ordering);
}
-uint64_t atomic::add(uint64_t *Addr, uint64_t V, int Ordering) {
+uint64_t atomic::add(uint64_t *Addr, uint64_t V, atomic::OrderingTy Ordering) {
return impl::atomicAdd(Addr, V, Ordering);
}
@@ -389,7 +395,7 @@ void __kmpc_end_single(IdentTy *Loc, int32_t TId) {
void __kmpc_flush(IdentTy *Loc) {
FunctionTracingRAII();
- fence::kernel(__ATOMIC_SEQ_CST);
+ fence::kernel(atomic::seq_cst);
}
uint64_t __kmpc_warp_active_thread_mask(void) {
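The NVPTX lock routines touched above form a test-and-set spinlock: setLock
spins on atomicCAS(Lock, UNSET, SET, atomic::seq_cst) with a clock-based
backoff, unsetLock releases via an exchange, and testLock reads the flag with
a zero add. A host-side analogue of the same protocol, with std::atomic
standing in for the device builtins (a sketch, not the DeviceRTL
implementation):

#include <atomic>
#include <cstdint>
#include <thread>

constexpr uint32_t UNSET = 0;
constexpr uint32_t SET = 1;

void setLock(std::atomic<uint32_t> &Lock) {
  uint32_t Expected = UNSET;
  // Acquire by CAS from UNSET to SET; on failure the CAS writes the
  // observed value into Expected, so reset it before retrying.
  while (!Lock.compare_exchange_strong(Expected, SET,
                                       std::memory_order_seq_cst)) {
    Expected = UNSET;
    std::this_thread::yield(); // stands in for the clock-based backoff
  }
}

void unsetLock(std::atomic<uint32_t> &Lock) {
  Lock.exchange(UNSET, std::memory_order_seq_cst);
}

int testLock(std::atomic<uint32_t> &Lock) {
  return Lock.fetch_add(0u, std::memory_order_seq_cst); // atomic read
}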
diff --git a/openmp/libomptarget/DeviceRTL/src/Workshare.cpp b/openmp/libomptarget/DeviceRTL/src/Workshare.cpp
index d168f219c987..ddcb41ca7cf7 100644
--- a/openmp/libomptarget/DeviceRTL/src/Workshare.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Workshare.cpp
@@ -329,7 +329,7 @@ template <typename T, typename ST> struct omptarget_nvptx_LoopSupport {
__kmpc_barrier(loc, threadId);
if (tid == 0) {
Cnt = 0;
- fence::team(__ATOMIC_SEQ_CST);
+ fence::team(atomic::seq_cst);
}
__kmpc_barrier(loc, threadId);
}
@@ -346,7 +346,7 @@ template <typename T, typename ST> struct omptarget_nvptx_LoopSupport {
unsigned int rank = utils::popc(active & lane_mask_lt);
uint64_t warp_res = 0;
if (rank == 0) {
- warp_res = atomic::add(&Cnt, change, __ATOMIC_SEQ_CST);
+ warp_res = atomic::add(&Cnt, change, atomic::seq_cst);
}
warp_res = utils::shuffle(active, warp_res, leader);
return warp_res + rank;
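This hunk sits inside a warp-aggregated increment: the leader lane issues one
atomic::add carrying the whole warp's combined change, the returned old value
is broadcast with utils::shuffle, and each active lane derives its private
index as warp_res + rank, where rank counts the active lanes below it. A
sequential model of the index computation (illustrative; __builtin_popcountll
stands in for utils::popc):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Cnt = 0;      // shared counter
  uint64_t Active = 0xB; // lanes 0, 1, 3 of the warp are active
  uint32_t Change = __builtin_popcountll(Active);

  // The leader lane performs the single atomic add; Cnt's old value is
  // what atomic::add would return and broadcast to the warp.
  uint64_t WarpRes = Cnt;
  Cnt += Change;

  for (int Lane = 0; Lane < 4; ++Lane) {
    if (!((Active >> Lane) & 1))
      continue;
    uint64_t LaneMaskLT = (1ull << Lane) - 1; // lanes below this one
    uint32_t Rank = __builtin_popcountll(Active & LaneMaskLT);
    printf("lane %d -> index %llu\n", Lane,
           (unsigned long long)(WarpRes + Rank));
  }
  // lanes 0, 1, 3 receive indices 0, 1, 2 from a single atomic operation
  return 0;
}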