[Openmp-commits] [PATCH] D102016: [libomptarget][nfc] Refactor amdgpu partial barrier to simplify adding a second one
Jon Chesterfield via Phabricator via Openmp-commits
openmp-commits at lists.llvm.org
Thu May 6 12:43:55 PDT 2021
JonChesterfield created this revision.
JonChesterfield added a reviewer: jdoerfert.
Herald added subscribers: jfb, t-tye, tpr, dstuttard, yaxunl, jvesely, kzhuravl.
JonChesterfield requested review of this revision.
Herald added subscribers: openmp-commits, sstefan1, wdng.
Herald added a project: OpenMP.
[libomptarget][nfc] Refactor amdgpu partial barrier to simplify adding a second one
D101976 <https://reviews.llvm.org/D101976> would require a second barrier instance. This NFC (no functional
change) refactor of the amdgpu partial barrier makes it simpler to add one (an extra global plus one more
line in init). It also renames the current barrier from L1 to L0.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D102016
Files:
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
Index: openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
===================================================================
--- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -52,15 +52,8 @@
return __builtin_amdgcn_read_exec();
}
-uint32_t __kmpc_L1_Barrier [[clang::loader_uninitialized]];
-#pragma allocate(__kmpc_L1_Barrier) allocator(omp_pteam_mem_alloc)
-
-EXTERN void __kmpc_impl_target_init() {
- // Don't have global ctors, and shared memory is not zero init
- __atomic_store_n(&__kmpc_L1_Barrier, 0u, __ATOMIC_RELEASE);
-}
-
-EXTERN void __kmpc_impl_named_sync(uint32_t num_threads) {
+static void pteam_mem_barrier(uint32_t num_threads, uint32_t * barrier_state)
+{
__atomic_thread_fence(__ATOMIC_ACQUIRE);
uint32_t num_waves = num_threads / WARPSIZE;
@@ -81,7 +74,7 @@
bool isLowest = GetLaneId() == lowestActiveThread;
if (isLowest) {
- uint32_t load = __atomic_fetch_add(&__kmpc_L1_Barrier, 1,
+ uint32_t load = __atomic_fetch_add(barrier_state, 1,
__ATOMIC_RELAXED); // commutative
// Record the number of times the barrier has been passed
@@ -94,18 +87,30 @@
load &= 0xffff0000u; // because bits zeroed second
// Reset the wave counter and release the waiting waves
- __atomic_store_n(&__kmpc_L1_Barrier, load, __ATOMIC_RELAXED);
+ __atomic_store_n(barrier_state, load, __ATOMIC_RELAXED);
} else {
// more waves still to go, spin until generation counter changes
do {
__builtin_amdgcn_s_sleep(0);
- load = __atomic_load_n(&__kmpc_L1_Barrier, __ATOMIC_RELAXED);
+ load = __atomic_load_n(barrier_state, __ATOMIC_RELAXED);
} while ((load & 0xffff0000u) == generation);
}
}
__atomic_thread_fence(__ATOMIC_RELEASE);
}
+uint32_t __kmpc_L0_Barrier [[clang::loader_uninitialized]];
+#pragma allocate(__kmpc_L0_Barrier) allocator(omp_pteam_mem_alloc)
+
+EXTERN void __kmpc_impl_target_init() {
+ // Don't have global ctors, and shared memory is not zero init
+ __atomic_store_n(&__kmpc_L0_Barrier, 0u, __ATOMIC_RELEASE);
+}
+
+EXTERN void __kmpc_impl_named_sync(uint32_t num_threads) {
+ pteam_mem_barrier(num_threads, &__kmpc_L0_Barrier);
+}
+
namespace {
uint32_t get_grid_dim(uint32_t n, uint16_t d) {
uint32_t q = n / d;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D102016.343479.patch
Type: text/x-patch
Size: 2390 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20210506/c26a0e11/attachment.bin>
More information about the Openmp-commits
mailing list