[Openmp-commits] [openmp] [OFFLOAD] Add spirv implementation for named barrier (PR #180393)
via Openmp-commits
openmp-commits at lists.llvm.org
Sat Feb 7 23:23:11 PST 2026
https://github.com/fineg74 updated https://github.com/llvm/llvm-project/pull/180393
>From 2ddf1f718c7319c5b18efb9102ca4267b481e10e Mon Sep 17 00:00:00 2001
From: "Fine, Gregory" <gregory.fine at intel.com>
Date: Sat, 7 Feb 2026 23:16:41 -0800
Subject: [PATCH 1/2] Add spirv implementation for named barrier
---
openmp/device/src/Synchronization.cpp | 43 +++++++++++++++++++++++++--
1 file changed, 41 insertions(+), 2 deletions(-)
diff --git a/openmp/device/src/Synchronization.cpp b/openmp/device/src/Synchronization.cpp
index 3554226d2ee75..7a065b0a27fcf 100644
--- a/openmp/device/src/Synchronization.cpp
+++ b/openmp/device/src/Synchronization.cpp
@@ -181,8 +181,47 @@ void setCriticalLock(omp_lock_t *Lock) { setLock(Lock); }
///}
#if defined(__SPIRV__)
-void namedBarrierInit() { __builtin_trap(); } // TODO
-void namedBarrier() { __builtin_trap(); } // TODO
+// Packed named-barrier state: arrival count in the low 16 bits, generation in the high 16 bits.
+[[clang::loader_uninitialized]] Local<uint32_t> namedBarrierTracker;
+/// Reset the named-barrier tracker to zero, clearing both the arrival count and the generation.
+void namedBarrierInit() {
+ atomic::store(&namedBarrierTracker, 0u, atomic::seq_cst);
+}
+/// Counter-based spin barrier (SPIR-V build); expects omp_get_num_threads() arrivals per round.
+void namedBarrier() {
+ uint32_t NumThreads = omp_get_num_threads();
+ // NOTE(review): the logic below assumes atomic::add returns the value held before the add - confirm.
+ // Uses two 16 bit unsigned counters. One for the number of threads to have
+ // reached the barrier, and one to count how many times the barrier has been
+ // passed (the generation). Both are packed in a single atomically accessed
+ // 32 bit integer. Low bits: arrival count, zero before the first arrival of a
+ // round. High bits: generation, bumped by the last thread to arrive.
+
+ // Increment the low 16 bits once.
+
+ uint32_t load = atomic::add(&namedBarrierTracker, 1,
+ atomic::seq_cst);
+
+ // Record the number of times the barrier has been passed
+ uint32_t generation = load & 0xffff0000u;
+ // NOTE(review): an arrival count above 0xffff would carry into the generation bits - confirm NumThreads stays below that.
+ if ((load & 0x0000ffffu) == (NumThreads - 1)) {
+ // Low bits held NumThreads - 1 before our increment, so this is the last
+ // thread to arrive. Set low bits to zero and increment high bits.
+ load += 0x00010000u; // next generation; wrap is safe
+ load &= 0xffff0000u; // clear the arrival count (low 16 bits)
+
+ // Reset the arrival counter and release the waiting threads
+ atomic::store(&namedBarrierTracker, load, atomic::seq_cst);
+ } else {
+ // more threads still to arrive, spin until the generation counter changes
+ do {
+ load = atomic::load(&namedBarrierTracker, atomic::seq_cst);
+ } while ((load & 0xffff0000u) == generation);
+ }
+ __gpu_sync_threads();
+
+}
void unsetLock(omp_lock_t *Lock) {
atomic::store((int32_t *)Lock, 0, atomic::seq_cst);
>From 7ada5b0b4a848192b914873639feb54211f314f6 Mon Sep 17 00:00:00 2001
From: "Fine, Gregory" <gregory.fine at intel.com>
Date: Sat, 7 Feb 2026 23:23:01 -0800
Subject: [PATCH 2/2] Fix formatting
---
openmp/device/src/Synchronization.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/openmp/device/src/Synchronization.cpp b/openmp/device/src/Synchronization.cpp
index 7a065b0a27fcf..cebc9ea2e5796 100644
--- a/openmp/device/src/Synchronization.cpp
+++ b/openmp/device/src/Synchronization.cpp
@@ -199,8 +199,7 @@ void namedBarrier() {
// Increment the low 16 bits once.
- uint32_t load = atomic::add(&namedBarrierTracker, 1,
- atomic::seq_cst);
+ uint32_t load = atomic::add(&namedBarrierTracker, 1, atomic::seq_cst);
// Record the number of times the barrier has been passed
uint32_t generation = load & 0xffff0000u;
@@ -220,7 +219,6 @@ void namedBarrier() {
} while ((load & 0xffff0000u) == generation);
}
__gpu_sync_threads();
-
}
void unsetLock(omp_lock_t *Lock) {
More information about the Openmp-commits
mailing list