[libclc] 6577520 - libclc: Use separate acquire and release fences in work_group_barrier (#185190)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Mar 7 23:49:33 PST 2026
Author: Matt Arsenault
Date: 2026-03-08T08:49:30+01:00
New Revision: 65775204b7af5ca9a65a03876a162e98d452beee
URL: https://github.com/llvm/llvm-project/commit/65775204b7af5ca9a65a03876a162e98d452beee
DIFF: https://github.com/llvm/llvm-project/commit/65775204b7af5ca9a65a03876a162e98d452beee.diff
LOG: libclc: Use separate acquire and release fences in work_group_barrier (#185190)
Added:
Modified:
libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
libclc/opencl/lib/generic/synchronization/work_group_barrier.cl
Removed:
################################################################################
diff --git a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
index 34745bd47c068..e98dc38e1b0b3 100644
--- a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
+++ b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
@@ -13,7 +13,7 @@
#include <clc/mem_fence/clc_mem_semantic.h>
_CLC_OVERLOAD _CLC_DECL void
-__clc_work_group_barrier(int memory_scope, int memory_order,
+__clc_work_group_barrier(int memory_scope,
__CLC_MemorySemantics memory_semantics);
#endif // __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__
diff --git a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
index 034e6e7bd8ed4..67b3d9b2f308b 100644
--- a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
+++ b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
@@ -10,8 +10,22 @@
#include <clc/synchronization/clc_work_group_barrier.h>
_CLC_OVERLOAD _CLC_DEF void
-__clc_work_group_barrier(int memory_scope, int memory_order,
+__clc_work_group_barrier(int memory_scope,
__CLC_MemorySemantics memory_semantics) {
- __clc_mem_fence(memory_scope, memory_order, memory_semantics);
- __builtin_amdgcn_s_barrier();
+ if (memory_semantics == 0) {
+ __builtin_amdgcn_s_barrier();
+ } else {
+ int memory_order_before =
+ memory_semantics == (__CLC_MEMORY_GLOBAL | __CLC_MEMORY_LOCAL)
+ ? __ATOMIC_SEQ_CST
+ : __ATOMIC_RELEASE;
+ int memory_order_after =
+ memory_semantics == (__CLC_MEMORY_GLOBAL | __CLC_MEMORY_LOCAL)
+ ? __ATOMIC_SEQ_CST
+ : __ATOMIC_ACQUIRE;
+ // seq_cst iff semantics == GLOBAL|LOCAL, else release then acquire.
+ __clc_mem_fence(memory_scope, memory_order_before, memory_semantics);
+ __builtin_amdgcn_s_barrier();
+ __clc_mem_fence(memory_scope, memory_order_after, memory_semantics);
+ }
}
diff --git a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
index 3afc88ca50b15..35b381052367d 100644
--- a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
+++ b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
@@ -9,10 +9,9 @@
#include <clc/synchronization/clc_work_group_barrier.h>
_CLC_OVERLOAD _CLC_DEF void
-__clc_work_group_barrier(int memory_scope, int memory_order,
+__clc_work_group_barrier(int memory_scope,
__CLC_MemorySemantics memory_semantics) {
(void)memory_scope;
- (void)memory_order;
(void)memory_semantics;
__syncthreads();
}
diff --git a/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl b/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl
index 14de313c4f582..595c7f8cd95a6 100644
--- a/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl
+++ b/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl
@@ -12,9 +12,8 @@
_CLC_DEF _CLC_OVERLOAD void work_group_barrier(cl_mem_fence_flags flags,
memory_scope scope) {
- int memory_order = __ATOMIC_SEQ_CST;
__CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
- __clc_work_group_barrier(__opencl_get_clang_memory_scope(scope), memory_order,
+ __clc_work_group_barrier(__opencl_get_clang_memory_scope(scope),
memory_semantics);
}
More information about the cfe-commits
mailing list