[libclc] 6577520 - libclc: Use separate acquire and release fences in work_group_barrier (#185190)

via cfe-commits cfe-commits at lists.llvm.org
Sat Mar 7 23:49:33 PST 2026


Author: Matt Arsenault
Date: 2026-03-08T08:49:30+01:00
New Revision: 65775204b7af5ca9a65a03876a162e98d452beee

URL: https://github.com/llvm/llvm-project/commit/65775204b7af5ca9a65a03876a162e98d452beee
DIFF: https://github.com/llvm/llvm-project/commit/65775204b7af5ca9a65a03876a162e98d452beee.diff

LOG: libclc: Use separate acquire and release fences in work_group_barrier (#185190)

Added: 
    

Modified: 
    libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
    libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
    libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
    libclc/opencl/lib/generic/synchronization/work_group_barrier.cl

Removed: 
    


################################################################################
diff --git a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
index 34745bd47c068..e98dc38e1b0b3 100644
--- a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
+++ b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
@@ -13,7 +13,7 @@
 #include <clc/mem_fence/clc_mem_semantic.h>
 
 _CLC_OVERLOAD _CLC_DECL void
-__clc_work_group_barrier(int memory_scope, int memory_order,
+__clc_work_group_barrier(int memory_scope,
                          __CLC_MemorySemantics memory_semantics);
 
 #endif // __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__

diff --git a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
index 034e6e7bd8ed4..67b3d9b2f308b 100644
--- a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
+++ b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
@@ -10,8 +10,22 @@
 #include <clc/synchronization/clc_work_group_barrier.h>
 
 _CLC_OVERLOAD _CLC_DEF void
-__clc_work_group_barrier(int memory_scope, int memory_order,
+__clc_work_group_barrier(int memory_scope,
                          __CLC_MemorySemantics memory_semantics) {
-  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
-  __builtin_amdgcn_s_barrier();
+  if (memory_semantics == 0) {
+    __builtin_amdgcn_s_barrier();
+  } else {
+    int memory_order_before =
+        memory_semantics & (__CLC_MEMORY_GLOBAL | __CLC_MEMORY_LOCAL)
+            ? __ATOMIC_SEQ_CST
+            : __ATOMIC_RELEASE;
+    int memory_order_after =
+        memory_semantics & (__CLC_MEMORY_GLOBAL | __CLC_MEMORY_LOCAL)
+            ? __ATOMIC_SEQ_CST
+            : __ATOMIC_ACQUIRE;
+
+    __clc_mem_fence(memory_scope, memory_order_before, memory_semantics);
+    __builtin_amdgcn_s_barrier();
+    __clc_mem_fence(memory_scope, memory_order_after, memory_semantics);
+  }
 }

diff --git a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
index 3afc88ca50b15..35b381052367d 100644
--- a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
+++ b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
@@ -9,10 +9,9 @@
 #include <clc/synchronization/clc_work_group_barrier.h>
 
 _CLC_OVERLOAD _CLC_DEF void
-__clc_work_group_barrier(int memory_scope, int memory_order,
+__clc_work_group_barrier(int memory_scope,
                          __CLC_MemorySemantics memory_semantics) {
   (void)memory_scope;
-  (void)memory_order;
   (void)memory_semantics;
   __syncthreads();
 }

diff --git a/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl b/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl
index 14de313c4f582..595c7f8cd95a6 100644
--- a/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl
+++ b/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl
@@ -12,9 +12,8 @@
 
 _CLC_DEF _CLC_OVERLOAD void work_group_barrier(cl_mem_fence_flags flags,
                                                memory_scope scope) {
-  int memory_order = __ATOMIC_SEQ_CST;
   __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
-  __clc_work_group_barrier(__opencl_get_clang_memory_scope(scope), memory_order,
+  __clc_work_group_barrier(__opencl_get_clang_memory_scope(scope),
                            memory_semantics);
 }
 


        


More information about the cfe-commits mailing list