[libclc] libclc: Use separate acquire and release fences in work_group_barrier (PR #185190)

Matt Arsenault via cfe-commits cfe-commits at lists.llvm.org
Sat Mar 7 05:33:35 PST 2026


https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/185190

None

>From 9c3c86223234e46f65e30a5a055e9da2acf3b69f Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 7 Mar 2026 14:32:18 +0100
Subject: [PATCH] libclc: Use separate acquire and release fences in
 work_group_barrier

---
 .../synchronization/clc_work_group_barrier.h  |  2 +-
 .../synchronization/clc_work_group_barrier.cl | 20 ++++++++++++++++---
 .../synchronization/clc_work_group_barrier.cl |  3 +--
 .../synchronization/work_group_barrier.cl     |  3 +--
 4 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
index 34745bd47c068..e98dc38e1b0b3 100644
--- a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
+++ b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
@@ -13,7 +13,7 @@
 #include <clc/mem_fence/clc_mem_semantic.h>
 
 _CLC_OVERLOAD _CLC_DECL void
-__clc_work_group_barrier(int memory_scope, int memory_order,
+__clc_work_group_barrier(int memory_scope,
                          __CLC_MemorySemantics memory_semantics);
 
 #endif // __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__
diff --git a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
index 034e6e7bd8ed4..67b3d9b2f308b 100644
--- a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
+++ b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
@@ -10,8 +10,22 @@
 #include <clc/synchronization/clc_work_group_barrier.h>
 
 _CLC_OVERLOAD _CLC_DEF void
-__clc_work_group_barrier(int memory_scope, int memory_order,
+__clc_work_group_barrier(int memory_scope,
                          __CLC_MemorySemantics memory_semantics) {
-  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
-  __builtin_amdgcn_s_barrier();
+  if (memory_semantics == 0) { // No memory semantics set: barrier only.
+    __builtin_amdgcn_s_barrier();
+  } else {
+    int memory_order_before =
+        memory_semantics == (__CLC_MEMORY_GLOBAL | __CLC_MEMORY_LOCAL)
+            ? __ATOMIC_SEQ_CST // Exactly global+local: keep seq_cst.
+            : __ATOMIC_RELEASE;
+    int memory_order_after =
+        memory_semantics == (__CLC_MEMORY_GLOBAL | __CLC_MEMORY_LOCAL)
+            ? __ATOMIC_SEQ_CST // Exactly global+local: keep seq_cst.
+            : __ATOMIC_ACQUIRE;
+    // Fence before the barrier and again after it with the paired order.
+    __clc_mem_fence(memory_scope, memory_order_before, memory_semantics);
+    __builtin_amdgcn_s_barrier();
+    __clc_mem_fence(memory_scope, memory_order_after, memory_semantics);
+  }
 }
diff --git a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
index 3afc88ca50b15..35b381052367d 100644
--- a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
+++ b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
@@ -9,10 +9,9 @@
 #include <clc/synchronization/clc_work_group_barrier.h>
 
 _CLC_OVERLOAD _CLC_DEF void
-__clc_work_group_barrier(int memory_scope, int memory_order,
+__clc_work_group_barrier(int memory_scope,
                          __CLC_MemorySemantics memory_semantics) {
   (void)memory_scope;
-  (void)memory_order;
   (void)memory_semantics;
   __syncthreads();
 }
diff --git a/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl b/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl
index 14de313c4f582..595c7f8cd95a6 100644
--- a/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl
+++ b/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl
@@ -12,9 +12,8 @@
 
 _CLC_DEF _CLC_OVERLOAD void work_group_barrier(cl_mem_fence_flags flags,
                                                memory_scope scope) {
-  int memory_order = __ATOMIC_SEQ_CST;
   __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
-  __clc_work_group_barrier(__opencl_get_clang_memory_scope(scope), memory_order,
+  __clc_work_group_barrier(__opencl_get_clang_memory_scope(scope),
                            memory_semantics);
 }
 



More information about the cfe-commits mailing list