[libclc] libclc: Use separate acquire and release fences in work_group_barrier (PR #185190)
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Sat Mar 7 05:33:35 PST 2026
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/185190
(No pull request description was provided.)
>From 9c3c86223234e46f65e30a5a055e9da2acf3b69f Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 7 Mar 2026 14:32:18 +0100
Subject: [PATCH] libclc: Use separate acquire and release fences in work_group_barrier
---
.../synchronization/clc_work_group_barrier.h | 2 +-
.../synchronization/clc_work_group_barrier.cl | 20 ++++++++++++++++---
.../synchronization/clc_work_group_barrier.cl | 3 +--
.../synchronization/work_group_barrier.cl | 3 +--
4 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
index 34745bd47c068..e98dc38e1b0b3 100644
--- a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
+++ b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
@@ -13,7 +13,7 @@
#include <clc/mem_fence/clc_mem_semantic.h>
_CLC_OVERLOAD _CLC_DECL void
-__clc_work_group_barrier(int memory_scope, int memory_order,
+__clc_work_group_barrier(int memory_scope,
__CLC_MemorySemantics memory_semantics);
#endif // __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__
diff --git a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
index 034e6e7bd8ed4..67b3d9b2f308b 100644
--- a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
+++ b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
@@ -10,8 +10,22 @@
#include <clc/synchronization/clc_work_group_barrier.h>
_CLC_OVERLOAD _CLC_DEF void
-__clc_work_group_barrier(int memory_scope, int memory_order,
+__clc_work_group_barrier(int memory_scope,
__CLC_MemorySemantics memory_semantics) {
- __clc_mem_fence(memory_scope, memory_order, memory_semantics);
- __builtin_amdgcn_s_barrier();
+ if (memory_semantics == 0) {
+ __builtin_amdgcn_s_barrier();
+ } else {
+ int memory_order_before =
+ memory_semantics & (__CLC_MEMORY_GLOBAL | __CLC_MEMORY_LOCAL)
+ ? __ATOMIC_SEQ_CST
+ : __ATOMIC_RELEASE;
+ int memory_order_after =
+ memory_semantics & (__CLC_MEMORY_GLOBAL | __CLC_MEMORY_LOCAL)
+ ? __ATOMIC_SEQ_CST
+ : __ATOMIC_ACQUIRE;
+
+ __clc_mem_fence(memory_scope, memory_order_before, memory_semantics);
+ __builtin_amdgcn_s_barrier();
+ __clc_mem_fence(memory_scope, memory_order_after, memory_semantics);
+ }
}
diff --git a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
index 3afc88ca50b15..35b381052367d 100644
--- a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
+++ b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
@@ -9,10 +9,9 @@
#include <clc/synchronization/clc_work_group_barrier.h>
_CLC_OVERLOAD _CLC_DEF void
-__clc_work_group_barrier(int memory_scope, int memory_order,
+__clc_work_group_barrier(int memory_scope,
__CLC_MemorySemantics memory_semantics) {
(void)memory_scope;
- (void)memory_order;
(void)memory_semantics;
__syncthreads();
}
diff --git a/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl b/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl
index 14de313c4f582..595c7f8cd95a6 100644
--- a/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl
+++ b/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl
@@ -12,9 +12,8 @@
_CLC_DEF _CLC_OVERLOAD void work_group_barrier(cl_mem_fence_flags flags,
memory_scope scope) {
- int memory_order = __ATOMIC_SEQ_CST;
__CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
- __clc_work_group_barrier(__opencl_get_clang_memory_scope(scope), memory_order,
+ __clc_work_group_barrier(__opencl_get_clang_memory_scope(scope),
memory_semantics);
}
More information about the cfe-commits mailing list