[libclc] Fix memory_scope and memory_order of *mem_fence builtins (PR #181311)

Wenju He via cfe-commits cfe-commits at lists.llvm.org
Thu Feb 12 22:04:42 PST 2026


https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/181311

>From b025d155d33c7e5bc1561efb6819de94f17a04cf Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 13 Feb 2026 06:56:53 +0100
Subject: [PATCH 1/3] [libclc] Fix memory_scope and memory_order of *mem_fence
 builtins

See section 6.15.12.5 (Fences) of the OpenCL C specification:
https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#fences

// Older syntax memory fences are equivalent to atomic_work_item_fence with the
// same flags parameter, memory_scope_work_group scope, and ordering as follows:
void mem_fence(cl_mem_fence_flags flags)        // memory_order_acq_rel
void read_mem_fence(cl_mem_fence_flags flags)   // memory_order_acquire
void write_mem_fence(cl_mem_fence_flags flags)  // memory_order_release
---
 libclc/opencl/lib/amdgcn/mem_fence/fence.cl       | 15 ++++++++++-----
 libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl | 13 ++++++++++---
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
index 7e5d97bc6de62..38fb15c2c1de8 100644
--- a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
+++ b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
@@ -10,17 +10,22 @@
 #include <clc/opencl/synchronization/utils.h>
 
 _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
-  int memory_scope = __opencl_get_memory_scope(flags);
-  int memory_order = __ATOMIC_SEQ_CST;
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_ACQ_REL;
   __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
   __clc_mem_fence(memory_scope, memory_order, memory_semantics);
 }
 
-// We don't have separate mechanism for read and write fences
 _CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
-  mem_fence(flags);
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_ACQUIRE;
+  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
 }
 
 _CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
-  mem_fence(flags);
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_RELEASE;
+  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
 }
diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
index c799cf2ad7dde..5226f0c6edf40 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
@@ -10,17 +10,24 @@
 #include <clc/opencl/synchronization/utils.h>
 
 _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
-  int memory_scope = __opencl_get_memory_scope(flags);
-  int memory_order = __ATOMIC_SEQ_CST;
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_ACQ_REL;
   __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
   __clc_mem_fence(memory_scope, memory_order, memory_semantics);
 }
 
-// We do not have separate mechanism for read and write fences.
 _CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_ACQUIRE;
+  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
   mem_fence(flags);
 }
 
 _CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_RELEASE;
+  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
   mem_fence(flags);
 }

>From b0a29573d451733a32a2c2b1a5cb947200cd5bbb Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 13 Feb 2026 14:04:23 +0800
Subject: [PATCH 2/3] Update libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl

Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
 libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
index 5226f0c6edf40..fa7f3c931370d 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
@@ -21,7 +21,6 @@ _CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
   int memory_order = __ATOMIC_ACQUIRE;
   __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
   __clc_mem_fence(memory_scope, memory_order, memory_semantics);
-  mem_fence(flags);
 }
 
 _CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {

>From b8dd75f3b3487e0447effb24374eee67a745b02d Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 13 Feb 2026 14:04:33 +0800
Subject: [PATCH 3/3] Update libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl

Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
 libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
index fa7f3c931370d..38fb15c2c1de8 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
@@ -28,5 +28,4 @@ _CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
   int memory_order = __ATOMIC_RELEASE;
   __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
   __clc_mem_fence(memory_scope, memory_order, memory_semantics);
-  mem_fence(flags);
 }



More information about the cfe-commits mailing list