[libclc] Fix memory_scope and memory_order of *mem_fence builtins (PR #181311)
Wenju He via cfe-commits
cfe-commits at lists.llvm.org
Thu Feb 12 22:04:42 PST 2026
https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/181311
>From b025d155d33c7e5bc1561efb6819de94f17a04cf Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 13 Feb 2026 06:56:53 +0100
Subject: [PATCH 1/3] [libclc] Fix memory_scope and memory_order of *mem_fence
builtins
See the OpenCL C specification, section 6.15.12.5 (Fences), quoted below:
https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#fences
// Older syntax memory fences are equivalent to atomic_work_item_fence with the
// same flags parameter, memory_scope_work_group scope, and ordering as follows:
void mem_fence(cl_mem_fence_flags flags) // memory_order_acq_rel
void read_mem_fence(cl_mem_fence_flags flags) // memory_order_acquire
void write_mem_fence(cl_mem_fence_flags flags) // memory_order_release
---
libclc/opencl/lib/amdgcn/mem_fence/fence.cl | 15 ++++++++++-----
libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl | 13 ++++++++++---
2 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
index 7e5d97bc6de62..38fb15c2c1de8 100644
--- a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
+++ b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
@@ -10,17 +10,22 @@
#include <clc/opencl/synchronization/utils.h>
_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
- int memory_scope = __opencl_get_memory_scope(flags);
- int memory_order = __ATOMIC_SEQ_CST;
+ int memory_scope = __MEMORY_SCOPE_WRKGRP;
+ int memory_order = __ATOMIC_ACQ_REL;
__CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
__clc_mem_fence(memory_scope, memory_order, memory_semantics);
}
-// We don't have separate mechanism for read and write fences
_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
- mem_fence(flags);
+ int memory_scope = __MEMORY_SCOPE_WRKGRP;
+ int memory_order = __ATOMIC_ACQUIRE;
+ __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+ __clc_mem_fence(memory_scope, memory_order, memory_semantics);
}
_CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
- mem_fence(flags);
+ int memory_scope = __MEMORY_SCOPE_WRKGRP;
+ int memory_order = __ATOMIC_RELEASE;
+ __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+ __clc_mem_fence(memory_scope, memory_order, memory_semantics);
}
diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
index c799cf2ad7dde..5226f0c6edf40 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
@@ -10,17 +10,24 @@
#include <clc/opencl/synchronization/utils.h>
_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
- int memory_scope = __opencl_get_memory_scope(flags);
- int memory_order = __ATOMIC_SEQ_CST;
+ int memory_scope = __MEMORY_SCOPE_WRKGRP;
+ int memory_order = __ATOMIC_ACQ_REL;
__CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
__clc_mem_fence(memory_scope, memory_order, memory_semantics);
}
-// We do not have separate mechanism for read and write fences.
_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
+ int memory_scope = __MEMORY_SCOPE_WRKGRP;
+ int memory_order = __ATOMIC_ACQUIRE;
+ __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+ __clc_mem_fence(memory_scope, memory_order, memory_semantics);
mem_fence(flags);
}
_CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
+ int memory_scope = __MEMORY_SCOPE_WRKGRP;
+ int memory_order = __ATOMIC_RELEASE;
+ __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+ __clc_mem_fence(memory_scope, memory_order, memory_semantics);
mem_fence(flags);
}
>From b0a29573d451733a32a2c2b1a5cb947200cd5bbb Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 13 Feb 2026 14:04:23 +0800
Subject: [PATCH 2/3] Update libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl | 1 -
1 file changed, 1 deletion(-)
diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
index 5226f0c6edf40..fa7f3c931370d 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
@@ -21,7 +21,6 @@ _CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
int memory_order = __ATOMIC_ACQUIRE;
__CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
__clc_mem_fence(memory_scope, memory_order, memory_semantics);
- mem_fence(flags);
}
_CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
>From b8dd75f3b3487e0447effb24374eee67a745b02d Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 13 Feb 2026 14:04:33 +0800
Subject: [PATCH 3/3] Update libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl | 1 -
1 file changed, 1 deletion(-)
diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
index fa7f3c931370d..38fb15c2c1de8 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
@@ -28,5 +28,4 @@ _CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
int memory_order = __ATOMIC_RELEASE;
__CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
__clc_mem_fence(memory_scope, memory_order, memory_semantics);
- mem_fence(flags);
}
More information about the cfe-commits mailing list