[libclc] Fix memory_scope and memory_order of *mem_fence builtins (PR #181311)
Wenju He via cfe-commits
cfe-commits at lists.llvm.org
Thu Feb 12 21:58:25 PST 2026
https://github.com/wenju-he created https://github.com/llvm/llvm-project/pull/181311
See OpenCL spec 6.15.12.5.
https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#fences
// Older syntax memory fences are equivalent to atomic_work_item_fence with the
// same flags parameter, memory_scope_work_group scope, and ordering as follows:
void mem_fence(cl_mem_fence_flags flags) // memory_order_acq_rel
void read_mem_fence(cl_mem_fence_flags flags) // memory_order_acquire
void write_mem_fence(cl_mem_fence_flags flags) // memory_order_release
>From b025d155d33c7e5bc1561efb6819de94f17a04cf Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 13 Feb 2026 06:56:53 +0100
Subject: [PATCH] [libclc] Fix memory_scope and memory_order of *mem_fence
builtins
See OpenCL spec 6.15.12.5.
https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#fences
// Older syntax memory fences are equivalent to atomic_work_item_fence with the
// same flags parameter, memory_scope_work_group scope, and ordering as follows:
void mem_fence(cl_mem_fence_flags flags) // memory_order_acq_rel
void read_mem_fence(cl_mem_fence_flags flags) // memory_order_acquire
void write_mem_fence(cl_mem_fence_flags flags) // memory_order_release
---
libclc/opencl/lib/amdgcn/mem_fence/fence.cl | 15 ++++++++++-----
libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl | 13 ++++++++++---
2 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
index 7e5d97bc6de62..38fb15c2c1de8 100644
--- a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
+++ b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
@@ -10,17 +10,22 @@
#include <clc/opencl/synchronization/utils.h>
_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
- int memory_scope = __opencl_get_memory_scope(flags);
- int memory_order = __ATOMIC_SEQ_CST;
+ int memory_scope = __MEMORY_SCOPE_WRKGRP;
+ int memory_order = __ATOMIC_ACQ_REL;
__CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
__clc_mem_fence(memory_scope, memory_order, memory_semantics);
}
-// We don't have separate mechanism for read and write fences
_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
- mem_fence(flags);
+ int memory_scope = __MEMORY_SCOPE_WRKGRP;
+ int memory_order = __ATOMIC_ACQUIRE;
+ __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+ __clc_mem_fence(memory_scope, memory_order, memory_semantics);
}
_CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
- mem_fence(flags);
+ int memory_scope = __MEMORY_SCOPE_WRKGRP;
+ int memory_order = __ATOMIC_RELEASE;
+ __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+ __clc_mem_fence(memory_scope, memory_order, memory_semantics);
}
diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
index c799cf2ad7dde..5226f0c6edf40 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
@@ -10,17 +10,24 @@
#include <clc/opencl/synchronization/utils.h>
_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
- int memory_scope = __opencl_get_memory_scope(flags);
- int memory_order = __ATOMIC_SEQ_CST;
+ int memory_scope = __MEMORY_SCOPE_WRKGRP;
+ int memory_order = __ATOMIC_ACQ_REL;
__CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
__clc_mem_fence(memory_scope, memory_order, memory_semantics);
}
-// We do not have separate mechanism for read and write fences.
_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
+ int memory_scope = __MEMORY_SCOPE_WRKGRP;
+ int memory_order = __ATOMIC_ACQUIRE;
+ __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+ __clc_mem_fence(memory_scope, memory_order, memory_semantics);
mem_fence(flags);
}
_CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
+ int memory_scope = __MEMORY_SCOPE_WRKGRP;
+ int memory_order = __ATOMIC_RELEASE;
+ __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+ __clc_mem_fence(memory_scope, memory_order, memory_semantics);
mem_fence(flags);
}
More information about the cfe-commits mailing list