[llvm-branch-commits] [libclc] af16fc2 - [libclc] Move mem_fence and barrier to clc library (#151446)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Aug 5 19:05:08 PDT 2025
Author: Wenju He
Date: 2025-08-06T09:49:28+08:00
New Revision: af16fc2e2a50c1cbac49726ea70739ad6e193729
URL: https://github.com/llvm/llvm-project/commit/af16fc2e2a50c1cbac49726ea70739ad6e193729
DIFF: https://github.com/llvm/llvm-project/commit/af16fc2e2a50c1cbac49726ea70739ad6e193729.diff
LOG: [libclc] Move mem_fence and barrier to clc library (#151446)
The __clc_mem_fence and __clc_work_group_barrier functions take two
parameters, memory_scope and memory_order. This design allows the clc
functions to implement the SPIR-V ControlBarrier and MemoryBarrier
functions in the future.
The default memory ordering in clc is set to __ATOMIC_SEQ_CST, which is
also the default and strongest ordering in OpenCL and C++.
The OpenCL cl_mem_fence_flags parameter is converted to a combination of
__MEMORY_SCOPE_DEVICE and __MEMORY_SCOPE_WRKGRP, which is passed to clc.
llvm-diff shows no change to nvptx64--nvidiacl.bc.
llvm-diff shows a small change to amdgcn--amdhsa.bc: the number of
LLVM IR instructions is reduced by 1: https://alive2.llvm.org/ce/z/_Uhqvt
Added:
libclc/clc/include/clc/mem_fence/clc_mem_fence.h
libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl
libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
libclc/opencl/include/clc/opencl/synchronization/utils.h
Modified:
libclc/clc/lib/amdgcn/SOURCES
libclc/clc/lib/ptx-nvidiacl/SOURCES
libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h
libclc/opencl/lib/amdgcn/mem_fence/fence.cl
libclc/opencl/lib/amdgcn/synchronization/barrier.cl
libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
Removed:
################################################################################
diff --git a/libclc/clc/include/clc/mem_fence/clc_mem_fence.h b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h
new file mode 100644
index 0000000000000..2321634c76842
--- /dev/null
+++ b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MEM_FENCE_CLC_MEM_FENCE_H__
+#define __CLC_MEM_FENCE_CLC_MEM_FENCE_H__
+
+#include <clc/internal/clc.h>
+
+_CLC_OVERLOAD _CLC_DECL void __clc_mem_fence(int memory_scope,
+ int memory_order);
+
+#endif // __CLC_MEM_FENCE_CLC_MEM_FENCE_H__
diff --git a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
new file mode 100644
index 0000000000000..5f864e1057b8b
--- /dev/null
+++ b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__
+#define __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__
+
+#include <clc/internal/clc.h>
+
+_CLC_OVERLOAD _CLC_DECL void __clc_work_group_barrier(int memory_scope,
+ int memory_order);
+
+#endif // __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__
diff --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES
index d91f08533e149..76c3266e3af7b 100644
--- a/libclc/clc/lib/amdgcn/SOURCES
+++ b/libclc/clc/lib/amdgcn/SOURCES
@@ -1,4 +1,6 @@
math/clc_ldexp_override.cl
+mem_fence/clc_mem_fence.cl
+synchronization/clc_work_group_barrier.cl
workitem/clc_get_global_offset.cl
workitem/clc_get_global_size.cl
workitem/clc_get_group_id.cl
diff --git a/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl
new file mode 100644
index 0000000000000..9e6460313718e
--- /dev/null
+++ b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl
@@ -0,0 +1,37 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/mem_fence/clc_mem_fence.h>
+
+void __clc_amdgcn_s_waitcnt(unsigned flags);
+
+// s_waitcnt takes 16bit argument with a combined number of maximum allowed
+// pending operations:
+// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages
+// [7] -- undefined
+// [6:4] -- exports, GDS, and mem write
+// [3:0] -- vector memory operations
+
+// Newer clang supports __builtin_amdgcn_s_waitcnt
+#if __clang_major__ >= 5
+#define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x)
+#else
+#define __waitcnt(x) __clc_amdgcn_s_waitcnt(x)
+_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt");
+#endif
+
+_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope,
+ int memory_order) {
+ if (memory_scope & __MEMORY_SCOPE_DEVICE) {
+ // scalar loads are counted with LGKM but we don't know whether
+ // the compiler turned any loads to scalar
+ __waitcnt(0);
+ } else if (memory_scope & __MEMORY_SCOPE_WRKGRP)
+ __waitcnt(0xff); // LGKM is [12:8]
+}
+#undef __waitcnt
diff --git a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
new file mode 100644
index 0000000000000..ff3628fa7c339
--- /dev/null
+++ b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/mem_fence/clc_mem_fence.h>
+#include <clc/synchronization/clc_work_group_barrier.h>
+
+_CLC_OVERLOAD _CLC_DEF void __clc_work_group_barrier(int memory_scope,
+ int memory_order) {
+ __clc_mem_fence(memory_scope, memory_order);
+ __builtin_amdgcn_s_barrier();
+}
diff --git a/libclc/clc/lib/ptx-nvidiacl/SOURCES b/libclc/clc/lib/ptx-nvidiacl/SOURCES
index 05368c5e4d4e3..b6f50654f89c5 100644
--- a/libclc/clc/lib/ptx-nvidiacl/SOURCES
+++ b/libclc/clc/lib/ptx-nvidiacl/SOURCES
@@ -1,3 +1,5 @@
+mem_fence/clc_mem_fence.cl
+synchronization/clc_work_group_barrier.cl
workitem/clc_get_global_id.cl
workitem/clc_get_group_id.cl
workitem/clc_get_local_id.cl
diff --git a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
new file mode 100644
index 0000000000000..b3e2375e755a2
--- /dev/null
+++ b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/mem_fence/clc_mem_fence.h>
+
+_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope,
+ int memory_order) {
+ if (memory_scope & (__MEMORY_SCOPE_DEVICE | __MEMORY_SCOPE_WRKGRP))
+ __nvvm_membar_cta();
+}
diff --git a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
new file mode 100644
index 0000000000000..6cb37a38f06ac
--- /dev/null
+++ b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
@@ -0,0 +1,14 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/synchronization/clc_work_group_barrier.h>
+
+_CLC_OVERLOAD _CLC_DEF void __clc_work_group_barrier(int memory_scope,
+ int memory_order) {
+ __syncthreads();
+}
diff --git a/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h b/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h
index 6636515fca47d..7b2f701c1ff99 100644
--- a/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h
+++ b/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h
@@ -13,5 +13,6 @@ typedef uint cl_mem_fence_flags;
#define CLK_LOCAL_MEM_FENCE 1
#define CLK_GLOBAL_MEM_FENCE 2
+#define CLK_IMAGE_MEM_FENCE 4
#endif // __CLC_OPENCL_SYNCHRONIZATION_CL_MEM_FENCE_FLAGS_H__
diff --git a/libclc/opencl/include/clc/opencl/synchronization/utils.h b/libclc/opencl/include/clc/opencl/synchronization/utils.h
new file mode 100644
index 0000000000000..cf3baf28cb5f1
--- /dev/null
+++ b/libclc/opencl/include/clc/opencl/synchronization/utils.h
@@ -0,0 +1,24 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__
+#define __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__
+
+#include <clc/internal/clc.h>
+#include <clc/opencl/synchronization/cl_mem_fence_flags.h>
+
+_CLC_INLINE int getCLCMemoryScope(cl_mem_fence_flags flag) {
+ int memory_scope = 0;
+ if (flag & CLK_GLOBAL_MEM_FENCE)
+ memory_scope |= __MEMORY_SCOPE_DEVICE;
+ if (flag & CLK_LOCAL_MEM_FENCE)
+ memory_scope |= __MEMORY_SCOPE_WRKGRP;
+ return memory_scope;
+}
+
+#endif // __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__
diff --git a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
index 88b953005aae6..81216d6a26cf2 100644
--- a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
+++ b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
@@ -6,34 +6,15 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/mem_fence/clc_mem_fence.h>
#include <clc/opencl/explicit_fence/explicit_memory_fence.h>
-
-void __clc_amdgcn_s_waitcnt(unsigned flags);
-
-// s_waitcnt takes 16bit argument with a combined number of maximum allowed
-// pending operations:
-// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages
-// [7] -- undefined
-// [6:4] -- exports, GDS, and mem write
-// [3:0] -- vector memory operations
-
-// Newer clang supports __builtin_amdgcn_s_waitcnt
-#if __clang_major__ >= 5
-#define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x)
-#else
-#define __waitcnt(x) __clc_amdgcn_s_waitcnt(x)
-_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt");
-#endif
+#include <clc/opencl/synchronization/utils.h>
_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
- if (flags & CLK_GLOBAL_MEM_FENCE) {
- // scalar loads are counted with LGKM but we don't know whether
- // the compiler turned any loads to scalar
- __waitcnt(0);
- } else if (flags & CLK_LOCAL_MEM_FENCE)
- __waitcnt(0xff); // LGKM is [12:8]
+ int memory_scope = getCLCMemoryScope(flags);
+ int memory_order = __ATOMIC_SEQ_CST;
+ __clc_mem_fence(memory_scope, memory_order);
}
-#undef __waitcnt
// We don't have separate mechanism for read and write fences
_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
diff --git a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl
index 5203db72f484c..c8322e602302c 100644
--- a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl
+++ b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl
@@ -6,10 +6,12 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/opencl/explicit_fence/explicit_memory_fence.h>
#include <clc/opencl/synchronization/barrier.h>
+#include <clc/opencl/synchronization/utils.h>
+#include <clc/synchronization/clc_work_group_barrier.h>
_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
- mem_fence(flags);
- __builtin_amdgcn_s_barrier();
+ int memory_scope = getCLCMemoryScope(flags);
+ int memory_order = __ATOMIC_SEQ_CST;
+ __clc_work_group_barrier(memory_scope, memory_order);
}
diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
index d24569ecda1bc..e22ed870a7e6b 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
@@ -6,11 +6,14 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/mem_fence/clc_mem_fence.h>
#include <clc/opencl/explicit_fence/explicit_memory_fence.h>
+#include <clc/opencl/synchronization/utils.h>
_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
- if (flags & (CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE))
- __nvvm_membar_cta();
+ int memory_scope = getCLCMemoryScope(flags);
+ int memory_order = __ATOMIC_SEQ_CST;
+ __clc_mem_fence(memory_scope, memory_order);
}
// We do not have separate mechanism for read and write fences.
diff --git a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
index 7c57478795dda..c8322e602302c 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
@@ -7,7 +7,11 @@
//===----------------------------------------------------------------------===//
#include <clc/opencl/synchronization/barrier.h>
+#include <clc/opencl/synchronization/utils.h>
+#include <clc/synchronization/clc_work_group_barrier.h>
_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
- __syncthreads();
+ int memory_scope = getCLCMemoryScope(flags);
+ int memory_order = __ATOMIC_SEQ_CST;
+ __clc_work_group_barrier(memory_scope, memory_order);
}
More information about the llvm-branch-commits
mailing list