[Libclc-dev] [PATCH 1/2] amdgcn: Implement {read_, write_, }mem_fence builtin

Jan Vesely via Libclc-dev libclc-dev at lists.llvm.org
Fri Aug 11 18:59:02 PDT 2017


Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
---

The __builtin_amdgcn_s_waitcnt path is compile-tested only. I currently
don't have a machine with GCN hw and LLVM > 4.

Jan

 amdgcn/lib/SOURCES                                 |  2 ++
 amdgcn/lib/mem_fence/fence.cl                      | 32 ++++++++++++++++++++++
 amdgcn/lib/mem_fence/waitcnt.ll                    | 11 ++++++++
 generic/include/clc/clc.h                          |  3 ++
 .../clc/explicit_fence/explicit_memory_fence.h     |  3 ++
 5 files changed, 51 insertions(+)
 create mode 100644 amdgcn/lib/mem_fence/fence.cl
 create mode 100644 amdgcn/lib/mem_fence/waitcnt.ll
 create mode 100644 generic/include/clc/explicit_fence/explicit_memory_fence.h

diff --git a/amdgcn/lib/SOURCES b/amdgcn/lib/SOURCES
index 1ff5fd1..24f5949 100644
--- a/amdgcn/lib/SOURCES
+++ b/amdgcn/lib/SOURCES
@@ -1,4 +1,6 @@
 math/ldexp.cl
+mem_fence/fence.cl
+mem_fence/waitcnt.ll
 synchronization/barrier_impl.ll
 workitem/get_global_offset.cl
 workitem/get_group_id.cl
diff --git a/amdgcn/lib/mem_fence/fence.cl b/amdgcn/lib/mem_fence/fence.cl
new file mode 100644
index 0000000..f64c6e2
--- /dev/null
+++ b/amdgcn/lib/mem_fence/fence.cl
@@ -0,0 +1,32 @@
+#include <clc/clc.h>
+
+void __clc_amdgcn_s_waitcnt(unsigned flags);
+
+// Only clang >= 5 uses __builtin_amdgcn_s_waitcnt; older ones call the wrapper
+#if __clang_major__ < 5
+#  define __waitcnt(x) __clc_amdgcn_s_waitcnt(x)
+#else
+#  define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x)
+#endif
+
+_CLC_DEF void mem_fence(cl_mem_fence_flags flags)
+{
+	if (flags & CLK_GLOBAL_MEM_FENCE) {
+		// LGKM also counts scalar mem; any load/store may have become scalar
+		__waitcnt(0);
+	} else if (flags & CLK_LOCAL_MEM_FENCE) {
+		__waitcnt(0xff); // leaves the LGKM field, bits [12:8], at zero
+	}
+}
+#undef __waitcnt
+
+// There is no separate mechanism for read and write fences; both use mem_fence
+_CLC_DEF void read_mem_fence(cl_mem_fence_flags flags)
+{
+	mem_fence(flags);
+}
+
+_CLC_DEF void write_mem_fence(cl_mem_fence_flags flags)
+{
+	mem_fence(flags);
+}
diff --git a/amdgcn/lib/mem_fence/waitcnt.ll b/amdgcn/lib/mem_fence/waitcnt.ll
new file mode 100644
index 0000000..8be7f18
--- /dev/null
+++ b/amdgcn/lib/mem_fence/waitcnt.ll
@@ -0,0 +1,11 @@
+declare void @llvm.amdgcn.s.waitcnt(i32) #0
+
+; Wrapper exposing the s.waitcnt intrinsic to fence.cl when built with clang < 5
+define void @__clc_amdgcn_s_waitcnt(i32 %flags) #1 {
+entry:
+  tail call void @llvm.amdgcn.s.waitcnt(i32 %flags)
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind alwaysinline } ; NOTE(review): presumably inlined so the callers' constants reach the intrinsic - confirm
diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
index 5130632..deb9d70 100644
--- a/generic/include/clc/clc.h
+++ b/generic/include/clc/clc.h
@@ -179,6 +179,9 @@
 #include <clc/synchronization/cl_mem_fence_flags.h>
 #include <clc/synchronization/barrier.h>
 
+/* 6.11.9 Explicit Memory Fence Functions */
+#include <clc/explicit_fence/explicit_memory_fence.h>
+
 /* 6.11.10 Async Copy and Prefetch Functions */
 #include <clc/async/async_work_group_copy.h>
 #include <clc/async/async_work_group_strided_copy.h>
diff --git a/generic/include/clc/explicit_fence/explicit_memory_fence.h b/generic/include/clc/explicit_fence/explicit_memory_fence.h
new file mode 100644
index 0000000..8e046b1
--- /dev/null
+++ b/generic/include/clc/explicit_fence/explicit_memory_fence.h
@@ -0,0 +1,3 @@
+_CLC_DECL void mem_fence(cl_mem_fence_flags flags);       /* 6.11.9 Explicit Memory Fence Functions. */
+_CLC_DECL void read_mem_fence(cl_mem_fence_flags flags);  /* flags: CLK_GLOBAL_MEM_FENCE and/or      */
+_CLC_DECL void write_mem_fence(cl_mem_fence_flags flags); /* CLK_LOCAL_MEM_FENCE bits.               */
-- 
2.9.4



More information about the Libclc-dev mailing list