[Libclc-dev] [PATCH 2/4] mem_fence() support for R600 targets
Hilloulin Damien
damien.hilloulin at supelec.fr
Thu Aug 21 17:13:30 PDT 2014
This patch relies on three new LLVM intrinsics and therefore must be used in
conjunction with the corresponding patches to the LLVM backend. It adds the
mem_fence built-in function, together with an implementation for r600 that
uses the newly introduced intrinsics. (The implementation is mainly a
copy-paste of barrier_impl.ll, except that if flags equals 0 we do nothing.)
Signed-off-by: Damien Hilloulin <damien.hilloulin at supelec.fr>
---
generic/include/clc/clc.h | 3 ++
.../include/clc/explicitmemoryfence/mem_fence.h | 1 +
r600/lib/SOURCES | 1 +
r600/lib/explicitmemoryfence/mem_fence_impl.ll | 44 ++++++++++++++++++++++
4 files changed, 49 insertions(+)
create mode 100644 generic/include/clc/explicitmemoryfence/mem_fence.h
create mode 100644 r600/lib/explicitmemoryfence/mem_fence_impl.ll
diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
index 84b25ac..13d9c11 100644
--- a/generic/include/clc/clc.h
+++ b/generic/include/clc/clc.h
@@ -126,6 +126,9 @@
#include <clc/synchronization/cl_mem_fence_flags.h>
#include <clc/synchronization/barrier.h>
+/* 6.11.9 Explicit memory fence Functions */
+#include <clc/explicitmemoryfence/mem_fence.h>
+
/* 6.11.10 Async Copy and Prefetch Functions */
#include <clc/async/prefetch.h>
diff --git a/generic/include/clc/explicitmemoryfence/mem_fence.h b/generic/include/clc/explicitmemoryfence/mem_fence.h
new file mode 100644
index 0000000..15f4b39
--- /dev/null
+++ b/generic/include/clc/explicitmemoryfence/mem_fence.h
@@ -0,0 +1 @@
+_CLC_DECL void mem_fence(cl_mem_fence_flags flags);
diff --git a/r600/lib/SOURCES b/r600/lib/SOURCES
index ba56605..1a38b76 100644
--- a/r600/lib/SOURCES
+++ b/r600/lib/SOURCES
@@ -1,4 +1,5 @@
atomic/atomic.cl
+explicitmemoryfence/mem_fence_impl.ll
math/nextafter.cl
workitem/get_num_groups.ll
workitem/get_group_id.ll
diff --git a/r600/lib/explicitmemoryfence/mem_fence_impl.ll b/r600/lib/explicitmemoryfence/mem_fence_impl.ll
new file mode 100644
index 0000000..b04be89
--- /dev/null
+++ b/r600/lib/explicitmemoryfence/mem_fence_impl.ll
@@ -0,0 +1,44 @@
+declare i32 @__clc_clk_local_mem_fence() nounwind alwaysinline
+declare i32 @__clc_clk_global_mem_fence() nounwind alwaysinline
+declare void @llvm.AMDGPU.mem_fence.local() nounwind noduplicate
+declare void @llvm.AMDGPU.mem_fence.global() nounwind noduplicate
+declare void @llvm.AMDGPU.mem_fence.localglobal() nounwind noduplicate
+; mem_fence(i32 %flags): calls at most one of the three llvm.AMDGPU.mem_fence.* intrinsics declared above, chosen from %flags; the __clc_clk_* helpers (defined elsewhere) presumably return the CLK_*_MEM_FENCE flag values.
+define void @mem_fence(i32 %flags) nounwind noduplicate alwaysinline {
+
+; flags_masking: fetch the two fence-flag constants, OR them into the combined value, and AND %flags with each single constant.
+ %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
+ %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
+ %CLK_LOCAL_GLOBAL_MEM_FENCE = or i32 %CLK_LOCAL_MEM_FENCE,
%CLK_GLOBAL_MEM_FENCE
+ %FLAGS_MEM_FENCE_LOCAL_MASKED = and i32 %flags, %CLK_LOCAL_MEM_FENCE
+ %FLAGS_MEM_FENCE_GLOBAL_MASKED = and i32 %flags, %CLK_GLOBAL_MEM_FENCE
+
+; mem_fence_local_and_global_test: exact equality of %flags with (local | global) selects the combined fence. NOTE(review): both bits plus any other bit fails this test and takes the local-only path below.
+ %MEM_FENCE_LOCAL_AND_GLOBAL_TEST_RESULT = icmp eq i32 %flags,
%CLK_LOCAL_GLOBAL_MEM_FENCE
+ br i1 %MEM_FENCE_LOCAL_AND_GLOBAL_TEST_RESULT, label
%mem_fence_local_and_global, label %mem_fence_local_test
+
+mem_fence_local_and_global:
+ call void @llvm.AMDGPU.mem_fence.localglobal() noduplicate
+ br label %done
+; Not the exact combined value: check the local bit first, and only if it is clear check the global bit.
+mem_fence_local_test:
+ %MEM_FENCE_LOCAL_TEST_RESULT = icmp ne i32
%FLAGS_MEM_FENCE_LOCAL_MASKED, 0
+ br i1 %MEM_FENCE_LOCAL_TEST_RESULT, label %mem_fence_local, label
%mem_fence_global_test
+
+mem_fence_local:
+ call void @llvm.AMDGPU.mem_fence.local() noduplicate
+ br label %done
+
+mem_fence_global_test:
+ %MEM_FENCE_GLOBAL_TEST_RESULT = icmp ne i32
%FLAGS_MEM_FENCE_GLOBAL_MASKED, 0
+ br i1 %MEM_FENCE_GLOBAL_TEST_RESULT, label %mem_fence_global, label %done
+
+mem_fence_global:
+ call void @llvm.AMDGPU.mem_fence.global() noduplicate
+ br label %done
+
+; nothing to do if neither the local nor the global bit is set (e.g. flags equals 0); every path joins at done
+
+done:
+ ret void
+}
--
1.9.1
More information about the Libclc-dev
mailing list