[Libclc-dev] [PATCH 4/4] write_mem_fence() support for R600 targets

Hilloulin Damien damien.hilloulin at supelec.fr
Thu Aug 21 17:13:41 PDT 2014


  This patch adds the write_mem_fence built-in to libclc, together with an
  implementation for r600. The implementation uses three newly introduced
  LLVM intrinsics, so this patch must be used in conjunction with the
  corresponding patches to the LLVM backend. It is a plain copy & paste of
  the read_mem_fence patch, with some find & replace.

Signed-off-by: Damien Hilloulin <damien.hilloulin at supelec.fr>
---
  generic/include/clc/clc.h                          |  1 +
  .../clc/explicitmemoryfence/write_mem_fence.h      |  1 +
  r600/lib/SOURCES                                   |  1 +
  .../explicitmemoryfence/write_mem_fence_impl.ll    | 44 ++++++++++++++++++++++
  4 files changed, 47 insertions(+)
  create mode 100644 generic/include/clc/explicitmemoryfence/write_mem_fence.h
  create mode 100644 r600/lib/explicitmemoryfence/write_mem_fence_impl.ll

diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
index 3d65b36..4834ce4 100644
--- a/generic/include/clc/clc.h
+++ b/generic/include/clc/clc.h
@@ -129,6 +129,7 @@
  /* 6.11.9 Explicit memory fence Functions */
  #include <clc/explicitmemoryfence/mem_fence.h>
  #include <clc/explicitmemoryfence/read_mem_fence.h>
+#include <clc/explicitmemoryfence/write_mem_fence.h>

  /* 6.11.10 Async Copy and Prefetch Functions */
  #include <clc/async/prefetch.h>
diff --git a/generic/include/clc/explicitmemoryfence/write_mem_fence.h b/generic/include/clc/explicitmemoryfence/write_mem_fence.h
new file mode 100644
index 0000000..d0e31f8
--- /dev/null
+++ b/generic/include/clc/explicitmemoryfence/write_mem_fence.h
@@ -0,0 +1 @@
+_CLC_DECL void write_mem_fence(cl_mem_fence_flags flags);
diff --git a/r600/lib/SOURCES b/r600/lib/SOURCES
index b2d143b..b2d82a5 100644
--- a/r600/lib/SOURCES
+++ b/r600/lib/SOURCES
@@ -1,6 +1,7 @@
  atomic/atomic.cl
  explicitmemoryfence/mem_fence_impl.ll
  explicitmemoryfence/read_mem_fence_impl.ll
+explicitmemoryfence/write_mem_fence_impl.ll
  math/nextafter.cl
  workitem/get_num_groups.ll
  workitem/get_group_id.ll
diff --git a/r600/lib/explicitmemoryfence/write_mem_fence_impl.ll b/r600/lib/explicitmemoryfence/write_mem_fence_impl.ll
new file mode 100644
index 0000000..74ae898
--- /dev/null
+++ b/r600/lib/explicitmemoryfence/write_mem_fence_impl.ll
@@ -0,0 +1,44 @@
+declare i32 @__clc_clk_local_mem_fence() nounwind alwaysinline
+declare i32 @__clc_clk_global_mem_fence() nounwind alwaysinline
+declare void @llvm.AMDGPU.write_mem_fence.local() nounwind noduplicate
+declare void @llvm.AMDGPU.write_mem_fence.global() nounwind noduplicate
+declare void @llvm.AMDGPU.write_mem_fence.localglobal() nounwind noduplicate
+; write_mem_fence(flags): emits at most one write-fence intrinsic, selected by which of the local/global masks are set in %flags.
+define void @write_mem_fence(i32 %flags) nounwind noduplicate alwaysinline {
+; The two masks are obtained from the __clc_clk_*_mem_fence() helpers; bits of %flags outside those masks are ignored.
+;flags_masking:
+  %CLK_LOCAL_WRITE_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
+  %CLK_GLOBAL_WRITE_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
+  %CLK_LOCAL_GLOBAL_WRITE_MEM_FENCE = or i32 %CLK_LOCAL_WRITE_MEM_FENCE, %CLK_GLOBAL_WRITE_MEM_FENCE
+  %FLAGS_WRITE_MEM_FENCE_LOCAL_MASKED = and i32 %flags, %CLK_LOCAL_WRITE_MEM_FENCE
+  %FLAGS_WRITE_MEM_FENCE_GLOBAL_MASKED = and i32 %flags, %CLK_GLOBAL_WRITE_MEM_FENCE
+  %FLAGS_WRITE_MEM_FENCE_LOCAL_GLOBAL_MASKED = and i32 %flags, %CLK_LOCAL_GLOBAL_WRITE_MEM_FENCE
+;write_mem_fence_local_and_global_test: compare the masked flags, so an unrelated bit in %flags cannot downgrade a local+global request to local only
+  %WRITE_MEM_FENCE_LOCAL_AND_GLOBAL_TEST_RESULT = icmp eq i32 %FLAGS_WRITE_MEM_FENCE_LOCAL_GLOBAL_MASKED, %CLK_LOCAL_GLOBAL_WRITE_MEM_FENCE
+  br i1 %WRITE_MEM_FENCE_LOCAL_AND_GLOBAL_TEST_RESULT, label %write_mem_fence_local_and_global, label %write_mem_fence_local_test
+; both masks set: a single combined fence
+write_mem_fence_local_and_global:
+  call void @llvm.AMDGPU.write_mem_fence.localglobal() noduplicate
+  br label %done
+; not both: check the local mask on its own
+write_mem_fence_local_test:
+  %WRITE_MEM_FENCE_LOCAL_TEST_RESULT = icmp ne i32 %FLAGS_WRITE_MEM_FENCE_LOCAL_MASKED, 0
+  br i1 %WRITE_MEM_FENCE_LOCAL_TEST_RESULT, label %write_mem_fence_local, label %write_mem_fence_global_test
+; local mask set, global mask not fully set: local fence only
+write_mem_fence_local:
+  call void @llvm.AMDGPU.write_mem_fence.local() noduplicate
+  br label %done
+; local mask clear: check the global mask
+write_mem_fence_global_test:
+  %WRITE_MEM_FENCE_GLOBAL_TEST_RESULT = icmp ne i32 %FLAGS_WRITE_MEM_FENCE_GLOBAL_MASKED, 0
+  br i1 %WRITE_MEM_FENCE_GLOBAL_TEST_RESULT, label %write_mem_fence_global, label %done
+; global mask set, local mask clear: global fence only
+write_mem_fence_global:
+  call void @llvm.AMDGPU.write_mem_fence.global() noduplicate
+  br label %done
+
+; nothing to do if neither the local nor the global mask is set in flags
+
+done:
+  ret void
+}
-- 
1.9.1





More information about the Libclc-dev mailing list