[Libclc-dev] [PATCH 3/4] read_mem_fence() support for R600 targets

Tom Stellard tom at stellard.net
Fri Aug 22 10:03:56 PDT 2014


On Fri, Aug 22, 2014 at 02:13:37AM +0200, Hilloulin Damien wrote:
>  This patch introduces three new intrinsics and therefore must be used in
>  conjunction with the patches to the LLVM backend. It adds the
>  read_mem_fence built-in to libclc, and an implementation for r600 using
>  the newly introduced LLVM intrinsics. It is a plain copy&paste of the
>  mem_fence patch, with some find&replace.
> 

Same comments as for patch #2: split this into two patches (the generic
implementation first, then the r600 implementation) and drop the
localglobal intrinsic.

-Tom

> Signed-off-by: Damien Hilloulin <damien.hilloulin at supelec.fr>
> ---
>  generic/include/clc/clc.h                          |  1 +
>  .../clc/explicitmemoryfence/read_mem_fence.h       |  1 +
>  r600/lib/SOURCES                                   |  1 +
>  .../lib/explicitmemoryfence/read_mem_fence_impl.ll | 44
> ++++++++++++++++++++++
>  4 files changed, 47 insertions(+)
>  create mode 100644
> generic/include/clc/explicitmemoryfence/read_mem_fence.h
>  create mode 100644 r600/lib/explicitmemoryfence/read_mem_fence_impl.ll
> 
> diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
> index 13d9c11..3d65b36 100644
> --- a/generic/include/clc/clc.h
> +++ b/generic/include/clc/clc.h
> @@ -128,6 +128,7 @@
> 
>  /* 6.11.9 Explicit memory fence Functions */
>  #include <clc/explicitmemoryfence/mem_fence.h>
> +#include <clc/explicitmemoryfence/read_mem_fence.h>
> 
>  /* 6.11.10 Async Copy and Prefetch Functions */
>  #include <clc/async/prefetch.h>
> diff --git
> a/generic/include/clc/explicitmemoryfence/read_mem_fence.h
> b/generic/include/clc/explicitmemoryfence/read_mem_fence.h
> new file mode 100644
> index 0000000..519133c
> --- /dev/null
> +++ b/generic/include/clc/explicitmemoryfence/read_mem_fence.h
> @@ -0,0 +1 @@
> +_CLC_DECL void read_mem_fence(cl_mem_fence_flags flags);
> diff --git a/r600/lib/SOURCES b/r600/lib/SOURCES
> index 1a38b76..b2d143b 100644
> --- a/r600/lib/SOURCES
> +++ b/r600/lib/SOURCES
> @@ -1,5 +1,6 @@
>  atomic/atomic.cl
>  explicitmemoryfence/mem_fence_impl.ll
> +explicitmemoryfence/read_mem_fence_impl.ll
>  math/nextafter.cl
>  workitem/get_num_groups.ll
>  workitem/get_group_id.ll
> diff --git a/r600/lib/explicitmemoryfence/read_mem_fence_impl.ll
> b/r600/lib/explicitmemoryfence/read_mem_fence_impl.ll
> new file mode 100644
> index 0000000..3ae9cb2
> --- /dev/null
> +++ b/r600/lib/explicitmemoryfence/read_mem_fence_impl.ll
> @@ -0,0 +1,44 @@
> +declare i32 @__clc_clk_local_mem_fence() nounwind alwaysinline
> +declare i32 @__clc_clk_global_mem_fence() nounwind alwaysinline
> +declare void @llvm.AMDGPU.read_mem_fence.local() nounwind noduplicate
> +declare void @llvm.AMDGPU.read_mem_fence.global() nounwind noduplicate
> +declare void @llvm.AMDGPU.read_mem_fence.localglobal() nounwind noduplicate
> +
> +define void @read_mem_fence(i32 %flags) nounwind noduplicate alwaysinline {
> +
> +;flags_masking:
> +  %CLK_LOCAL_READ_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
> +  %CLK_GLOBAL_READ_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
> +  %CLK_LOCAL_GLOBAL_READ_MEM_FENCE = or i32
> %CLK_LOCAL_READ_MEM_FENCE, %CLK_GLOBAL_READ_MEM_FENCE
> +  %FLAGS_READ_MEM_FENCE_LOCAL_MASKED = and i32 %flags,
> %CLK_LOCAL_READ_MEM_FENCE
> +  %FLAGS_READ_MEM_FENCE_GLOBAL_MASKED = and i32 %flags,
> %CLK_GLOBAL_READ_MEM_FENCE
> +
> +;read_mem_fence_local_and_global_test:
> +  %READ_MEM_FENCE_LOCAL_AND_GLOBAL_TEST_RESULT = icmp eq i32
> %flags, %CLK_LOCAL_GLOBAL_READ_MEM_FENCE
> +  br i1 %READ_MEM_FENCE_LOCAL_AND_GLOBAL_TEST_RESULT, label
> %read_mem_fence_local_and_global, label %read_mem_fence_local_test
> +
> +read_mem_fence_local_and_global:
> +  call void @llvm.AMDGPU.read_mem_fence.localglobal() noduplicate
> +  br label %done
> +
> +read_mem_fence_local_test:
> +  %READ_MEM_FENCE_LOCAL_TEST_RESULT = icmp ne i32
> %FLAGS_READ_MEM_FENCE_LOCAL_MASKED, 0
> +  br i1 %READ_MEM_FENCE_LOCAL_TEST_RESULT, label
> %read_mem_fence_local, label %read_mem_fence_global_test
> +
> +read_mem_fence_local:
> +  call void @llvm.AMDGPU.read_mem_fence.local() noduplicate
> +  br label %done
> +
> +read_mem_fence_global_test:
> +  %READ_MEM_FENCE_GLOBAL_TEST_RESULT = icmp ne i32
> %FLAGS_READ_MEM_FENCE_GLOBAL_MASKED, 0
> +  br i1 %READ_MEM_FENCE_GLOBAL_TEST_RESULT, label
> %read_mem_fence_global, label %done
> +
> +read_mem_fence_global:
> +  call void @llvm.AMDGPU.read_mem_fence.global() noduplicate
> +  br label %done
> +
> +; nothing to do if flags equals 0
> +
> +done:
> +  ret void
> +}
> -- 
> 1.9.1
> 
> 
> _______________________________________________
> Libclc-dev mailing list
> Libclc-dev at pcc.me.uk
> http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev




More information about the Libclc-dev mailing list