[Libclc-dev] [PATCH 1/4] Fix and improvements to barrier() for R600 targets

Tom Stellard tom at stellard.net
Fri Aug 22 09:45:36 PDT 2014


On Fri, Aug 22, 2014 at 02:13:24AM +0200, Hilloulin Damien wrote:
>  This patch introduces two new intrinsics and therefore
>  must be used in conjunction with the patches to the LLVM backend.

Hi,

It looks like your mail client has automatically wrapped long lines,
which makes the commit comment hard to read.  You should use
git send-email to submit your patches.


> It fixes the
>  behaviour of barrier(0) : previously no barrier was generated, this
> is fixed

I think the old behavior is correct.  Does the specification say we need
to emit a barrier when the argument is 0?

>  by making a call to the new intrinsic barrier.nofence(). The patch also
>  changes the behaviour of barrier( CLK_LOCAL_MEM_FENCE |
> CLK_GLOBAL_MEM_FENCE
>  ) : previously two barriers were generated, now just one with the new
>  intrinsic barrier.localglobal().
> 

I don't think this is the place to put this optimization.  libclc
shouldn't know that llvm.AMDGPU.barrier.local and llvm.AMDGPU.barrier.global
are lowered to the same instruction.  We should be doing this optimization
in the LLVM backend.

-Tom

> Signed-off-by: Damien Hilloulin <damien.hilloulin at supelec.fr>
> ---
>  r600/lib/synchronization/barrier_impl.ll | 37
> ++++++++++++++++++++++++--------
>  1 file changed, 28 insertions(+), 9 deletions(-)
> 
> diff --git a/r600/lib/synchronization/barrier_impl.ll
> b/r600/lib/synchronization/barrier_impl.ll
> index 3d8ee66..3ef6341 100644
> --- a/r600/lib/synchronization/barrier_impl.ll
> +++ b/r600/lib/synchronization/barrier_impl.ll
> @@ -1,29 +1,48 @@
>  declare i32 @__clc_clk_local_mem_fence() nounwind alwaysinline
>  declare i32 @__clc_clk_global_mem_fence() nounwind alwaysinline
> +declare void @llvm.AMDGPU.barrier.nofence() nounwind noduplicate
>  declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
>  declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate
> +declare void @llvm.AMDGPU.barrier.localglobal() nounwind noduplicate
> 
>  define void @barrier(i32 %flags) nounwind noduplicate alwaysinline {
> -barrier_local_test:
> +
> +;flags_masking:
>    %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
> -  %0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE
> -  %1 = icmp ne i32 %0, 0
> -  br i1 %1, label %barrier_local, label %barrier_global_test
> +  %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
> +  %CLK_LOCAL_GLOBAL_MEM_FENCE = or i32 %CLK_LOCAL_MEM_FENCE,
> %CLK_GLOBAL_MEM_FENCE
> +  %FLAGS_BARRIER_LOCAL_MASKED = and i32 %flags, %CLK_LOCAL_MEM_FENCE
> +  %FLAGS_BARRIER_GLOBAL_MASKED = and i32 %flags, %CLK_GLOBAL_MEM_FENCE
> +
> +;barrier_local_and_global_test:
> +  %BARRIER_LOCAL_AND_GLOBAL_TEST_RESULT = icmp eq i32 %flags,
> %CLK_LOCAL_GLOBAL_MEM_FENCE
> +  br i1 %BARRIER_LOCAL_AND_GLOBAL_TEST_RESULT, label
> %barrier_local_and_global, label %barrier_local_test
> +
> +barrier_local_and_global:
> +  call void @llvm.AMDGPU.barrier.localglobal() noduplicate
> +  br label %done
> +
> +barrier_local_test:
> +  %BARRIER_LOCAL_TEST_RESULT = icmp ne i32 %FLAGS_BARRIER_LOCAL_MASKED, 0
> +  br i1 %BARRIER_LOCAL_TEST_RESULT, label %barrier_local, label
> %barrier_global_test
> 
>  barrier_local:
>    call void @llvm.AMDGPU.barrier.local() noduplicate
> -  br label %barrier_global_test
> +  br label %done
> 
>  barrier_global_test:
> -  %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
> -  %2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE
> -  %3 = icmp ne i32 %2, 0
> -  br i1 %3, label %barrier_global, label %done
> +  %BARRIER_GLOBAL_TEST_RESULT = icmp ne i32 %FLAGS_BARRIER_GLOBAL_MASKED, 0
> +  br i1 %BARRIER_GLOBAL_TEST_RESULT, label %barrier_global, label
> %barrier_nofence
> 
>  barrier_global:
>    call void @llvm.AMDGPU.barrier.global() noduplicate
>    br label %done
> 
> +; default case: no memory fence queued
> +barrier_nofence:
> +  call void @llvm.AMDGPU.barrier.nofence() noduplicate
> +  br label %done
> +
>  done:
>    ret void
>  }
> -- 
> 1.9.1
> 
> 
> _______________________________________________
> Libclc-dev mailing list
> Libclc-dev at pcc.me.uk
> http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev




More information about the Libclc-dev mailing list