[PATCH] R600/SI: Add a pattern for i32 ctpop inside a branch

Matt Arsenault arsenm2 at gmail.com
Wed Jun 18 10:28:47 PDT 2014


On Jun 18, 2014, at 9:25 AM, Tom Stellard <thomas.stellard at amd.com> wrote:

> ---
> lib/Target/R600/SIInstructions.td |  5 +++++
> test/CodeGen/R600/ctpop.ll        | 30 ++++++++++++++++++++++++++++++
> 2 files changed, 35 insertions(+)
> 
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 7dde8c3..2121a1e 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -2714,6 +2714,11 @@ def : Pat <
>> ;
> 
> def : Pat <
> +   (i32 (ctpop i32:$popcnt)),
> +   (V_BCNT_U32_B32_e32 $popcnt, (V_MOV_B32_e32 0))
> +>;
> +
> +def : Pat <
>   (i64 (ctpop i64:$src)),
>   (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
>     (S_BCNT1_I32_B64 $src), sub0),
> diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll
> index e4d11e0..15be8e1 100644
> --- a/test/CodeGen/R600/ctpop.ll
> +++ b/test/CodeGen/R600/ctpop.ll
> @@ -252,3 +252,33 @@ define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrsp
>   store i32 %add, i32 addrspace(1)* %out, align 4
>   ret void
> }
> +
> +; FIXME: We currently disallow SALU instructions in all branches,
> +; but there are some cases when they should be allowed.
> +
> +; FUNC-LABEL: @ctpop_i32_in_br
> +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
> +; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
> +; SI: BUFFER_STORE_DWORD [[RESULT]],
> +; SI: S_ENDPGM
> +; EG: BCNT_INT
> +define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) {
> +entry:
> +  %0 = icmp eq i32 %cond, 0
> +  br i1 %0, label %if, label %else
> +
> +if:
> +  %1 = load i32 addrspace(1)* %in
> +  %2 = call i32 @llvm.ctpop.i32(i32 %1)
> +  br label %endif
> +
> +else:
> +  %3 = getelementptr i32 addrspace(1)* %in, i32 1
> +  %4 = load i32 addrspace(1)* %3
> +  br label %endif
> +
> +endif:
> +  %5 = phi i32 [%2, %if], [%4, %else]
> +  store i32 %5, i32 addrspace(1)* %out
> +  ret void
> +}
> -- 
> 1.8.1.5

Why not do the same for i64 as well, unless this is supposed to be a very temporary hack?



More information about the llvm-commits mailing list