[PATCH] R600: Add LDS_MAX_[U]INT and LDS_MAX_[U]INT_RET for Evergreen

Matt Arsenault arsenm2 at gmail.com
Mon Sep 8 17:49:06 PDT 2014


On Sep 8, 2014, at 7:49 PM, Aaron Watry <awatry at gmail.com> wrote:

> This was only present for SI before.
> 
> Cayman may still be missing, but I am unable to test that currently.
> 
> Signed-off-by: Aaron Watry <awatry at gmail.com>
> CC: Tom Stellard <thomas.stellard at amd.com>
> CC: Matt Arsenault <matthew.arsenault at amd.com>
> ---
> lib/Target/R600/EvergreenInstructions.td |  8 ++++
> test/CodeGen/R600/atomic_load_max.ll     | 75 ++++++++++++++++++++++++++++++++
> 2 files changed, 83 insertions(+)
> create mode 100644 test/CodeGen/R600/atomic_load_max.ll
> 
> diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td
> index ea3f550..338b6b3 100644
> --- a/lib/Target/R600/EvergreenInstructions.td
> +++ b/lib/Target/R600/EvergreenInstructions.td
> @@ -489,6 +489,8 @@ class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
> 
> def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >;
> def LDS_SUB : R600_LDS_1A1D_NORET <0x1, "LDS_SUB", [] >;
> +def LDS_MAX_INT : R600_LDS_1A1D_NORET <0x6, "LDS_MAX_INT", [] >;
> +def LDS_MAX_UINT : R600_LDS_1A1D_NORET <0x8, "LDS_MAX_UINT", [] >;
> def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE",
>   [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
> >;
> @@ -504,6 +506,12 @@ def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD",
> def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB",
>   [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))]
> >;
> +def LDS_MAX_INT_RET : R600_LDS_1A1D_RET <0x26, "LDS_MAX_INT",
> +  [(set i32:$dst, (atomic_load_max_local i32:$src0, i32:$src1))]
> +>;
> +def LDS_MAX_UINT_RET : R600_LDS_1A1D_RET <0x28, "LDS_MAX_UINT",
> +  [(set i32:$dst, (atomic_load_umax_local i32:$src0, i32:$src1))]
> +>;
> def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
>   [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
> >;


LGTM. The test is largely redundant. local-atomics.ll / local-atomics64.ll should already test all but one of the operations atomicrmw has, so the only reason to split these into a separate test is that pre-SI doesn’t yet implement all of the other operations. If it’s not too much work to fill in the others for Evergreen, it would be better to keep them all in the same test. If not, the SI check lines can probably be dropped. These are testing with volatile, which those tests don’t, but I don’t think it’s really necessary to test both.


> diff --git a/test/CodeGen/R600/atomic_load_max.ll b/test/CodeGen/R600/atomic_load_max.ll
> new file mode 100644
> index 0000000..ea95087
> --- /dev/null
> +++ b/test/CodeGen/R600/atomic_load_max.ll
> @@ -0,0 +1,75 @@
> +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
> +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
> +
> +; FUNC-LABEL: @atomic_max_local
> +; R600: LDS_MAX_INT *
> +; SI: DS_MAX_RTN_I32
> +define void @atomic_max_local(i32 addrspace(3)* %local) {
> +   %unused = atomicrmw volatile max i32 addrspace(3)* %local, i32 5 seq_cst
> +   ret void
> +}
> +
> +; FUNC-LABEL: @atomic_max_local_const_offset
> +; R600: LDS_MAX_INT *
> +; SI: DS_MAX_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
> +define void @atomic_max_local_const_offset(i32 addrspace(3)* %local) {
> +  %gep = getelementptr i32 addrspace(3)* %local, i32 4
> +  %val = atomicrmw volatile max i32 addrspace(3)* %gep, i32 5 seq_cst
> +  ret void
> +}
> +
> +; FUNC-LABEL: @atomic_max_ret_local
> +; R600: LDS_MAX_INT_RET *
> +; SI: DS_MAX_RTN_I32
> +define void @atomic_max_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
> +  %val = atomicrmw volatile max i32 addrspace(3)* %local, i32 5 seq_cst
> +  store i32 %val, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: @atomic_max_ret_local_const_offset
> +; R600: LDS_MAX_INT_RET *
> +; SI: DS_MAX_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
> +define void @atomic_max_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
> +  %gep = getelementptr i32 addrspace(3)* %local, i32 5
> +  %val = atomicrmw volatile max i32 addrspace(3)* %gep, i32 5 seq_cst
> +  store i32 %val, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: @atomic_umax_local
> +; R600: LDS_MAX_UINT *
> +; SI: DS_MAX_RTN_U32
> +define void @atomic_umax_local(i32 addrspace(3)* %local) {
> +   %unused = atomicrmw volatile umax i32 addrspace(3)* %local, i32 5 seq_cst
> +   ret void
> +}
> +
> +; FUNC-LABEL: @atomic_umax_local_const_offset
> +; R600: LDS_MAX_UINT *
> +; SI: DS_MAX_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
> +define void @atomic_umax_local_const_offset(i32 addrspace(3)* %local) {
> +  %gep = getelementptr i32 addrspace(3)* %local, i32 4
> +  %val = atomicrmw volatile umax i32 addrspace(3)* %gep, i32 5 seq_cst
> +  ret void
> +}
> +
> +; FUNC-LABEL: @atomic_umax_ret_local
> +; R600: LDS_MAX_UINT_RET *
> +; SI: DS_MAX_RTN_U32
> +define void @atomic_umax_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
> +  %val = atomicrmw volatile umax i32 addrspace(3)* %local, i32 5 seq_cst
> +  store i32 %val, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: @atomic_umax_ret_local_const_offset
> +; R600: LDS_MAX_UINT_RET *
> +; SI: DS_MAX_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
> +define void @atomic_umax_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
> +  %gep = getelementptr i32 addrspace(3)* %local, i32 5
> +  %val = atomicrmw volatile umax i32 addrspace(3)* %gep, i32 5 seq_cst
> +  store i32 %val, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> -- 
> 1.9.1
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits





More information about the llvm-commits mailing list