[PATCH] R600: Add support for LDS atomic subtract

Tom Stellard tom at stellard.net
Thu Sep 5 11:39:41 PDT 2013


On Wed, Sep 04, 2013 at 05:22:48PM -0500, Aaron Watry wrote:

This patch is

Reviewed-by: Tom Stellard <thomas.stellard at amd.com>

However, there is a regression in the LLVM backend that breaks all of
the local store tests on evergreen.  I think this may be related to one
of Vincent's recent patches, so don't be alarmed if you rebase and all
the atomic tests are failing.

-Tom

> ---
>  lib/Target/R600/AMDGPUInstructions.td  |  5 +++++
>  lib/Target/R600/R600Instructions.td    |  4 ++++
>  lib/Target/R600/SIInstructions.td      |  4 ++++
>  lib/Target/R600/SILowerControlFlow.cpp |  1 +
>  test/CodeGen/R600/atomic_load_sub.ll   | 23 +++++++++++++++++++++++
>  5 files changed, 37 insertions(+)
>  create mode 100644 test/CodeGen/R600/atomic_load_sub.ll
> 
> diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
> index 6745fed..e30abc0 100644
> --- a/lib/Target/R600/AMDGPUInstructions.td
> +++ b/lib/Target/R600/AMDGPUInstructions.td
> @@ -196,6 +196,11 @@ def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value),
>    return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
>  }]>;
>  
> +def atomic_load_sub_local : PatFrag<(ops node:$ptr, node:$value),
> +                                    (atomic_load_sub node:$ptr, node:$value), [{
> +  return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
> +}]>;
> +
>  def mskor_global : PatFrag<(ops node:$val, node:$ptr),
>                              (AMDGPUstore_mskor node:$val, node:$ptr), [{
>    return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index 76c3c4f..7b4d550 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -1687,6 +1687,7 @@ class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
>  }
>  
>  def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >;
> +def LDS_SUB : R600_LDS_1A1D_NORET <0x1, "LDS_SUB", [] >;
>  def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE",
>    [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
>  >;
> @@ -1699,6 +1700,9 @@ def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE",
>  def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD",
>    [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))]
>  >;
> +def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB",
> +  [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))]
> +>;
>  def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
>    [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
>  >;
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 31a5ad2..14a189a 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -392,6 +392,7 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
>  } // End isCompare = 1
>  
>  def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>;
> +def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>;
>  def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
>  def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
>  def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
> @@ -1779,6 +1780,9 @@ def : DSWritePat <DS_WRITE_B32, i32, local_store>;
>  def : Pat <(atomic_load_add_local i32:$ptr, i32:$val),
>             (DS_ADD_U32_RTN 0, $ptr, $val, 0, 0)>;
>  
> +def : Pat <(atomic_load_sub_local i32:$ptr, i32:$val),
> +           (DS_SUB_U32_RTN 0, $ptr, $val, 0, 0)>;
> +
>  /********** ================== **********/
>  /**********   SMRD Patterns    **********/
>  /********** ================== **********/
> diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
> index 89c2b04..480cedf 100644
> --- a/lib/Target/R600/SILowerControlFlow.cpp
> +++ b/lib/Target/R600/SILowerControlFlow.cpp
> @@ -488,6 +488,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
>            // Fall through
>          case AMDGPU::DS_WRITE_B32:
>          case AMDGPU::DS_ADD_U32_RTN:
> +        case AMDGPU::DS_SUB_U32_RTN:
>            NeedM0 = true;
>            break;
>  
> diff --git a/test/CodeGen/R600/atomic_load_sub.ll b/test/CodeGen/R600/atomic_load_sub.ll
> new file mode 100644
> index 0000000..e256f07
> --- /dev/null
> +++ b/test/CodeGen/R600/atomic_load_sub.ll
> @@ -0,0 +1,23 @@
> +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
> +; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
> +
> +; R600-CHECK-LABEL: @atomic_sub_local
> +; R600-CHECK: LDS_SUB *
> +; SI-CHECK-LABEL: @atomic_sub_local
> +; SI-CHECK: DS_SUB_U32_RTN 0
> +define void @atomic_sub_local(i32 addrspace(3)* %local) {
> +entry:
> +   %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
> +   ret void
> +}
> +
> +; R600-CHECK-LABEL: @atomic_sub_ret_local
> +; R600-CHECK: LDS_SUB_RET *
> +; SI-CHECK-LABEL: @atomic_sub_ret_local
> +; SI-CHECK: DS_SUB_U32_RTN 0
> +define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
> +entry:
> +  %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
> +  store i32 %0, i32 addrspace(1)* %out
> +  ret void
> +}
> -- 
> 1.8.1.2
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits



More information about the llvm-commits mailing list