[PATCH] R600/SI: Use same complex patterns for DS atomics

Tom Stellard tom at stellard.net
Fri Sep 5 06:54:16 PDT 2014


On Fri, Sep 05, 2014 at 06:20:06AM +0000, Matt Arsenault wrote:
> This fixes DS atomics hitting the same negative base offset problem
> that was already fixed for regular loads and stores.

LGTM.

> 
> http://reviews.llvm.org/D5203
> 
> Files:
>   lib/Target/R600/SIInstructions.td
>   test/CodeGen/R600/atomic_cmp_swap_local.ll
>   test/CodeGen/R600/local-atomics.ll

> Index: lib/Target/R600/SIInstructions.td
> ===================================================================
> --- lib/Target/R600/SIInstructions.td
> +++ lib/Target/R600/SIInstructions.td
> @@ -2572,17 +2572,10 @@
>                          (EXTRACT_SUBREG $value, sub1), $offset0, $offset1)
>  >;
>  
> -multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
> -  def : Pat <
> -    (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$value),
> -    (inst (i1 0), $ptr, $value, (as_i16imm $offset))
> -  >;
> -
> -  def : Pat <
> -    (frag i32:$ptr, vt:$val),
> -    (inst 0, $ptr, $val, 0)
> -  >;
> -}
> +class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
> +  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
> +  (inst (i1 0), $ptr, $value, (as_i16imm $offset))
> +>;
>  
>  // Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
>  //
> @@ -2594,69 +2587,56 @@
>  // We also load this -1 with s_mov_b32 / s_mov_b64 even though this
>  // needs to be a VGPR. The SGPR copy pass will fix this, and it's
>  // easier since there is no v_mov_b64.
> -multiclass DSAtomicIncRetPat<DS inst, ValueType vt,
> -                             Instruction LoadImm, PatFrag frag> {
> -  def : Pat <
> -    (frag (add i32:$ptr, (i32 IMM16bit:$offset)), (vt 1)),
> -    (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
> -  >;
> -
> -  def : Pat <
> -    (frag i32:$ptr, (vt 1)),
> -    (inst 0, $ptr, (LoadImm (vt -1)), 0)
> -  >;
> -}
> +class DSAtomicIncRetPat<DS inst, ValueType vt,
> +                        Instruction LoadImm, PatFrag frag> : Pat <
> +  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
> +  (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
> +>;
>  
> -multiclass DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> {
> -  def : Pat <
> -    (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$cmp, vt:$swap),
> -    (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
> -  >;
>  
> -  def : Pat <
> -    (frag i32:$ptr, vt:$cmp, vt:$swap),
> -    (inst 0, $ptr, $cmp, $swap, 0)
> -  >;
> -}
> +class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
> +  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
> +  (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
> +>;
>  
>  
>  // 32-bit atomics.
> -defm : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
> -                         S_MOV_B32, atomic_load_add_local>;
> -defm : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
> -                         S_MOV_B32, atomic_load_sub_local>;
> -
> -defm : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
> -defm : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
> -defm : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
> -defm : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
> -defm : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
> -defm : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
> -defm : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
> -defm : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
> -defm : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
> -defm : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
> -
> -defm : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
> +def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
> +                        S_MOV_B32, atomic_load_add_local>;
> +def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
> +                        S_MOV_B32, atomic_load_sub_local>;
> +
> +def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
> +def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
> +def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
> +def : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
> +def : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
> +def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
> +def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
> +def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
> +def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
> +def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
> +
> +def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
>  
>  // 64-bit atomics.
> -defm : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
> -                         S_MOV_B64, atomic_load_add_local>;
> -defm : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
> -                         S_MOV_B64, atomic_load_sub_local>;
> -
> -defm : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
> -defm : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
> -defm : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
> -defm : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
> -defm : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
> -defm : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
> -defm : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
> -defm : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
> -defm : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
> -defm : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
> -
> -defm : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
> +def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
> +                        S_MOV_B64, atomic_load_add_local>;
> +def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
> +                        S_MOV_B64, atomic_load_sub_local>;
> +
> +def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
> +def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
> +def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
> +def : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
> +def : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
> +def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
> +def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
> +def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
> +def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
> +def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
> +
> +def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
>  
>  
>  //===----------------------------------------------------------------------===//
> Index: test/CodeGen/R600/atomic_cmp_swap_local.ll
> ===================================================================
> --- test/CodeGen/R600/atomic_cmp_swap_local.ll
> +++ test/CodeGen/R600/atomic_cmp_swap_local.ll
> @@ -1,4 +1,5 @@
>  ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
>  
>  ; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_offset:
>  ; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
> @@ -35,3 +36,17 @@
>    store i64 %result, i64 addrspace(1)* %out, align 8
>    ret void
>  }
> +
> +; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_bad_si_offset
> +; SI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
> +; CI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
> +; SI: S_ENDPGM
> +define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
> +  %sub = sub i32 %a, %b
> +  %add = add i32 %sub, 4
> +  %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
> +  %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
> +  %result = extractvalue { i32, i1 } %pair, 0
> +  store i32 %result, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> Index: test/CodeGen/R600/local-atomics.ll
> ===================================================================
> --- test/CodeGen/R600/local-atomics.ll
> +++ test/CodeGen/R600/local-atomics.ll
> @@ -1,4 +1,5 @@
> -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
> +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
>  
>  ; FUNC-LABEL: @lds_atomic_xchg_ret_i32:
>  ; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
> @@ -47,6 +48,19 @@
>    ret void
>  }
>  
> +; FUNC-LABEL: @lds_atomic_add_ret_i32_bad_si_offset
> +; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
> +; CI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
> +; SI: S_ENDPGM
> +define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
> +  %sub = sub i32 %a, %b
> +  %add = add i32 %sub, 4
> +  %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
> +  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
> +  store i32 %result, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
>  ; FUNC-LABEL: @lds_atomic_inc_ret_i32:
>  ; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
>  ; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
> @@ -70,6 +84,19 @@
>    ret void
>  }
>  
> +; FUNC-LABEL: @lds_atomic_inc_ret_i32_bad_si_offset:
> +; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
> +; CI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
> +; SI: S_ENDPGM
> +define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
> +  %sub = sub i32 %a, %b
> +  %add = add i32 %sub, 4
> +  %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
> +  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
> +  store i32 %result, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
>  ; FUNC-LABEL: @lds_atomic_sub_ret_i32:
>  ; SI: DS_SUB_RTN_U32
>  ; SI: S_ENDPGM

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list