[PATCH] R600/SI: Use same complex patterns for DS atomics
Tom Stellard
tom at stellard.net
Fri Sep 5 06:54:16 PDT 2014
On Fri, Sep 05, 2014 at 06:20:06AM +0000, Matt Arsenault wrote:
> This fixes hitting the same negative base offset problem
> that was already fixed for regular loads and stores.
LGTM.
>
> http://reviews.llvm.org/D5203
>
> Files:
> lib/Target/R600/SIInstructions.td
> test/CodeGen/R600/atomic_cmp_swap_local.ll
> test/CodeGen/R600/local-atomics.ll
> Index: lib/Target/R600/SIInstructions.td
> ===================================================================
> --- lib/Target/R600/SIInstructions.td
> +++ lib/Target/R600/SIInstructions.td
> @@ -2572,17 +2572,10 @@
> (EXTRACT_SUBREG $value, sub1), $offset0, $offset1)
> >;
>
> -multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
> - def : Pat <
> - (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$value),
> - (inst (i1 0), $ptr, $value, (as_i16imm $offset))
> - >;
> -
> - def : Pat <
> - (frag i32:$ptr, vt:$val),
> - (inst 0, $ptr, $val, 0)
> - >;
> -}
> +class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
> + (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
> + (inst (i1 0), $ptr, $value, (as_i16imm $offset))
> +>;
>
> // Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
> //
> @@ -2594,69 +2587,56 @@
> // We also load this -1 with s_mov_b32 / s_mov_b64 even though this
> // needs to be a VGPR. The SGPR copy pass will fix this, and it's
> // easier since there is no v_mov_b64.
> -multiclass DSAtomicIncRetPat<DS inst, ValueType vt,
> - Instruction LoadImm, PatFrag frag> {
> - def : Pat <
> - (frag (add i32:$ptr, (i32 IMM16bit:$offset)), (vt 1)),
> - (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
> - >;
> -
> - def : Pat <
> - (frag i32:$ptr, (vt 1)),
> - (inst 0, $ptr, (LoadImm (vt -1)), 0)
> - >;
> -}
> +class DSAtomicIncRetPat<DS inst, ValueType vt,
> + Instruction LoadImm, PatFrag frag> : Pat <
> + (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
> + (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
> +>;
>
> -multiclass DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> {
> - def : Pat <
> - (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$cmp, vt:$swap),
> - (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
> - >;
>
> - def : Pat <
> - (frag i32:$ptr, vt:$cmp, vt:$swap),
> - (inst 0, $ptr, $cmp, $swap, 0)
> - >;
> -}
> +class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
> + (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
> + (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
> +>;
>
>
> // 32-bit atomics.
> -defm : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
> - S_MOV_B32, atomic_load_add_local>;
> -defm : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
> - S_MOV_B32, atomic_load_sub_local>;
> -
> -defm : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
> -defm : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
> -defm : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
> -defm : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
> -defm : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
> -defm : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
> -defm : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
> -defm : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
> -defm : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
> -defm : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
> -
> -defm : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
> +def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
> + S_MOV_B32, atomic_load_add_local>;
> +def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
> + S_MOV_B32, atomic_load_sub_local>;
> +
> +def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
> +def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
> +def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
> +def : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
> +def : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
> +def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
> +def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
> +def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
> +def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
> +def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
> +
> +def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
>
> // 64-bit atomics.
> -defm : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
> - S_MOV_B64, atomic_load_add_local>;
> -defm : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
> - S_MOV_B64, atomic_load_sub_local>;
> -
> -defm : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
> -defm : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
> -defm : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
> -defm : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
> -defm : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
> -defm : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
> -defm : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
> -defm : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
> -defm : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
> -defm : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
> -
> -defm : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
> +def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
> + S_MOV_B64, atomic_load_add_local>;
> +def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
> + S_MOV_B64, atomic_load_sub_local>;
> +
> +def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
> +def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
> +def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
> +def : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
> +def : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
> +def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
> +def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
> +def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
> +def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
> +def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
> +
> +def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
>
>
> //===----------------------------------------------------------------------===//
> Index: test/CodeGen/R600/atomic_cmp_swap_local.ll
> ===================================================================
> --- test/CodeGen/R600/atomic_cmp_swap_local.ll
> +++ test/CodeGen/R600/atomic_cmp_swap_local.ll
> @@ -1,4 +1,5 @@
> ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
>
> ; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_offset:
> ; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
> @@ -35,3 +36,17 @@
> store i64 %result, i64 addrspace(1)* %out, align 8
> ret void
> }
> +
> +; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_bad_si_offset
> +; SI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
> +; CI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
> +; SI: S_ENDPGM
> +define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
> + %sub = sub i32 %a, %b
> + %add = add i32 %sub, 4
> + %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
> + %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
> + %result = extractvalue { i32, i1 } %pair, 0
> + store i32 %result, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> Index: test/CodeGen/R600/local-atomics.ll
> ===================================================================
> --- test/CodeGen/R600/local-atomics.ll
> +++ test/CodeGen/R600/local-atomics.ll
> @@ -1,4 +1,5 @@
> -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
> +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
>
> ; FUNC-LABEL: @lds_atomic_xchg_ret_i32:
> ; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
> @@ -47,6 +48,19 @@
> ret void
> }
>
> +; FUNC-LABEL: @lds_atomic_add_ret_i32_bad_si_offset
> +; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
> +; CI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
> +; SI: S_ENDPGM
> +define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
> + %sub = sub i32 %a, %b
> + %add = add i32 %sub, 4
> + %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
> + %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
> + store i32 %result, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> +
> ; FUNC-LABEL: @lds_atomic_inc_ret_i32:
> ; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
> ; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
> @@ -70,6 +84,19 @@
> ret void
> }
>
> +; FUNC-LABEL: @lds_atomic_inc_ret_i32_bad_si_offset:
> +; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
> +; CI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
> +; SI: S_ENDPGM
> +define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
> + %sub = sub i32 %a, %b
> + %add = add i32 %sub, 4
> + %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
> + %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
> + store i32 %result, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> +
> ; FUNC-LABEL: @lds_atomic_sub_ret_i32:
> ; SI: DS_SUB_RTN_U32
> ; SI: S_ENDPGM
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list