[PATCH] R600/SI: Implement common local atomics

Tom Stellard tom at stellard.net
Wed Jun 11 07:44:52 PDT 2014


On Tue, Jun 10, 2014 at 09:37:08PM -0700, Matt Arsenault wrote:
> Hi,
> 
> These implement most of the common atomic operations for LDS
> 

> From 5068ba2aece9725897aa5872d51bd66064a2b582 Mon Sep 17 00:00:00 2001
> From: Matt Arsenault <Matthew.Arsenault at amd.com>
> Date: Tue, 27 May 2014 17:02:10 -0700
> Subject: [PATCH 1/8] R600/SI: Refactor local atomics.
>

LGTM.
 
> From e45f8957f49dad57e832b4af6250b50871cc280f Mon Sep 17 00:00:00 2001
> From: Matt Arsenault <Matthew.Arsenault at amd.com>
> Date: Tue, 27 May 2014 17:14:03 -0700
> Subject: [PATCH 2/8] R600/SI: Fix backwards names for local atomic
>  instructions.
> 

LGTM.

> From 044cab8bc42898df77034c4cd3b364eb9f23fa8e Mon Sep 17 00:00:00 2001
> From: Matt Arsenault <Matthew.Arsenault at amd.com>
> Date: Tue, 27 May 2014 17:49:07 -0700
> Subject: [PATCH 3/8] R600/SI: Add instruction definitions for more LDS ops
> 
> ---
>  lib/Target/R600/SIInstrInfo.td    | 62 +++++++++++++++++++++++++++++++++++++++
>  lib/Target/R600/SIInstructions.td | 42 ++++++++++++++++++++++++++
>  2 files changed, 104 insertions(+)
> 
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 77ef190..61769f3 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -481,6 +481,7 @@ class DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A
>    let vdst = 0;
>  }
>  
> +// 1 address, 1 data.
>  class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
>    op,
>    (outs rc:$vdst),
> @@ -493,6 +494,67 @@ class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
>    let mayLoad = 1;
>  }
>  
> +// 1 address, 0 data.
> +class DS_1A0D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
> +  op,
> +  (outs rc:$vdst),
> +  (ins i1imm:$gds, VReg_32:$addr, u16imm:$offset),
> +  asm#" $vdst, $addr, $offset, [M0]",
> +  []> {
> +  let data0 = 0;
> +  let data1 = 0;
> +  let mayStore = 1;
> +  let mayLoad = 1;
> +}
> +
> +// 1 address, 0 data.
> +class DS_1A0D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
> +  op,
> +  (outs ),
> +  (ins i1imm:$gds, VReg_32:$addr, u16imm:$offset),
> +  asm#" $addr, $offset, [M0]",
> +  []> {
> +  let data0 = 0;
> +  let data1 = 0;
> +  let mayStore = 1;
> +  let mayLoad = 1;
> +}
> +
> +// 1 address, 2 data.
> +class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
> +  op,
> +  (outs rc:$vdst),
> +  (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1, u16imm:$offset),
> +  asm#" $vdst, $addr, $data0, $data1, $offset, [M0]",
> +  []> {
> +  let mayStore = 1;
> +  let mayLoad = 1;
> +}
> +
> +// 1 address, 2 data.
> +class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
> +  op,
> +  (outs),
> +  (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1, u16imm:$offset),
> +  asm#" $addr, $data0, $data1, $offset, [M0]",
> +  []> {
> +  let mayStore = 1;
> +  let mayLoad = 1;
> +}
> +
> +// 1 address, 1 data.
> +class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
> +  op,
> +  (outs),
> +  (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, u16imm:$offset),
> +  asm#" $addr, $data0, $offset, [M0]",
> +  []> {
> +
> +  let data1 = 0;
> +  let mayStore = 1;
> +  let mayLoad = 1;
> +}
> +
>  class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
>    op,
>    (outs),
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index dd1dca7..22c4ba6 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -713,8 +713,50 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
>  // DS Instructions
>  //===----------------------------------------------------------------------===//
>  
> +
> +def DS_ADD_U32 : DS_1A1D_NORET <0x0, "DS_ADD_U32", VReg_32>;
> +def DS_SUB_U32 : DS_1A1D_NORET <0x1, "DS_SUB_U32", VReg_32>;
> +def DS_RSUB_U32 : DS_1A1D_NORET <0x2, "DS_RSUB_U32", VReg_32>;
> +def DS_INC_U32 : DS_1A0D_NORET <0x3, "DS_INC_U32", VReg_32>;
> +def DS_DEC_U32 : DS_1A0D_NORET <0x4, "DS_DEC_U32", VReg_32>;
> +def DS_MIN_I32 : DS_1A1D_NORET <0x5, "DS_MIN_I32", VReg_32>;
> +def DS_MAX_I32 : DS_1A1D_NORET <0x6, "DS_MAX_I32", VReg_32>;
> +def DS_MIN_U32 : DS_1A1D_NORET <0x7, "DS_MIN_U32", VReg_32>;
> +def DS_MAX_U32 : DS_1A1D_NORET <0x8, "DS_MAX_U32", VReg_32>;
> +def DS_AND_B32 : DS_1A1D_NORET <0x9, "DS_AND_B32", VReg_32>;
> +def DS_OR_B32 : DS_1A1D_NORET <0xa, "DS_OR_B32", VReg_32>;
> +def DS_XOR_B32 : DS_1A1D_NORET <0xb, "DS_XOR_B32", VReg_32>;
> +def DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "DS_MSKOR_B32", VReg_32>;
> +def DS_CMPST_B32 : DS_1A2D_NORET <0x10, "DS_CMPST_B32", VReg_32>;
> +def DS_CMPST_F32 : DS_1A2D_NORET <0x11, "DS_CMPST_F32", VReg_32>;
> +def DS_MIN_F32 : DS_1A1D_NORET <0x12, "DS_MIN_F32", VReg_32>;
> +def DS_MAX_F32 : DS_1A1D_NORET <0x13, "DS_MAX_F32", VReg_32>;
> +
>  def DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "DS_ADD_RTN_U32", VReg_32>;
>  def DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "DS_SUB_RTN_U32", VReg_32>;
> +def DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "DS_RSUB_RTN_U32", VReg_32>;
> +def DS_INC_RTN_U32 : DS_1A0D_RET <0x23, "DS_INC_RTN_U32", VReg_32>;
> +def DS_DEC_RTN_U32 : DS_1A0D_RET <0x24, "DS_DEC_RTN_U32", VReg_32>;
> +def DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "DS_MIN_RTN_I32", VReg_32>;
> +def DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "DS_MAX_RTN_I32", VReg_32>;
> +def DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "DS_MIN_RTN_U32", VReg_32>;
> +def DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "DS_MAX_RTN_U32", VReg_32>;
> +def DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "DS_AND_RTN_B32", VReg_32>;
> +def DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "DS_OR_RTN_B32", VReg_32>;
> +def DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "DS_XOR_RTN_B32", VReg_32>;
> +def DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "DS_MSKOR_RTN_B32", VReg_32>;
> +def DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "DS_WRXCHG_RTN_B32", VReg_32>;
> +//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "DS_WRXCHG2_RTN_B32", VReg_32>;
> +//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "DS_WRXCHG2_RTN_B32", VReg_32>;
> +def DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "DS_CMPST_RTN_B32", VReg_32>;
> +def DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "DS_CMPST_RTN_F32", VReg_32>;
> +def DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "DS_MIN_RTN_F32", VReg_32>;
> +def DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "DS_MAX_RTN_F32", VReg_32>;
> +
> +let Predicates = [isCI] in {

This should be:

let SubtargetPredicate = isCI in {

I added finer-grained predicates a few weeks ago.  See the
PredicateControl class in AMDGPU.td.

Otherwise, LGTM.

> +def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "DS_WRAP_RTN_F32", VReg_32>;
> +} // End isCI
> +
>  def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
>  def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
>  def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
> -- 
> 1.8.4.3
> 

> From cfec1e278661fc23acd1c7eedc671a3b2357c897 Mon Sep 17 00:00:00 2001
> From: Matt Arsenault <Matthew.Arsenault at amd.com>
> Date: Tue, 27 May 2014 18:35:33 -0700
> Subject: [PATCH 4/8] R600/SI: Add other LDS atomic operations
> 

LGTM.

> From 0078d1dd28e6388f34af5dd691f4eb5183744e1f Mon Sep 17 00:00:00 2001
> From: Matt Arsenault <Matthew.Arsenault at amd.com>
> Date: Wed, 28 May 2014 00:38:43 -0700
> Subject: [PATCH 5/8] R600/SI: Use LDS atomic inc / dec
> 

LGTM.

> From b2011f34757313cf727662486d79d305702dc1d9 Mon Sep 17 00:00:00 2001
> From: Matt Arsenault <Matthew.Arsenault at amd.com>
> Date: Wed, 28 May 2014 02:00:34 -0700
> Subject: [PATCH 6/8] R600/SI: Add 32-bit LDS atomic cmpxchg
> 

LGTM.

> From 81a19c019f5093ccb888212e7341369701082385 Mon Sep 17 00:00:00 2001
> From: Matt Arsenault <Matthew.Arsenault at amd.com>
> Date: Wed, 28 May 2014 15:29:20 -0700
> Subject: [PATCH 7/8] R600/SI: Add instruction definitions for 64-bit LDS
>  atomics
> 
> ---
>  lib/Target/R600/SIInstructions.td | 47 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 47 insertions(+)
> 
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 2af1fe6..ade189a 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -757,6 +757,53 @@ let Predicates = [isCI] in {
>  def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "DS_WRAP_RTN_F32", VReg_32>;
>  } // End isCI
>  
> +
> +def DS_ADD_U64 : DS_1A1D_NORET <0x40, "DS_ADD_U64", VReg_32>;
> +def DS_SUB_U64 : DS_1A1D_NORET <0x41, "DS_SUB_U64", VReg_32>;
> +def DS_RSUB_U64 : DS_1A1D_NORET <0x42, "DS_RSUB_U64", VReg_32>;
> +def DS_INC_U64 : DS_1A0D_NORET <0x43, "DS_INC_U64", VReg_32>;
> +def DS_DEC_U64 : DS_1A0D_NORET <0x44, "DS_DEC_U64", VReg_32>;
> +def DS_MIN_I64 : DS_1A1D_NORET <0x45, "DS_MIN_I64", VReg_64>;
> +def DS_MAX_I64 : DS_1A1D_NORET <0x46, "DS_MAX_I64", VReg_64>;
> +def DS_MIN_U64 : DS_1A1D_NORET <0x47, "DS_MIN_U64", VReg_64>;
> +def DS_MAX_U64 : DS_1A1D_NORET <0x48, "DS_MAX_U64", VReg_64>;
> +def DS_AND_B64 : DS_1A1D_NORET <0x49, "DS_AND_B64", VReg_64>;
> +def DS_OR_B64 : DS_1A1D_NORET <0x4a, "DS_OR_B64", VReg_64>;
> +def DS_XOR_B64 : DS_1A1D_NORET <0x4b, "DS_XOR_B64", VReg_64>;
> +def DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "DS_MSKOR_B64", VReg_64>;
> +def DS_CMPST_B64 : DS_1A2D_NORET <0x50, "DS_CMPST_B64", VReg_64>;
> +def DS_CMPST_F64 : DS_1A2D_NORET <0x51, "DS_CMPST_F64", VReg_64>;
> +def DS_MIN_F64 : DS_1A1D_NORET <0x52, "DS_MIN_F64", VReg_64>;
> +def DS_MAX_F64 : DS_1A1D_NORET <0x53, "DS_MAX_F64", VReg_64>;
> +
> +def DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "DS_ADD_RTN_U64", VReg_64>;
> +def DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "DS_SUB_RTN_U64", VReg_64>;
> +def DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "DS_RSUB_RTN_U64", VReg_64>;
> +def DS_INC_RTN_U64 : DS_1A0D_RET <0x63, "DS_INC_RTN_U64", VReg_64>;
> +def DS_DEC_RTN_U64 : DS_1A0D_RET <0x64, "DS_DEC_RTN_U64", VReg_64>;
> +def DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "DS_MIN_RTN_I64", VReg_64>;
> +def DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "DS_MAX_RTN_I64", VReg_64>;
> +def DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "DS_MIN_RTN_U64", VReg_64>;
> +def DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "DS_MAX_RTN_U64", VReg_64>;
> +def DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "DS_AND_RTN_B64", VReg_64>;
> +def DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "DS_OR_RTN_B64", VReg_64>;
> +def DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "DS_XOR_RTN_B64", VReg_64>;
> +def DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "DS_MSKOR_RTN_B64", VReg_64>;
> +def DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "DS_WRXCHG_RTN_B64", VReg_64>;
> +//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "DS_WRXCHG2_RTN_B64", VReg_64>;
> +//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "DS_WRXCHG2_RTN_B64", VReg_64>;
> +def DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "DS_CMPST_RTN_B64", VReg_64>;
> +def DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "DS_CMPST_RTN_F64", VReg_64>;
> +def DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "DS_MIN_F64", VReg_64>;
> +def DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "DS_MAX_F64", VReg_64>;
> +
> +//let Predicates = [isCI] in {

This should be let SubtargetPredicate = isCI too.  Otherwise, LGTM.

> +// DS_CONDXCHG32_RTN_B64
> +// DS_CONDXCHG32_RTN_B128
> +//} // End isCI
> +
> +// TODO: _SRC2_* forms
> +
>  def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
>  def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
>  def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
> -- 
> 1.8.4.3
> 

> From 89616a1c8d3c5eebf5d7d4f5f6dd93efa3794746 Mon Sep 17 00:00:00 2001
> From: Matt Arsenault <Matthew.Arsenault at amd.com>
> Date: Wed, 28 May 2014 16:38:54 -0700
> Subject: [PATCH 8/8] R600/SI: Add common 64-bit LDS atomics
> 

LGTM.



More information about the llvm-commits mailing list