[PATCH] R600/SI: Implement common local atomics

Matt Arsenault arsenm2 at gmail.com
Wed Jun 11 11:17:05 PDT 2014


On Jun 11, 2014, at 7:44 AM, Tom Stellard <tom at stellard.net> wrote:

> On Tue, Jun 10, 2014 at 09:37:08PM -0700, Matt Arsenault wrote:
>> Hi,
>> 
>> These implement most of the common atomic operations for LDS
>> 
> 
>> From 5068ba2aece9725897aa5872d51bd66064a2b582 Mon Sep 17 00:00:00 2001
>> From: Matt Arsenault <Matthew.Arsenault at amd.com>
>> Date: Tue, 27 May 2014 17:02:10 -0700
>> Subject: [PATCH 1/8] R600/SI: Refactor local atomics.
>> 
> 
> LGTM.
> 
>> From e45f8957f49dad57e832b4af6250b50871cc280f Mon Sep 17 00:00:00 2001
>> From: Matt Arsenault <Matthew.Arsenault at amd.com>
>> Date: Tue, 27 May 2014 17:14:03 -0700
>> Subject: [PATCH 2/8] R600/SI: Fix backwards names for local atomic
>> instructions.
>> 
> 
> LGTM.
> 
>> From 044cab8bc42898df77034c4cd3b364eb9f23fa8e Mon Sep 17 00:00:00 2001
>> From: Matt Arsenault <Matthew.Arsenault at amd.com>
>> Date: Tue, 27 May 2014 17:49:07 -0700
>> Subject: [PATCH 3/8] R600/SI: Add instruction definitions for more LDS ops
>> 
>> ---
>> lib/Target/R600/SIInstrInfo.td    | 62 +++++++++++++++++++++++++++++++++++++++
>> lib/Target/R600/SIInstructions.td | 42 ++++++++++++++++++++++++++
>> 2 files changed, 104 insertions(+)
>> 
>> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
>> index 77ef190..61769f3 100644
>> --- a/lib/Target/R600/SIInstrInfo.td
>> +++ b/lib/Target/R600/SIInstrInfo.td
>> @@ -481,6 +481,7 @@ class DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A
>>   let vdst = 0;
>> }
>> 
>> +// 1 address, 1 data.
>> class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
>>   op,
>>   (outs rc:$vdst),
>> @@ -493,6 +494,67 @@ class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
>>   let mayLoad = 1;
>> }
>> 
>> +// 1 address, 0 data.
>> +class DS_1A0D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
>> +  op,
>> +  (outs rc:$vdst),
>> +  (ins i1imm:$gds, VReg_32:$addr, u16imm:$offset),
>> +  asm#" $vdst, $addr, $offset, [M0]",
>> +  []> {
>> +  let data0 = 0;
>> +  let data1 = 0;
>> +  let mayStore = 1;
>> +  let mayLoad = 1;
>> +}
>> +
>> +// 1 address, 0 data.
>> +class DS_1A0D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
>> +  op,
>> +  (outs ),
>> +  (ins i1imm:$gds, VReg_32:$addr, u16imm:$offset),
>> +  asm#" $addr, $offset, [M0]",
>> +  []> {
>> +  let data0 = 0;
>> +  let data1 = 0;
>> +  let mayStore = 1;
>> +  let mayLoad = 1;
>> +}
>> +
>> +// 1 address, 2 data.
>> +class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
>> +  op,
>> +  (outs rc:$vdst),
>> +  (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1, u16imm:$offset),
>> +  asm#" $vdst, $addr, $data0, $data1, $offset, [M0]",
>> +  []> {
>> +  let mayStore = 1;
>> +  let mayLoad = 1;
>> +}
>> +
>> +// 1 address, 2 data.
>> +class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
>> +  op,
>> +  (outs),
>> +  (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1, u16imm:$offset),
>> +  asm#" $addr, $data0, $data1, $offset, [M0]",
>> +  []> {
>> +  let mayStore = 1;
>> +  let mayLoad = 1;
>> +}
>> +
>> +// 1 address, 1 data.
>> +class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
>> +  op,
>> +  (outs),
>> +  (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, u16imm:$offset),
>> +  asm#" $addr, $data0, $offset, [M0]",
>> +  []> {
>> +
>> +  let data1 = 0;
>> +  let mayStore = 1;
>> +  let mayLoad = 1;
>> +}
>> +
>> class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
>>   op,
>>   (outs),
>> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
>> index dd1dca7..22c4ba6 100644
>> --- a/lib/Target/R600/SIInstructions.td
>> +++ b/lib/Target/R600/SIInstructions.td
>> @@ -713,8 +713,50 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
>> // DS Instructions
>> //===----------------------------------------------------------------------===//
>> 
>> +
>> +def DS_ADD_U32 : DS_1A1D_NORET <0x0, "DS_ADD_U32", VReg_32>;
>> +def DS_SUB_U32 : DS_1A1D_NORET <0x1, "DS_SUB_U32", VReg_32>;
>> +def DS_RSUB_U32 : DS_1A1D_NORET <0x2, "DS_RSUB_U32", VReg_32>;
>> +def DS_INC_U32 : DS_1A0D_NORET <0x3, "DS_INC_U32", VReg_32>;
>> +def DS_DEC_U32 : DS_1A0D_NORET <0x4, "DS_DEC_U32", VReg_32>;
>> +def DS_MIN_I32 : DS_1A1D_NORET <0x5, "DS_MIN_I32", VReg_32>;
>> +def DS_MAX_I32 : DS_1A1D_NORET <0x6, "DS_MAX_I32", VReg_32>;
>> +def DS_MIN_U32 : DS_1A1D_NORET <0x7, "DS_MIN_U32", VReg_32>;
>> +def DS_MAX_U32 : DS_1A1D_NORET <0x8, "DS_MAX_U32", VReg_32>;
>> +def DS_AND_B32 : DS_1A1D_NORET <0x9, "DS_AND_B32", VReg_32>;
>> +def DS_OR_B32 : DS_1A1D_NORET <0xa, "DS_OR_B32", VReg_32>;
>> +def DS_XOR_B32 : DS_1A1D_NORET <0xb, "DS_XOR_B32", VReg_32>;
>> +def DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "DS_MSKOR_B32", VReg_32>;
>> +def DS_CMPST_B32 : DS_1A2D_NORET <0x10, "DS_CMPST_B32", VReg_32>;
>> +def DS_CMPST_F32 : DS_1A2D_NORET <0x11, "DS_CMPST_F32", VReg_32>;
>> +def DS_MIN_F32 : DS_1A1D_NORET <0x12, "DS_MIN_F32", VReg_32>;
>> +def DS_MAX_F32 : DS_1A1D_NORET <0x13, "DS_MAX_F32", VReg_32>;
>> +
>> def DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "DS_ADD_RTN_U32", VReg_32>;
>> def DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "DS_SUB_RTN_U32", VReg_32>;
>> +def DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "DS_RSUB_RTN_U32", VReg_32>;
>> +def DS_INC_RTN_U32 : DS_1A0D_RET <0x23, "DS_INC_RTN_U32", VReg_32>;
>> +def DS_DEC_RTN_U32 : DS_1A0D_RET <0x24, "DS_DEC_RTN_U32", VReg_32>;
>> +def DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "DS_MIN_RTN_I32", VReg_32>;
>> +def DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "DS_MAX_RTN_I32", VReg_32>;
>> +def DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "DS_MIN_RTN_U32", VReg_32>;
>> +def DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "DS_MAX_RTN_U32", VReg_32>;
>> +def DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "DS_AND_RTN_B32", VReg_32>;
>> +def DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "DS_OR_RTN_B32", VReg_32>;
>> +def DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "DS_XOR_RTN_B32", VReg_32>;
>> +def DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "DS_MSKOR_RTN_B32", VReg_32>;
>> +def DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "DS_WRXCHG_RTN_B32", VReg_32>;
>> +//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "DS_WRXCHG2_RTN_B32", VReg_32>;
>> +//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "DS_WRXCHG2_RTN_B32", VReg_32>;
>> +def DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "DS_CMPST_RTN_B32", VReg_32>;
>> +def DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "DS_CMPST_RTN_F32", VReg_32>;
>> +def DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "DS_MIN_RTN_F32", VReg_32>;
>> +def DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "DS_MAX_RTN_F32", VReg_32>;
>> +
>> +let Predicates = [isCI] in {
> 
> This should be:
> 
> let SubtargetPredicate = isCI in {
> 
> I added finer grained predicates a few weeks ago.  See the
> PredicateControl class in AMDGPU.td
> 
> Otherwise, LGTM.
> 
>> +def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "DS_WRAP_RTN_F32", VReg_32>;
>> +} // End isCI
>> +
>> def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
>> def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
>> def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
>> -- 
>> 1.8.4.3
>> 
> 
>> From cfec1e278661fc23acd1c7eedc671a3b2357c897 Mon Sep 17 00:00:00 2001
>> From: Matt Arsenault <Matthew.Arsenault at amd.com>
>> Date: Tue, 27 May 2014 18:35:33 -0700
>> Subject: [PATCH 4/8] R600/SI: Add other LDS atomic operations
>> 
> 
> LGTM.
> 
>> From 0078d1dd28e6388f34af5dd691f4eb5183744e1f Mon Sep 17 00:00:00 2001
>> From: Matt Arsenault <Matthew.Arsenault at amd.com>
>> Date: Wed, 28 May 2014 00:38:43 -0700
>> Subject: [PATCH 5/8] R600/SI: Use LDS atomic inc / dec
>> 
> 
> LGTM.
> 
>> From b2011f34757313cf727662486d79d305702dc1d9 Mon Sep 17 00:00:00 2001
>> From: Matt Arsenault <Matthew.Arsenault at amd.com>
>> Date: Wed, 28 May 2014 02:00:34 -0700
>> Subject: [PATCH 6/8] R600/SI: Add 32-bit LDS atomic cmpxchg
>> 
> 
> LGTM.
> 
>> From 81a19c019f5093ccb888212e7341369701082385 Mon Sep 17 00:00:00 2001
>> From: Matt Arsenault <Matthew.Arsenault at amd.com>
>> Date: Wed, 28 May 2014 15:29:20 -0700
>> Subject: [PATCH 7/8] R600/SI: Add instruction definitions for 64-bit LDS
>> atomics
>> 
>> ---
>> lib/Target/R600/SIInstructions.td | 47 +++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 47 insertions(+)
>> 
>> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
>> index 2af1fe6..ade189a 100644
>> --- a/lib/Target/R600/SIInstructions.td
>> +++ b/lib/Target/R600/SIInstructions.td
>> @@ -757,6 +757,53 @@ let Predicates = [isCI] in {
>> def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "DS_WRAP_RTN_F32", VReg_32>;
>> } // End isCI
>> 
>> +
>> +def DS_ADD_U64 : DS_1A1D_NORET <0x40, "DS_ADD_U64", VReg_32>;
>> +def DS_SUB_U64 : DS_1A1D_NORET <0x41, "DS_SUB_U64", VReg_32>;
>> +def DS_RSUB_U64 : DS_1A1D_NORET <0x42, "DS_RSUB_U64", VReg_32>;
>> +def DS_INC_U64 : DS_1A0D_NORET <0x43, "DS_INC_U64", VReg_32>;
>> +def DS_DEC_U64 : DS_1A0D_NORET <0x44, "DS_DEC_U64", VReg_32>;
>> +def DS_MIN_I64 : DS_1A1D_NORET <0x45, "DS_MIN_I64", VReg_64>;
>> +def DS_MAX_I64 : DS_1A1D_NORET <0x46, "DS_MAX_I64", VReg_64>;
>> +def DS_MIN_U64 : DS_1A1D_NORET <0x47, "DS_MIN_U64", VReg_64>;
>> +def DS_MAX_U64 : DS_1A1D_NORET <0x48, "DS_MAX_U64", VReg_64>;
>> +def DS_AND_B64 : DS_1A1D_NORET <0x49, "DS_AND_B64", VReg_64>;
>> +def DS_OR_B64 : DS_1A1D_NORET <0x4a, "DS_OR_B64", VReg_64>;
>> +def DS_XOR_B64 : DS_1A1D_NORET <0x4b, "DS_XOR_B64", VReg_64>;
>> +def DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "DS_MSKOR_B64", VReg_64>;
>> +def DS_CMPST_B64 : DS_1A2D_NORET <0x50, "DS_CMPST_B64", VReg_64>;
>> +def DS_CMPST_F64 : DS_1A2D_NORET <0x51, "DS_CMPST_F64", VReg_64>;
>> +def DS_MIN_F64 : DS_1A1D_NORET <0x52, "DS_MIN_F64", VReg_64>;
>> +def DS_MAX_F64 : DS_1A1D_NORET <0x53, "DS_MAX_F64", VReg_64>;
>> +
>> +def DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "DS_ADD_RTN_U64", VReg_64>;
>> +def DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "DS_SUB_RTN_U64", VReg_64>;
>> +def DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "DS_RSUB_RTN_U64", VReg_64>;
>> +def DS_INC_RTN_U64 : DS_1A0D_RET <0x63, "DS_INC_RTN_U64", VReg_64>;
>> +def DS_DEC_RTN_U64 : DS_1A0D_RET <0x64, "DS_DEC_RTN_U64", VReg_64>;
>> +def DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "DS_MIN_RTN_I64", VReg_64>;
>> +def DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "DS_MAX_RTN_I64", VReg_64>;
>> +def DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "DS_MIN_RTN_U64", VReg_64>;
>> +def DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "DS_MAX_RTN_U64", VReg_64>;
>> +def DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "DS_AND_RTN_B64", VReg_64>;
>> +def DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "DS_OR_RTN_B64", VReg_64>;
>> +def DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "DS_XOR_RTN_B64", VReg_64>;
>> +def DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "DS_MSKOR_RTN_B64", VReg_64>;
>> +def DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "DS_WRXCHG_RTN_B64", VReg_64>;
>> +//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "DS_WRXCHG2_RTN_B64", VReg_64>;
>> +//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "DS_WRXCHG2_RTN_B64", VReg_64>;
>> +def DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "DS_CMPST_RTN_B64", VReg_64>;
>> +def DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "DS_CMPST_RTN_F64", VReg_64>;
>> +def DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "DS_MIN_F64", VReg_64>;
>> +def DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "DS_MAX_F64", VReg_64>;
>> +
>> +//let Predicates = [isCI] in {
> 
> This should be let SubtargetPredicate = isCI too.  Otherwise, LGTM.
> 
>> +// DS_CONDXCHG32_RTN_B64
>> +// DS_CONDXCHG32_RTN_B128
>> +//} // End isCI
>> +
>> +// TODO: _SRC2_* forms
>> +
>> def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
>> def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
>> def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
>> -- 
>> 1.8.4.3
>> 
> 
>> From 89616a1c8d3c5eebf5d7d4f5f6dd93efa3794746 Mon Sep 17 00:00:00 2001
>> From: Matt Arsenault <Matthew.Arsenault at amd.com>
>> Date: Wed, 28 May 2014 16:38:54 -0700
>> Subject: [PATCH 8/8] R600/SI: Add common 64-bit LDS atomics
>> 
> 
> LGTM.


r210673-210680



More information about the llvm-commits mailing list