[PATCH] R600/SI: Add support for global atomic add

Matt Arsenault arsenm2 at gmail.com
Fri Sep 19 12:31:17 PDT 2014


On Sep 19, 2014, at 2:19 PM, Tom Stellard <thomas.stellard at amd.com> wrote:

> ---
> lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 21 +++++++++
> lib/Target/R600/AMDGPUInstructions.td  |  8 ++++
> lib/Target/R600/SIInstrInfo.td         | 81 +++++++++++++++++++++++++++++++++-
> lib/Target/R600/SIInstructions.td      |  4 +-
> test/CodeGen/R600/global_atomics.ll    | 39 ++++++++++++++++
> 5 files changed, 150 insertions(+), 3 deletions(-)
> create mode 100644 test/CodeGen/R600/global_atomics.ll
> 
> diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> index 64e10df..9afcb5d 100644
> --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> @@ -101,11 +101,16 @@ private:
>                    SDValue &TFE) const;
>   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
>                          SDValue &Offset) const;
> +  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
> +                         SDValue &VAddr, SDValue &Offset,
> +                         SDValue &SLC) const;
>   bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
>                           SDValue &SOffset, SDValue &ImmOffset) const;
>   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
>                          SDValue &Offset, SDValue &GLC, SDValue &SLC,
>                          SDValue &TFE) const;
> +  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
> +                         SDValue &Offset, SDValue &GLC) const;
>   SDNode *SelectAddrSpaceCast(SDNode *N);
>   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
>   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
> @@ -909,6 +914,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
>   return false;
> }
> 
> +bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
> +                                           SDValue &VAddr, SDValue &Offset,
> +                                           SDValue &SLC) const {
> +  SLC = CurDAG->getTargetConstant(0, MVT::i1);
> +
> +  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, Offset);
> +}
> +
> static SDValue buildRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr,
>                          uint32_t RsrcDword1, uint64_t RsrcDword2And3) {
> 
> @@ -1019,6 +1032,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
>   return false;
> }
> 
> +bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
> +                                           SDValue &Soffset, SDValue &Offset,
> +                                           SDValue &GLC) const {
> +  SDValue SLC, TFE;
> +
> +  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
> +}
> +
> // FIXME: This is incorrect and only enough to be able to compile.
> SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
>   AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
> diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
> index d152c88..a608627 100644
> --- a/lib/Target/R600/AMDGPUInstructions.td
> +++ b/lib/Target/R600/AMDGPUInstructions.td
> @@ -380,6 +380,14 @@ def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
>   return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
> }]>;
> 
> +class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
> +  (ops node:$ptr, node:$value),
> +  (atomic_op node:$ptr, node:$value),
> +  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
> +>;
> +
> +def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
> +
> //===----------------------------------------------------------------------===//
> // Misc Pattern Fragments
> //===----------------------------------------------------------------------===//
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index bde90d8..abeaf67 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -196,8 +196,10 @@ def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
> 
> def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
> def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
> +def MUBUFAddr64Atomic : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
> def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
> def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
> +def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
> 
> def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
> def VOP3Mods  : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
> @@ -929,9 +931,10 @@ class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp =
>   let mayLoad = 1;
> }
> 
> -class MUBUFAddr64Table <bit is_addr64> {
> +class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
> 
>   bit IsAddr64 = is_addr64;
> +  string OpName = NAME # suffix;
> }
> 
> class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
> @@ -947,6 +950,80 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
>   let mayLoad = 0;
> }
> 
> +class MUBUFAtomicAddr64 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern>
> +    : MUBUF <op, outs, ins, asm, pattern> {
> +
> +  let offen = 0;
> +  let idxen = 0;
> +  let addr64 = 1;
> +  let tfe = 0;
> +  let lds = 0;
> +  let soffset = 128;
> +}
> +
> +class MUBUFAtomicOffset <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern>
> +    : MUBUF <op, outs, ins, asm, pattern> {
> +
> +  let offen = 0;
> +  let idxen = 0;
> +  let addr64 = 0;
> +  let tfe = 0;
> +  let lds = 0;
> +  let vaddr = 0;
> +}
> +
> +multiclass MUBUF_Atomic <bits<7> op, string name, RegisterClass rc,
> +                         ValueType vt, SDPatternOperator atomic> {
> +
> +  let mayStore = 1, mayLoad = 1 in {
> +
> +    // No return variants
> +    let glc = 0 in {
> +
> +      def _ADDR64 : MUBUFAtomicAddr64 <
> +        op, (outs),
> +        (ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
> +             mbuf_offset:$offset, slc:$slc),
> +        name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#"$slc", []
> +      >, MUBUFAddr64Table<1>, AtomicNoRet<NAME#"_ADDR64", 0>;
> +
> +      def _OFFSET : MUBUFAtomicOffset <
> +        op, (outs),
> +        (ins rc:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
> +             SSrc_32:$soffset, slc:$slc),
> +        name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", []
> +      >, MUBUFAddr64Table<0>, AtomicNoRet<NAME#"_OFFSET", 0>;
> +    } // glc = 0
> +
> +    // Variants that return values
> +    let glc = 1, Constraints = "$vdata = $vdata_in",
> +        DisableEncoding = "$vdata_in"  in {
> +
> +      def _RTN_ADDR64 : MUBUFAtomicAddr64 <
> +        op, (outs rc:$vdata),
> +        (ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
> +             mbuf_offset:$offset, slc:$slc),
> +        name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#" glc"#"$slc",
> +        [(set vt:$vdata,
> +         (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i16:$offset,
> +                                    i1:$slc), vt:$vdata_in))]
> +      >, MUBUFAddr64Table<1, "_RTN">, AtomicNoRet<NAME#"_ADDR64", 1>;
> +
> +      def _RTN_OFFSET : MUBUFAtomicOffset <
> +        op, (outs rc:$vdata),
> +        (ins rc:$vdata_in, SReg_128:$srsrc, mbuf_offset:$offset,
> +             SSrc_32:$soffset, slc:$slc),
> +        name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
> +        [(set vt:$vdata,
> +         (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
> +                                    i1:$slc), vt:$vdata_in))]
> +      >, MUBUFAddr64Table<0, "_RTN">, AtomicNoRet<NAME#"_OFFSET", 1>;
> +
> +    } // glc = 1
> +
> +  } // mayStore = 1, mayLoad = 1
> +}
> +
> multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
>                               ValueType load_vt = i32,
>                               SDPatternOperator ld = null_frag> {
> @@ -1292,7 +1369,7 @@ def getMCOpcode : InstrMapping {
> 
> def getAddr64Inst : InstrMapping {
>   let FilterClass = "MUBUFAddr64Table";
> -  let RowFields = ["NAME"];
> +  let RowFields = ["OpName"];
>   let ColFields = ["IsAddr64"];
>   let KeyCol = ["0"];
>   let ValueCols = [["1"]];
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index c1b9f57..35cf9dd 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -896,7 +896,9 @@ defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
> >;
> //def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
> //def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
> -//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
> +defm BUFFER_ATOMIC_ADD : MUBUF_Atomic <
> +  0x00000032, "BUFFER_ATOMIC_ADD", VReg_32, i32, atomic_add_global
> +>;
> //def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
> //def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
> //def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
> diff --git a/test/CodeGen/R600/global_atomics.ll b/test/CodeGen/R600/global_atomics.ll
> new file mode 100644
> index 0000000..665913f
> --- /dev/null
> +++ b/test/CodeGen/R600/global_atomics.ll
> @@ -0,0 +1,39 @@
> +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
> +
> +; FUNC-LABEL: @atomic_load_i32_offset
> +; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
> +define void @atomic_load_i32_offset(i32 addrspace(1)* %out, i32 %in) {
> +entry:
> +  %0  = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
> +  ret void
> +}
> +
> +; FUNC-LABEL: @atomic_load_i32_ret_offset
> +; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
> +; SI: BUFFER_STORE_DWORD [[RET]]
> +define void @atomic_load_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
> +entry:
> +  %0  = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
> +  store i32 %0, i32 addrspace(1)* %out2
> +  ret void
> +}
> +
> +; FUNC-LABEL: @atomic_load_i32_addr64
> +; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
> +define void @atomic_load_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
> +entry:
> +  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
> +  %0  = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
> +  ret void
> +}
> +
> +; FUNC-LABEL: @atomic_load_i32_ret_addr64
> +; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
> +; SI: BUFFER_STORE_DWORD [[RET]]
> +define void @atomic_load_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
> +entry:
> +  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
> +  %0  = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
> +  store i32 %0, i32 addrspace(1)* %out2
> +  ret void
> +}
> -- 
> 1.8.5.5
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits

LGTM



More information about the llvm-commits mailing list