[PATCH] R600/SI: Add support for global atomic add
Matt Arsenault
arsenm2 at gmail.com
Fri Sep 19 12:31:17 PDT 2014
On Sep 19, 2014, at 2:19 PM, Tom Stellard <thomas.stellard at amd.com> wrote:
> ---
> lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 21 +++++++++
> lib/Target/R600/AMDGPUInstructions.td | 8 ++++
> lib/Target/R600/SIInstrInfo.td | 81 +++++++++++++++++++++++++++++++++-
> lib/Target/R600/SIInstructions.td | 4 +-
> test/CodeGen/R600/global_atomics.ll | 39 ++++++++++++++++
> 5 files changed, 150 insertions(+), 3 deletions(-)
> create mode 100644 test/CodeGen/R600/global_atomics.ll
>
> diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> index 64e10df..9afcb5d 100644
> --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> @@ -101,11 +101,16 @@ private:
> SDValue &TFE) const;
> bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
> SDValue &Offset) const;
> + bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
> + SDValue &VAddr, SDValue &Offset,
> + SDValue &SLC) const;
> bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
> SDValue &SOffset, SDValue &ImmOffset) const;
> bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
> SDValue &Offset, SDValue &GLC, SDValue &SLC,
> SDValue &TFE) const;
> + bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
> + SDValue &Offset, SDValue &GLC) const;
> SDNode *SelectAddrSpaceCast(SDNode *N);
> bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
> bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
> @@ -909,6 +914,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
> return false;
> }
>
> +bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
> + SDValue &VAddr, SDValue &Offset,
> + SDValue &SLC) const {
> + SLC = CurDAG->getTargetConstant(0, MVT::i1);
> +
> + return SelectMUBUFAddr64(Addr, SRsrc, VAddr, Offset);
> +}
> +
> static SDValue buildRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr,
> uint32_t RsrcDword1, uint64_t RsrcDword2And3) {
>
> @@ -1019,6 +1032,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
> return false;
> }
>
> +bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
> + SDValue &Soffset, SDValue &Offset,
> + SDValue &GLC) const {
> + SDValue SLC, TFE;
> +
> + return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
> +}
> +
> // FIXME: This is incorrect and only enough to be able to compile.
> SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
> AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
> diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
> index d152c88..a608627 100644
> --- a/lib/Target/R600/AMDGPUInstructions.td
> +++ b/lib/Target/R600/AMDGPUInstructions.td
> @@ -380,6 +380,14 @@ def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
> return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
> }]>;
>
> +class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
> + (ops node:$ptr, node:$value),
> + (atomic_op node:$ptr, node:$value),
> + [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
> +>;
> +
> +def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
> +
> //===----------------------------------------------------------------------===//
> // Misc Pattern Fragments
> //===----------------------------------------------------------------------===//
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index bde90d8..abeaf67 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -196,8 +196,10 @@ def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
>
> def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
> def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
> +def MUBUFAddr64Atomic : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
> def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
> def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
> +def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
>
> def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
> def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
> @@ -929,9 +931,10 @@ class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp =
> let mayLoad = 1;
> }
>
> -class MUBUFAddr64Table <bit is_addr64> {
> +class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
>
> bit IsAddr64 = is_addr64;
> + string OpName = NAME # suffix;
> }
>
> class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
> @@ -947,6 +950,80 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
> let mayLoad = 0;
> }
>
> +class MUBUFAtomicAddr64 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern>
> + : MUBUF <op, outs, ins, asm, pattern> {
> +
> + let offen = 0;
> + let idxen = 0;
> + let addr64 = 1;
> + let tfe = 0;
> + let lds = 0;
> + let soffset = 128;
> +}
> +
> +class MUBUFAtomicOffset <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern>
> + : MUBUF <op, outs, ins, asm, pattern> {
> +
> + let offen = 0;
> + let idxen = 0;
> + let addr64 = 0;
> + let tfe = 0;
> + let lds = 0;
> + let vaddr = 0;
> +}
> +
> +multiclass MUBUF_Atomic <bits<7> op, string name, RegisterClass rc,
> + ValueType vt, SDPatternOperator atomic> {
> +
> + let mayStore = 1, mayLoad = 1 in {
> +
> + // No return variants
> + let glc = 0 in {
> +
> + def _ADDR64 : MUBUFAtomicAddr64 <
> + op, (outs),
> + (ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
> + mbuf_offset:$offset, slc:$slc),
> + name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#"$slc", []
> + >, MUBUFAddr64Table<1>, AtomicNoRet<NAME#"_ADDR64", 0>;
> +
> + def _OFFSET : MUBUFAtomicOffset <
> + op, (outs),
> + (ins rc:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
> + SSrc_32:$soffset, slc:$slc),
> + name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", []
> + >, MUBUFAddr64Table<0>, AtomicNoRet<NAME#"_OFFSET", 0>;
> + } // glc = 0
> +
> +  // Variants that return values
> + let glc = 1, Constraints = "$vdata = $vdata_in",
> + DisableEncoding = "$vdata_in" in {
> +
> + def _RTN_ADDR64 : MUBUFAtomicAddr64 <
> + op, (outs rc:$vdata),
> + (ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
> + mbuf_offset:$offset, slc:$slc),
> + name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#" glc"#"$slc",
> + [(set vt:$vdata,
> + (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i16:$offset,
> + i1:$slc), vt:$vdata_in))]
> + >, MUBUFAddr64Table<1, "_RTN">, AtomicNoRet<NAME#"_ADDR64", 1>;
> +
> + def _RTN_OFFSET : MUBUFAtomicOffset <
> + op, (outs rc:$vdata),
> + (ins rc:$vdata_in, SReg_128:$srsrc, mbuf_offset:$offset,
> + SSrc_32:$soffset, slc:$slc),
> + name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
> + [(set vt:$vdata,
> + (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
> + i1:$slc), vt:$vdata_in))]
> + >, MUBUFAddr64Table<0, "_RTN">, AtomicNoRet<NAME#"_OFFSET", 1>;
> +
> + } // glc = 1
> +
> + } // mayStore = 1, mayLoad = 1
> +}
> +
> multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
> ValueType load_vt = i32,
> SDPatternOperator ld = null_frag> {
> @@ -1292,7 +1369,7 @@ def getMCOpcode : InstrMapping {
>
> def getAddr64Inst : InstrMapping {
> let FilterClass = "MUBUFAddr64Table";
> - let RowFields = ["NAME"];
> + let RowFields = ["OpName"];
> let ColFields = ["IsAddr64"];
> let KeyCol = ["0"];
> let ValueCols = [["1"]];
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index c1b9f57..35cf9dd 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -896,7 +896,9 @@ defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
> >;
> //def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
> //def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
> -//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
> +defm BUFFER_ATOMIC_ADD : MUBUF_Atomic <
> + 0x00000032, "BUFFER_ATOMIC_ADD", VReg_32, i32, atomic_add_global
> +>;
> //def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
> //def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
> //def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
> diff --git a/test/CodeGen/R600/global_atomics.ll b/test/CodeGen/R600/global_atomics.ll
> new file mode 100644
> index 0000000..665913f
> --- /dev/null
> +++ b/test/CodeGen/R600/global_atomics.ll
> @@ -0,0 +1,39 @@
> +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
> +
> +; FUNC-LABEL: @atomic_load_i32_offset
> +; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
> +define void @atomic_load_i32_offset(i32 addrspace(1)* %out, i32 %in) {
> +entry:
> + %0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
> + ret void
> +}
> +
> +; FUNC-LABEL: @atomic_load_i32_ret_offset
> +; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
> +; SI: BUFFER_STORE_DWORD [[RET]]
> +define void @atomic_load_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
> +entry:
> + %0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
> + store i32 %0, i32 addrspace(1)* %out2
> + ret void
> +}
> +
> +; FUNC-LABEL: @atomic_load_i32_addr64
> +; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
> +define void @atomic_load_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
> +entry:
> + %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
> + %0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
> + ret void
> +}
> +
> +; FUNC-LABEL: @atomic_load_i32_ret_addr64
> +; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
> +; SI: BUFFER_STORE_DWORD [[RET]]
> +define void @atomic_load_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
> +entry:
> + %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
> + %0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
> + store i32 %0, i32 addrspace(1)* %out2
> + ret void
> +}
> --
> 1.8.5.5
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
LGTM
More information about the llvm-commits
mailing list