[PATCH] R600/SI: Add support for global atomic add
Tom Stellard
thomas.stellard at amd.com
Fri Sep 19 11:19:28 PDT 2014
---
lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 21 +++++++++
lib/Target/R600/AMDGPUInstructions.td | 8 ++++
lib/Target/R600/SIInstrInfo.td | 81 +++++++++++++++++++++++++++++++++-
lib/Target/R600/SIInstructions.td | 4 +-
test/CodeGen/R600/global_atomics.ll | 39 ++++++++++++++++
5 files changed, 150 insertions(+), 3 deletions(-)
create mode 100644 test/CodeGen/R600/global_atomics.ll
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index 64e10df..9afcb5d 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -101,11 +101,16 @@ private:
SDValue &TFE) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &Offset) const;
+ bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
+ SDValue &VAddr, SDValue &Offset,
+ SDValue &SLC) const;
bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
SDValue &SOffset, SDValue &ImmOffset) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
SDValue &Offset, SDValue &GLC, SDValue &SLC,
SDValue &TFE) const;
+ bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+ SDValue &Offset, SDValue &GLC) const;
SDNode *SelectAddrSpaceCast(SDNode *N);
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
@@ -909,6 +914,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
return false;
}
+bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
+ SDValue &VAddr, SDValue &Offset,
+ SDValue &SLC) const {
+ SLC = CurDAG->getTargetConstant(0, MVT::i1);
+
+ return SelectMUBUFAddr64(Addr, SRsrc, VAddr, Offset);
+}
+
static SDValue buildRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr,
uint32_t RsrcDword1, uint64_t RsrcDword2And3) {
@@ -1019,6 +1032,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
return false;
}
+bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
+ SDValue &Soffset, SDValue &Offset,
+ SDValue &GLC) const {
+ SDValue SLC, TFE;
+
+ return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+}
+
// FIXME: This is incorrect and only enough to be able to compile.
SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index d152c88..a608627 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -380,6 +380,14 @@ def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
}]>;
+class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
+ (ops node:$ptr, node:$value),
+ (atomic_op node:$ptr, node:$value),
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
+>;
+
+def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
+
//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index bde90d8..abeaf67 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -196,8 +196,10 @@ def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
+def MUBUFAddr64Atomic : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
+def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
@@ -929,9 +931,10 @@ class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp =
let mayLoad = 1;
}
-class MUBUFAddr64Table <bit is_addr64> {
+class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
bit IsAddr64 = is_addr64;
+ string OpName = NAME # suffix;
}
class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
@@ -947,6 +950,80 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
let mayLoad = 0;
}
+class MUBUFAtomicAddr64 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern>
+ : MUBUF <op, outs, ins, asm, pattern> {
+
+ let offen = 0;
+ let idxen = 0;
+ let addr64 = 1;
+ let tfe = 0;
+ let lds = 0;
+ let soffset = 128;
+}
+
+class MUBUFAtomicOffset <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern>
+ : MUBUF <op, outs, ins, asm, pattern> {
+
+ let offen = 0;
+ let idxen = 0;
+ let addr64 = 0;
+ let tfe = 0;
+ let lds = 0;
+ let vaddr = 0;
+}
+
+multiclass MUBUF_Atomic <bits<7> op, string name, RegisterClass rc,
+ ValueType vt, SDPatternOperator atomic> {
+
+ let mayStore = 1, mayLoad = 1 in {
+
+ // No return variants
+ let glc = 0 in {
+
+ def _ADDR64 : MUBUFAtomicAddr64 <
+ op, (outs),
+ (ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
+ mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#"$slc", []
+ >, MUBUFAddr64Table<1>, AtomicNoRet<NAME#"_ADDR64", 0>;
+
+ def _OFFSET : MUBUFAtomicOffset <
+ op, (outs),
+ (ins rc:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
+ SSrc_32:$soffset, slc:$slc),
+ name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", []
+ >, MUBUFAddr64Table<0>, AtomicNoRet<NAME#"_OFFSET", 0>;
+ } // glc = 0
+
+ // Variant that return values
+ let glc = 1, Constraints = "$vdata = $vdata_in",
+ DisableEncoding = "$vdata_in" in {
+
+ def _RTN_ADDR64 : MUBUFAtomicAddr64 <
+ op, (outs rc:$vdata),
+ (ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
+ mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#" glc"#"$slc",
+ [(set vt:$vdata,
+ (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i16:$offset,
+ i1:$slc), vt:$vdata_in))]
+ >, MUBUFAddr64Table<1, "_RTN">, AtomicNoRet<NAME#"_ADDR64", 1>;
+
+ def _RTN_OFFSET : MUBUFAtomicOffset <
+ op, (outs rc:$vdata),
+ (ins rc:$vdata_in, SReg_128:$srsrc, mbuf_offset:$offset,
+ SSrc_32:$soffset, slc:$slc),
+ name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
+ [(set vt:$vdata,
+ (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
+ i1:$slc), vt:$vdata_in))]
+ >, MUBUFAddr64Table<0, "_RTN">, AtomicNoRet<NAME#"_OFFSET", 1>;
+
+ } // glc = 1
+
+ } // mayStore = 1, mayLoad = 1
+}
+
multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
ValueType load_vt = i32,
SDPatternOperator ld = null_frag> {
@@ -1292,7 +1369,7 @@ def getMCOpcode : InstrMapping {
def getAddr64Inst : InstrMapping {
let FilterClass = "MUBUFAddr64Table";
- let RowFields = ["NAME"];
+ let RowFields = ["OpName"];
let ColFields = ["IsAddr64"];
let KeyCol = ["0"];
let ValueCols = [["1"]];
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index c1b9f57..35cf9dd 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -896,7 +896,9 @@ defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
>;
//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
-//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
+defm BUFFER_ATOMIC_ADD : MUBUF_Atomic <
+ 0x00000032, "BUFFER_ATOMIC_ADD", VReg_32, i32, atomic_add_global
+>;
//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
diff --git a/test/CodeGen/R600/global_atomics.ll b/test/CodeGen/R600/global_atomics.ll
new file mode 100644
index 0000000..665913f
--- /dev/null
+++ b/test/CodeGen/R600/global_atomics.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+
+; FUNC-LABEL: @atomic_load_i32_offset
+; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_load_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: @atomic_load_i32_ret_offset
+; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: BUFFER_STORE_DWORD [[RET]]
+define void @atomic_load_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: @atomic_load_i32_addr64
+; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_load_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: @atomic_load_i32_ret_addr64
+; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: BUFFER_STORE_DWORD [[RET]]
+define void @atomic_load_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
--
1.8.5.5
More information about the llvm-commits
mailing list