[PATCH] R600/SI: Add support for global atomic add
Tom Stellard
thomas.stellard at amd.com
Wed Jun 25 08:58:38 PDT 2014
---
lib/Target/R600/AMDGPUInstructions.td | 8 ++++++++
lib/Target/R600/SIInstrInfo.td | 34 ++++++++++++++++++++++++++++++++++
lib/Target/R600/SIInstructions.td | 4 +++-
test/CodeGen/R600/global_atomics.ll | 22 ++++++++++++++++++++++
4 files changed, 67 insertions(+), 1 deletion(-)
create mode 100644 test/CodeGen/R600/global_atomics.ll
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index b86b781..7b9a100 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -293,6 +293,14 @@ def atomic_cmp_swap_64_local :
AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
+class global_binary_atomic_op<SDNode atomic_op> : PatFrag< // Pattern fragment wrapping a two-operand atomic node, filtered by address space.
+ (ops node:$ptr, node:$value), // Fragment operands: the pointer and the value.
+ (atomic_op node:$ptr, node:$value), // Matches the supplied atomic_op applied to those two operands.
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}] // Predicate: accept only nodes whose address space is GLOBAL_ADDRESS.
+>;
+
+def atomic_add_global : global_binary_atomic_op<atomic_load_add>; // atomic_load_add, matched only when the global_binary_atomic_op predicate accepts the node.
+
class Constants {
int TWO_PI = 0x40c90fdb;
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 774c9d1..2161c64 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -576,6 +576,40 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
let mayLoad = 0;
}
+multiclass MUBUF_Atomic <bits<7> op, string name, RegisterClass rc,
+ ValueType vt, SDPatternOperator atomic> {
+// Emits two MUBUF records per instantiation: the base record (def "") with no outputs, and an _RTN record that defines $vdata.
+let lds = 0, offen = 0, idxen = 0, addr64 = 1, slc = 0, tfe = 0,
+ soffset = 128 /* ZERO */, mayStore = 1, mayLoad = 1 in {
+// The field values above are shared by both records below.
+let glc = 0 in {
+// glc = 0 record: empty outs list and an empty pattern list, so no selection pattern is attached to this def.
+def "" : MUBUF <
+ op, (outs),
+ (ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, u16imm:$offset),
+ name#" $vdata, $srsrc + $vaddr + $offset", []
+>;
+
+} // glc = 0
+
+let glc = 1, Constraints = "$vdata = $vdata_in", DisableEncoding = "$vdata_in" in {
+// glc = 1 record: $vdata is an output constrained to equal the $vdata_in input; $vdata_in is listed in DisableEncoding.
+def _RTN : MUBUF<
+ op, (outs rc:$vdata),
+ (ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr, u16imm:$offset),
+ name#" $vdata, $srsrc + $vaddr + $offset glc",
+ [(set vt:$vdata,
+ (atomic (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, u16imm:$offset),
+ vt:$vdata_in))] // Pattern: `atomic` applied to a MUBUFAddr64 address with $vdata_in as the value; the result is set into $vdata.
+>;
+
+} // glc = 1
+
+} // lds = 0, offen = 0, idxen = 0, addr64 = 1, slc = 0, tfe = 0,
+ // soffset = 128 /* ZERO */, mayStore = 1, mayLoad = 1
+
+}
+
multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
ValueType load_vt = i32,
SDPatternOperator ld = null_frag> {
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 291d634..d4e1f80 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -889,7 +889,9 @@ def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
>;
//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
-//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
+defm BUFFER_ATOMIC_ADD : MUBUF_Atomic < // Instantiates the MUBUF_Atomic multiclass under the BUFFER_ATOMIC_ADD name prefix.
+ 0x00000032, "BUFFER_ATOMIC_ADD", VReg_32, i32, atomic_add_global // In order: op, name, rc, vt, atomic.
+>;
//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
diff --git a/test/CodeGen/R600/global_atomics.ll b/test/CodeGen/R600/global_atomics.ll
new file mode 100644
index 0000000..80df206
--- /dev/null
+++ b/test/CodeGen/R600/global_atomics.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+
+; FUNC-LABEL: @atomic_load_i32
+; SI: BUFFER_ATOMIC_ADD
+; FIXME: We need to add support for the no return versions of atomics.
+; SI-NOT-FIXME: glc
+define void @atomic_load_i32(i32 addrspace(1)* %out, i32 %in) {
+entry: ; NOTE(review): the function name says "load" but the body is an atomicrmw add; confirm the intended name.
+ %0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst ; %0 is never used: this is the discarded-result case.
+ ret void
+}
+
+; FUNC-LABEL: @atomic_load_i32_ret
+; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]]
+; SI: glc
+; SI: BUFFER_STORE_DWORD [[RET]]
+define void @atomic_load_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry: ; NOTE(review): the function name says "load" but the body is an atomicrmw add; confirm the intended name.
+ %0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst ; The value returned by the atomic is consumed by the store below.
+ store i32 %0, i32 addrspace(1)* %out2 ; Writes the atomic's result to %out2; the checks above expect the same register in the store.
+ ret void
+}
--
1.8.1.5
More information about the llvm-commits
mailing list