[llvm] r265709 - AMDGPU/SI: Implement atomic load/store for i32 and i64
Jan Vesely via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 7 12:23:11 PDT 2016
Author: jvesely
Date: Thu Apr 7 14:23:11 2016
New Revision: 265709
URL: http://llvm.org/viewvc/llvm-project?rev=265709&view=rev
Log:
AMDGPU/SI: Implement atomic load/store for i32 and i64
Implemented as standard load/store instructions with the GLC bit set.
Reviewers: tstellardAMD, arsenm
Differential Revision: http://reviews.llvm.org/D18760
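
As an illustration of the new lowering, here is a minimal IR sketch (function name hypothetical; the expected instructions mirror the CHECK lines added to global_atomics.ll below):

  define void @atomic_load_example(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
    %val = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4
    store i32 %val, i32 addrspace(1)* %out
    ret void
  }

On SI this selects "buffer_load_dword ... glc"; on VI it selects "flat_load_dword ... glc", i.e. the ordinary load with the GLC bit forced to 1.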
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/trunk/lib/Target/AMDGPU/CIInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/test/CodeGen/AMDGPU/global_atomics.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=265709&r1=265708&r2=265709&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Thu Apr 7 14:23:11 2016
@@ -81,16 +81,16 @@ private:
static bool checkType(const Value *ptr, unsigned int addrspace);
static bool checkPrivateAddress(const MachineMemOperand *Op);
- static bool isGlobalStore(const StoreSDNode *N);
- static bool isFlatStore(const StoreSDNode *N);
+ static bool isGlobalStore(const MemSDNode *N);
+ static bool isFlatStore(const MemSDNode *N);
static bool isPrivateStore(const StoreSDNode *N);
static bool isLocalStore(const StoreSDNode *N);
static bool isRegionStore(const StoreSDNode *N);
bool isCPLoad(const LoadSDNode *N) const;
- bool isConstantLoad(const LoadSDNode *N, int cbID) const;
- bool isGlobalLoad(const LoadSDNode *N) const;
- bool isFlatLoad(const LoadSDNode *N) const;
+ bool isConstantLoad(const MemSDNode *N, int cbID) const;
+ bool isGlobalLoad(const MemSDNode *N) const;
+ bool isFlatLoad(const MemSDNode *N) const;
bool isParamLoad(const LoadSDNode *N) const;
bool isPrivateLoad(const LoadSDNode *N) const;
bool isLocalLoad(const LoadSDNode *N) const;
@@ -128,6 +128,8 @@ private:
SDValue &TFE) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset, SDValue &GLC) const;
+ bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+ SDValue &Offset) const;
void SelectMUBUFConstant(SDValue Constant,
SDValue &SOffset,
SDValue &ImmOffset) const;
@@ -558,7 +560,9 @@ bool AMDGPUDAGToDAGISel::checkPrivateAdd
return false;
}
-bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
+bool AMDGPUDAGToDAGISel::isGlobalStore(const MemSDNode *N) {
+ if (!N->writeMem())
+ return false;
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}
@@ -573,7 +577,9 @@ bool AMDGPUDAGToDAGISel::isLocalStore(co
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}
-bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
+bool AMDGPUDAGToDAGISel::isFlatStore(const MemSDNode *N) {
+ if (!N->writeMem())
+ return false;
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}
@@ -581,7 +587,9 @@ bool AMDGPUDAGToDAGISel::isRegionStore(c
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}
-bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
+bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
+ if (!N->readMem())
+ return false;
const Value *MemVal = N->getMemOperand()->getValue();
if (CbId == -1)
return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
@@ -589,7 +597,9 @@ bool AMDGPUDAGToDAGISel::isConstantLoad(
return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}
-bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
+bool AMDGPUDAGToDAGISel::isGlobalLoad(const MemSDNode *N) const {
+ if (!N->readMem())
+ return false;
if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
N->getMemoryVT().bitsLT(MVT::i32))
@@ -606,7 +616,9 @@ bool AMDGPUDAGToDAGISel::isLocalLoad(con
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}
-bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
+bool AMDGPUDAGToDAGISel::isFlatLoad(const MemSDNode *N) const {
+ if (!N->readMem())
+ return false;
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}
@@ -955,8 +967,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDV
SDLoc DL(Addr);
- GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
- SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ if (!GLC.getNode())
+ GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ if (!SLC.getNode())
+ SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
@@ -1113,6 +1127,13 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffs
}
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
+                                           SDValue &Soffset, SDValue &Offset) const {
+ SDValue GLC, SLC, TFE;
+
+ return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+}
+bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset,
SDValue &GLC) const {
SDValue SLC, TFE;
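
The signature changes above are needed because "load atomic"/"store atomic" reach instruction selection as ISD::ATOMIC_LOAD/ISD::ATOMIC_STORE, i.e. as AtomicSDNode, which derives from MemSDNode but not from LoadSDNode/StoreSDNode. The new readMem()/writeMem() guards keep the widened load and store predicates from matching each other's nodes. A sketch of the two node kinds a predicate must now tell apart (names hypothetical):

  %a = load atomic i32, i32 addrspace(1)* %p seq_cst, align 4  ; ISD::ATOMIC_LOAD -> AtomicSDNode
  %b = load i32, i32 addrspace(1)* %p, align 4                 ; ISD::LOAD -> LoadSDNode

Both are MemSDNodes on a global pointer, so isGlobalLoad() accepts both; writeMem() is false for each, so isGlobalStore() rejects them.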
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td?rev=265709&r1=265708&r2=265709&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td Thu Apr 7 14:23:11 2016
@@ -192,6 +192,11 @@ def global_store : PatFrag<(ops node:$va
return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;
+def global_store_atomic : PatFrag<(ops node:$val, node:$ptr),
+ (atomic_store node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<MemSDNode>(N));
+}]>;
+
// Global address space loads
def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
Modified: llvm/trunk/lib/Target/AMDGPU/CIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/CIInstructions.td?rev=265709&r1=265708&r2=265709&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/CIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/CIInstructions.td Thu Apr 7 14:23:11 2016
@@ -289,6 +289,11 @@ class FlatLoadPat <FLAT inst, SDPatternO
(inst $addr, 0, 0, 0)
>;
+class FlatLoadAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
+ (vt (node i64:$addr)),
+ (inst $addr, 1, 0, 0)
+>;
+
def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
@@ -297,17 +302,30 @@ def : FlatLoadPat <FLAT_LOAD_DWORD, flat
def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>;
+def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_flat_load, i32>;
+def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_flat_load, i64>;
+
class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(node vt:$data, i64:$addr),
(inst $addr, $data, 0, 0, 0)
>;
+class FlatStoreAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
+  // atomic store follows the atomic binop convention, so the address comes first
+ (node i64:$addr, vt:$data),
+ (inst $addr, $data, 1, 0, 0)
+>;
+
def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;
def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;
def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;
+def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_flat_store, i32>;
+def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_flat_store, i64>;
+
class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt,
ValueType data_vt = vt> : Pat <
(vt (node i64:$addr, data_vt:$data)),
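
On VI, global atomics go through the flat path, so FlatLoadAtomicPat/FlatStoreAtomicPat reuse the ordinary flat instructions with the first immediate operand (the GLC bit) hard-coded to 1. A minimal IR sketch of the store side (function name hypothetical; mirrors the atomic_store_i64 test below):

  define void @atomic_store_example(i64 %in, i64 addrspace(1)* %out) {
    store atomic i64 %in, i64 addrspace(1)* %out seq_cst, align 8
    ret void
  }

  ; VI: flat_store_dwordx2 v[<addr>], v[<data>] glc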
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=265709&r1=265708&r2=265709&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Thu Apr 7 14:23:11 2016
@@ -140,12 +140,13 @@ def SIconstdata_ptr : SDNode<
class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr),
(ld node:$ptr), [{
- return isFlatLoad(dyn_cast<LoadSDNode>(N)) ||
- isGlobalLoad(dyn_cast<LoadSDNode>(N)) ||
- isConstantLoad(cast<LoadSDNode>(N), -1);
+ return isFlatLoad(dyn_cast<MemSDNode>(N)) ||
+ isGlobalLoad(dyn_cast<MemSDNode>(N)) ||
+ isConstantLoad(cast<MemSDNode>(N), -1);
}]>;
def flat_load : flat_ld <load>;
+def atomic_flat_load : flat_ld<atomic_load>;
def flat_az_extloadi8 : flat_ld <az_extloadi8>;
def flat_sextloadi8 : flat_ld <sextloadi8>;
def flat_az_extloadi16 : flat_ld <az_extloadi16>;
@@ -153,11 +154,12 @@ def flat_sextloadi16 : flat_ld <sextload
class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr),
(st node:$val, node:$ptr), [{
- return isFlatStore(dyn_cast<StoreSDNode>(N)) ||
- isGlobalStore(dyn_cast<StoreSDNode>(N));
+ return isFlatStore(dyn_cast<MemSDNode>(N)) ||
+ isGlobalStore(dyn_cast<MemSDNode>(N));
}]>;
def flat_store: flat_st <store>;
+def atomic_flat_store: flat_st <atomic_store>;
def flat_truncstorei8 : flat_st <truncstorei8>;
def flat_truncstorei16 : flat_st <truncstorei16>;
@@ -167,6 +169,12 @@ def mubuf_load : PatFrag <(ops node:$ptr
isConstantLoad(cast<LoadSDNode>(N), -1);
}]>;
+def mubuf_load_atomic : PatFrag <(ops node:$ptr), (atomic_load node:$ptr), [{
+ return isGlobalLoad(cast<MemSDNode>(N)) ||
+ isConstantLoad(cast<MemSDNode>(N), -1);
+}]>;
+
def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
return isConstantLoad(cast<LoadSDNode>(N), -1) &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N);
@@ -721,6 +729,7 @@ def MUBUFAddr64 : ComplexPattern<i64, 7,
def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
+def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
def MUBUFIntrinsicOffset : ComplexPattern<i32, 2, "SelectMUBUFIntrinsicOffset">;
def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset">;
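
MUBUFOffsetNoGLC resolves to the new three-result SelectMUBUFOffset overload above, which returns only srsrc/soffset/offset; the atomic patterns can then hard-code GLC = 1 in the selected instruction rather than threading matched glc/slc/tfe operands through. In IR terms, the two SI addressing forms exercised below are roughly (names hypothetical; mirroring the atomic_load_i32 and atomic_load_i32_addr64 tests):

  %v0 = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4   ; -> BUFFER_LOAD_DWORD_OFFSET ... glc
  %p  = getelementptr i32, i32 addrspace(1)* %in, i64 %idx
  %v1 = load atomic i32, i32 addrspace(1)* %p seq_cst, align 4    ; -> BUFFER_LOAD_DWORD_ADDR64 ... glc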
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=265709&r1=265708&r2=265709&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Thu Apr 7 14:23:11 2016
@@ -3102,20 +3102,35 @@ def : DSAtomicCmpXChg<DS_CMPST_RTN_B64,
// MUBUF Patterns
//===----------------------------------------------------------------------===//
-multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
- PatFrag constant_ld> {
- def : Pat <
+class MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
+ PatFrag constant_ld> : Pat <
(vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
>;
+
+multiclass MUBUFLoad_Atomic_Pattern <MUBUF Instr_ADDR64, MUBUF Instr_OFFSET,
+ ValueType vt, PatFrag atomic_ld> {
+ def : Pat <
+ (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$slc))),
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
+ >;
+
+ def : Pat <
+ (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
+ (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
+ >;
}
let Predicates = [isSICI] in {
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
+def : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
+def : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
+def : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
+def : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
+
+defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORD_ADDR64, BUFFER_LOAD_DWORD_OFFSET, i32, mubuf_load_atomic>;
+defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, BUFFER_LOAD_DWORDX2_OFFSET, i64, mubuf_load_atomic>;
} // End Predicates = [isSICI]
class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
@@ -3176,6 +3191,25 @@ defm : MUBUF_Load_Dword <v2i32, BUFFER_L
defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
+multiclass MUBUFStore_Atomic_Pattern <MUBUF Instr_ADDR64, MUBUF Instr_OFFSET,
+ ValueType vt, PatFrag atomic_st> {
+  // Store follows the atomic op convention, so the address comes first
+ def : Pat <
+ (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$slc), vt:$val),
+ (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
+ >;
+
+ def : Pat <
+ (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
+ (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
+ >;
+}
+let Predicates = [isSICI] in {
+defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET, i32, global_store_atomic>;
+defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, global_store_atomic>;
+} // End Predicates = [isSICI]
+
class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
(st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
u16imm:$offset)),
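
Note the operand order in the store patterns: atomic_st takes the address first and the value second (the atomic-op convention noted in the comment above), the reverse of a regular store PatFrag. A matching IR sketch for the SI buffer path (function name hypothetical; mirrors atomic_store_i32):

  define void @atomic_store_i32_example(i32 %in, i32 addrspace(1)* %out) {
    store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4   ; SI: buffer_store_dword ... glc
    ret void
  }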
Modified: llvm/trunk/test/CodeGen/AMDGPU/global_atomics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/global_atomics.ll?rev=265709&r1=265708&r2=265709&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global_atomics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/global_atomics.ll Thu Apr 7 14:23:11 2016
@@ -930,3 +930,181 @@ entry:
store i32 %0, i32 addrspace(1)* %out2
ret void
}
+
+; ATOMIC_LOAD
+; FUNC-LABEL: {{^}}atomic_load_i32_offset:
+; SI: buffer_load_dword [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dword [[RET]]
+define void @atomic_load_i32_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
+entry:
+ %gep = getelementptr i32, i32 addrspace(1)* %in, i32 4
+ %0 = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i32:
+; SI: buffer_load_dword [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
+; GCN: buffer_store_dword [[RET]]
+define void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
+entry:
+ %0 = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i32_addr64_offset:
+; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dword [[RET]]
+define void @atomic_load_i32_addr64_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
+ %0 = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i32_addr64:
+; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dword [[RET]]
+define void @atomic_load_i32_addr64(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
+ %0 = load atomic i32, i32 addrspace(1)* %ptr seq_cst, align 4
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i64_offset:
+; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dwordx2 [[RET]]
+define void @atomic_load_i64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
+entry:
+ %gep = getelementptr i64, i64 addrspace(1)* %in, i64 4
+ %0 = load atomic i64, i64 addrspace(1)* %gep seq_cst, align 8
+ store i64 %0, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i64:
+; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
+; GCN: buffer_store_dwordx2 [[RET]]
+define void @atomic_load_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
+entry:
+ %0 = load atomic i64, i64 addrspace(1)* %in seq_cst, align 8
+ store i64 %0, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i64_addr64_offset:
+; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
+; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dwordx2 [[RET]]
+define void @atomic_load_i64_addr64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) {
+entry:
+ %ptr = getelementptr i64, i64 addrspace(1)* %in, i64 %index
+ %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
+ %0 = load atomic i64, i64 addrspace(1)* %gep seq_cst, align 8
+ store i64 %0, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i64_addr64:
+; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dwordx2 [[RET]]
+define void @atomic_load_i64_addr64(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) {
+entry:
+ %ptr = getelementptr i64, i64 addrspace(1)* %in, i64 %index
+ %0 = load atomic i64, i64 addrspace(1)* %ptr seq_cst, align 8
+ store i64 %0, i64 addrspace(1)* %out
+ ret void
+}
+
+; ATOMIC_STORE
+; FUNC-LABEL: {{^}}atomic_store_i32_offset:
+; SI: buffer_store_dword {{v[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+define void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
+entry:
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
+ store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i32:
+; SI: buffer_store_dword {{v[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+define void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
+entry:
+ store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i32_addr64_offset:
+; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+define void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
+ store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i32_addr64:
+; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+define void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i64_offset:
+; SI: buffer_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; VI: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+define void @atomic_store_i64_offset(i64 %in, i64 addrspace(1)* %out) {
+entry:
+ %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
+ store atomic i64 %in, i64 addrspace(1)* %gep seq_cst, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i64:
+; SI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}] glc
+define void @atomic_store_i64(i64 %in, i64 addrspace(1)* %out) {
+entry:
+ store atomic i64 %in, i64 addrspace(1)* %out seq_cst, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i64_addr64_offset:
+; SI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
+; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}] glc{{$}}
+define void @atomic_store_i64_addr64_offset(i64 %in, i64 addrspace(1)* %out, i64 %index) {
+entry:
+ %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
+ store atomic i64 %in, i64 addrspace(1)* %gep seq_cst, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i64_addr64:
+; SI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}] glc{{$}}
+define void @atomic_store_i64_addr64(i64 %in, i64 addrspace(1)* %out, i64 %index) {
+entry:
+ %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
+ store atomic i64 %in, i64 addrspace(1)* %ptr seq_cst, align 8
+ ret void
+}
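
To reproduce the new checks locally, RUN lines along these lines should work (the actual RUN lines sit in the unmodified header of global_atomics.ll and are an assumption here):

  ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
  ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s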