[llvm] r265170 - AMDGPU: Implement {BUFFER,FLAT}_ATOMIC_CMPSWAP{,_X2}
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 1 11:27:38 PDT 2016
Author: tstellar
Date: Fri Apr 1 13:27:37 2016
New Revision: 265170
URL: http://llvm.org/viewvc/llvm-project?rev=265170&view=rev
Log:
AMDGPU: Implement {BUFFER,FLAT}_ATOMIC_CMPSWAP{,_X2}
Summary:
Implement BUFFER_ATOMIC_CMPSWAP{,_X2} instructions on all GCN targets, and FLAT_ATOMIC_CMPSWAP{,_X2} on CI+.
The 32-bit instruction variants were tested manually on Kabini and Bonaire. Tests and parts of the code were provided by Jan Veselý.
Patch by: Vedran Miletić
Reviewers: arsenm, tstellarAMD, nhaehnle
Subscribers: jvesely, scchan, kanarayan, arsenm
Differential Revision: http://reviews.llvm.org/D17280
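For context, the cmpxchg IR that these patterns now select (see the new
global_atomics.ll tests below) could be emitted from C++ roughly as follows.
A minimal sketch, assuming the IRBuilder API of this era; the
emitGlobalCmpXchg helper is hypothetical, and later LLVM releases spell the
ordering enum and alignment arguments slightly differently.

  #include "llvm/IR/IRBuilder.h"

  // Hypothetical helper: emit a seq_cst cmpxchg on a global
  // (addrspace(1)) pointer, the form the new BUFFER/FLAT patterns select.
  llvm::Value *emitGlobalCmpXchg(llvm::IRBuilder<> &B, llvm::Value *Ptr,
                                 llvm::Value *Old, llvm::Value *New) {
    llvm::AtomicCmpXchgInst *CX = B.CreateAtomicCmpXchg(
        Ptr, Old, New, llvm::AtomicOrdering::SequentiallyConsistent,
        llvm::AtomicOrdering::SequentiallyConsistent);
    // cmpxchg yields { iN, i1 }; element 0 is the value previously
    // in memory.
    return B.CreateExtractValue(CX, 0);
  }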
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/trunk/lib/Target/AMDGPU/CIInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/test/CodeGen/AMDGPU/global_atomics.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=265170&r1=265169&r2=265170&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Fri Apr 1 13:27:37 2016
@@ -2812,6 +2812,7 @@ const char* AMDGPUTargetLowering::getTar
NODE_NAME_CASE(INTERP_P2)
NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
+ NODE_NAME_CASE(ATOMIC_CMP_SWAP)
case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
}
return nullptr;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=265170&r1=265169&r2=265170&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h Fri Apr 1 13:27:37 2016
@@ -313,6 +313,7 @@ enum NodeType : unsigned {
STORE_MSKOR,
LOAD_CONSTANT,
TBUFFER_STORE_FORMAT,
+ ATOMIC_CMP_SWAP,
LAST_AMDGPU_ISD_NUMBER
};
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td?rev=265170&r1=265169&r2=265170&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td Fri Apr 1 13:27:37 2016
@@ -183,6 +183,11 @@ def AMDGPUstore_mskor : SDNode<"AMDGPUIS
SDTypeProfile<0, 2, []>,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AMDGPUatomic_cmp_swap : SDNode<"AMDGPUISD::ATOMIC_CMP_SWAP",
+ SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisVec<2>]>,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPMemOperand]>;
+
def AMDGPUround : SDNode<"ISD::FROUND",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;
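The profile above says the new node produces one value (plus the chain
implied by SDNPHasChain) and takes two non-chain operands: operand 1 must be
a pointer and operand 2 a vector, the packed {new, old} pair. A sketch of a
call that builds such a node for the i32 case, using only the SelectionDAG
APIs that appear in the SIISelLowering.cpp change below (the helper name is
hypothetical):

  #include "llvm/CodeGen/SelectionDAG.h"

  // Results: (i32 old value, chain); operands: (chain, ptr, v2i32 data).
  llvm::SDValue buildCmpSwapNode(llvm::SelectionDAG &DAG,
                                 const llvm::SDLoc &DL, llvm::SDValue Chain,
                                 llvm::SDValue Ptr, llvm::SDValue NewOld,
                                 llvm::MachineMemOperand *MMO) {
    llvm::SDVTList VTs = DAG.getVTList(llvm::MVT::i32, llvm::MVT::Other);
    llvm::SDValue Ops[] = { Chain, Ptr, NewOld };
    return DAG.getMemIntrinsicNode(llvm::AMDGPUISD::ATOMIC_CMP_SWAP, DL,
                                   VTs, Ops, llvm::MVT::i32, MMO);
  }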
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td?rev=265170&r1=265169&r2=265170&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td Fri Apr 1 13:27:37 2016
@@ -400,6 +400,13 @@ def atomic_umax_global : global_binary_a
def atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
def atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
+def atomic_cmp_swap_global : global_binary_atomic_op<AMDGPUatomic_cmp_swap>;
+def atomic_cmp_swap_global_nortn : PatFrag<
+ (ops node:$ptr, node:$value),
+ (atomic_cmp_swap_global node:$ptr, node:$value),
+ [{ return SDValue(N, 0).use_empty(); }]
+>;
+
//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//
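The _nortn fragment's predicate is ordinary C++ evaluated over the candidate
node during instruction selection: it fires when nothing reads result 0 (the
returned old value), which is what later allows the no-return instruction
encodings to be chosen. The same check, pulled out as a standalone function
(name hypothetical):

  #include "llvm/CodeGen/SelectionDAGNodes.h"

  // True when no instruction consumes the atomic's returned old value,
  // i.e. result 0 of the node has no uses.
  static bool cmpSwapResultUnused(llvm::SDNode *N) {
    return llvm::SDValue(N, 0).use_empty();
  }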
Modified: llvm/trunk/lib/Target/AMDGPU/CIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/CIInstructions.td?rev=265170&r1=265169&r2=265170&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/CIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/CIInstructions.td Fri Apr 1 13:27:37 2016
@@ -308,8 +308,9 @@ def : FlatStorePat <FLAT_STORE_DWORD, fl
def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;
-class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
- (vt (node i64:$addr, vt:$data)),
+class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt,
+ ValueType data_vt = vt> : Pat <
+ (vt (node i64:$addr, data_vt:$data)),
(inst $addr, $data, 0, 0)
>;
@@ -322,6 +323,9 @@ def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RT
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, atomic_cmp_swap_global, i32, v2i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, atomic_cmp_swap_global, i64, v2i64>;
+
} // End Predicates = [isCIVI]
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=265170&r1=265169&r2=265170&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Fri Apr 1 13:27:37 2016
@@ -257,6 +257,16 @@ SITargetLowering::SITargetLowering(Targe
setOperationAction(ISD::FDIV, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);
+ // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling
+ // and output demarshalling.
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
+
+ // We can't return success/failure, only the old value;
+ // let LLVM add the comparison.
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Expand);
+
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMINNUM);
@@ -1156,6 +1166,7 @@ SDValue SITargetLowering::LowerOperation
return LowerTrig(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::FDIV: return LowerFDIV(Op, DAG);
+ case ISD::ATOMIC_CMP_SWAP: return LowerATOMIC_CMP_SWAP(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::GlobalAddress: {
MachineFunction &MF = DAG.getMachineFunction();
@@ -2003,6 +2014,34 @@ SDValue SITargetLowering::LowerTrig(SDVa
}
}
+SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
+ AtomicSDNode *AtomicNode = cast<AtomicSDNode>(Op);
+ assert(AtomicNode->isCompareAndSwap());
+ unsigned AS = AtomicNode->getAddressSpace();
+
+ // No custom lowering required for local address space
+ if (!isFlatGlobalAddrSpace(AS))
+ return Op;
+
+ // Non-local address spaces need custom lowering for atomic compare
+ // and swap: the compare and swap values are packed into a v2i32
+ // (v2i64 for the _X2 variants).
+ SDLoc DL(Op);
+ SDValue ChainIn = Op.getOperand(0);
+ SDValue Addr = Op.getOperand(1);
+ SDValue Old = Op.getOperand(2);
+ SDValue New = Op.getOperand(3);
+ EVT VT = Op.getValueType();
+ MVT SimpleVT = VT.getSimpleVT();
+ MVT VecType = MVT::getVectorVT(SimpleVT, 2);
+
+ SDValue NewOld = DAG.getNode(ISD::BUILD_VECTOR, DL, VecType,
+ New, Old);
+ SDValue Ops[] = { ChainIn, Addr, NewOld };
+ SDVTList VTList = DAG.getVTList(VT, MVT::Other);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::ATOMIC_CMP_SWAP, DL,
+ VTList, Ops, VT, AtomicNode->getMemOperand());
+}
+
//===----------------------------------------------------------------------===//
// Custom DAG optimizations
//===----------------------------------------------------------------------===//
@@ -2849,8 +2888,31 @@ void SITargetLowering::AdjustInstrPostIn
if (!Node->hasAnyUseOfValue(0)) {
MI->setDesc(TII->get(NoRetAtomicOp));
MI->RemoveOperand(0);
+ return;
}
+ // For mubuf_atomic_cmpswap, we need to have tablegen use an extract_subreg
+ // instruction, because the return type of these instructions is a vec2 of
+ // the memory type, so that the result can be tied to the input operand.
+ // This means these instructions always have a use, so we need a special
+ // case that checks whether the atomic's only use is a single
+ // extract_subreg that itself has no uses.
+ if ((Node->hasNUsesOfValue(1, 0) &&
+ Node->use_begin()->getMachineOpcode() == AMDGPU::EXTRACT_SUBREG &&
+ !Node->use_begin()->hasAnyUseOfValue(0))) {
+ unsigned Def = MI->getOperand(0).getReg();
+
+ // Change this into a noret atomic.
+ MI->setDesc(TII->get(NoRetAtomicOp));
+ MI->RemoveOperand(0);
+
+ // If we only remove the def operand from the atomic instruction, the
+ // extract_subreg will be left with a use of a vreg without a def.
+ // So we need to insert an implicit_def to avoid machine verifier
+ // errors.
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::IMPLICIT_DEF), Def);
+ }
return;
}
}
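Taken together, the Custom lowering and the Expand action above give a
32-bit global cmpxchg the following behavior. A host-side model in plain
C++ — a sketch of the semantics only, not the GPU code path; both helper
names are hypothetical:

  #include <atomic>
  #include <cstdint>
  #include <utility>

  // What the hardware cmpswap computes: it consumes the packed
  // {new, old} pair (note the BUILD_VECTOR operand order in
  // LowerATOMIC_CMP_SWAP above) and returns only the previous memory
  // value; no success bit is reported.
  static uint32_t hwCmpSwap(std::atomic<uint32_t> &Mem, uint32_t New,
                            uint32_t Old) {
    uint32_t Expected = Old;
    Mem.compare_exchange_strong(Expected, New);
    return Expected; // always the previous contents of Mem
  }

  // What the Expand of ATOMIC_CMP_SWAP_WITH_SUCCESS synthesizes: the
  // i1 success flag is recovered by comparing the returned value with
  // the expected one.
  static std::pair<uint32_t, bool>
  cmpSwapWithSuccess(std::atomic<uint32_t> &Mem, uint32_t New,
                     uint32_t Old) {
    uint32_t Prev = hwCmpSwap(Mem, New, Old);
    return { Prev, Prev == Old };
  }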
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h?rev=265170&r1=265169&r2=265170&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h Fri Apr 1 13:27:37 2016
@@ -41,6 +41,7 @@ class SITargetLowering final : public AM
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool Signed) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=265170&r1=265169&r2=265170&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Fri Apr 1 13:27:37 2016
@@ -1048,7 +1048,9 @@ defm BUFFER_ATOMIC_XOR : MUBUF_Atomic <
//def BUFFER_ATOMIC_FMIN : MUBUF_ <mubuf<0x3f>, "buffer_atomic_fmin", []>; // isn't on VI
//def BUFFER_ATOMIC_FMAX : MUBUF_ <mubuf<0x40>, "buffer_atomic_fmax", []>; // isn't on VI
//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <mubuf<0x50, 0x60>, "buffer_atomic_swap_x2", []>;
-//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <mubuf<0x51, 0x61>, "buffer_atomic_cmpswap_x2", []>;
+defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Atomic <
+ mubuf<0x51, 0x61>, "buffer_atomic_cmpswap_x2", VReg_128, v2i64, null_frag
+>;
//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <mubuf<0x52, 0x62>, "buffer_atomic_add_x2", []>;
//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <mubuf<0x53, 0x63>, "buffer_atomic_sub_x2", []>;
//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <mubuf<0x54>, "buffer_atomic_rsub_x2", []>; // isn't on CI & VI
@@ -3186,6 +3188,37 @@ def : MUBUFScratchStorePat <BUFFER_STORE
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;
+
+multiclass MUBUFCmpSwapPat <Instruction inst_addr64, Instruction inst_offset,
+ SDPatternOperator node, ValueType data_vt,
+ ValueType node_vt> {
+
+let Predicates = [isSI] in {
+ def : Pat <
+ (node_vt (node (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$slc), data_vt:$vdata_in)),
+ (EXTRACT_SUBREG
+ (inst_addr64 $vdata_in, $vaddr, $srsrc, $soffset, $offset, $slc), sub0)
+ >;
+
+}
+
+ def : Pat <
+ (node_vt (node (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
+ i1:$slc), data_vt:$vdata_in)),
+ (EXTRACT_SUBREG
+ (inst_offset $vdata_in, $srsrc, $soffset, $offset, $slc), sub0)
+ >;
+}
+
+defm : MUBUFCmpSwapPat <BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64,
+ BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET,
+ atomic_cmp_swap_global, v2i32, i32>;
+
+defm : MUBUFCmpSwapPat <BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64,
+ BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET,
+ atomic_cmp_swap_global, v2i64, i64>;
+
//===----------------------------------------------------------------------===//
// MTBUF Patterns
//===----------------------------------------------------------------------===//
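As with the FLAT patterns, the selected instruction's destination here is
the whole {new, old}-shaped register pair (RTN atomics tie vdst to
$vdata_in), and the hardware writes the previous memory contents into its
low half; the (EXTRACT_SUBREG ..., sub0) in the patterns is what hands that
half back to the program. A host-side sketch of the register-pair view,
with hypothetical names:

  #include <cstdint>

  // Model of a 64-bit RTN cmpswap destination pair: after the
  // operation, the low 32 bits (sub0) hold the old memory value.
  struct Vec2Reg {
    uint32_t sub0; // previous memory contents, the cmpxchg result
    uint32_t sub1; // high half of the pair; not meaningful afterwards
  };

  static uint32_t readCmpSwapResult(const Vec2Reg &VDst) {
    return VDst.sub0; // what (EXTRACT_SUBREG ..., sub0) selects
  }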
Modified: llvm/trunk/test/CodeGen/AMDGPU/global_atomics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/global_atomics.ll?rev=265170&r1=265169&r2=265170&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global_atomics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/global_atomics.ll Fri Apr 1 13:27:37 2016
@@ -758,6 +758,95 @@ entry:
ret void
}
+; CMP_SWAP
+
+; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_offset:
+; GCN: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
+ %0 = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
+; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_store_dword v[[RET]]
+define void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
+ %0 = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ %1 = extractvalue { i32, i1 } %0, 0
+ store i32 %1, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
+; SI: buffer_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
+ %0 = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
+; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dword v[[RET]]
+define void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
+ %0 = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ %1 = extractvalue { i32, i1 } %0, 0
+ store i32 %1, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_cmpxchg_i32:
+; GCN: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) {
+entry:
+ %0 = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret:
+; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword v[[RET]]
+define void @atomic_cmpxchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
+entry:
+ %0 = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
+ %1 = extractvalue { i32, i1 } %0, 0
+ store i32 %1, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
+; SI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
+define void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %0 = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
+; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dword v[[RET]]
+define void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %0 = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
+ %1 = extractvalue { i32, i1 } %0, 0
+ store i32 %1, i32 addrspace(1)* %out2
+ ret void
+}
+
; FUNC-LABEL: {{^}}atomic_xor_i32_offset:
; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {