[llvm-branch-commits] [llvm-branch] r242685 - Merging r242673:
Hans Wennborg
hans at hanshq.net
Mon Jul 20 09:46:01 PDT 2015
Author: hans
Date: Mon Jul 20 11:46:01 2015
New Revision: 242685
URL: http://llvm.org/viewvc/llvm-project?rev=242685&view=rev
Log:
Merging r242673:
------------------------------------------------------------------------
r242673 | tstellar | 2015-07-20 07:28:41 -0700 (Mon, 20 Jul 2015) | 11 lines
AMDGPU/SI: Add VI patterns to select FLAT instructions for global memory ops
Summary:
The MUBUF addr64 bit has been removed on VI, so we must use FLAT
instructions when the pointer is stored in VGPRs.
Reviewers: arsenm
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D11067
------------------------------------------------------------------------
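To make the selection change concrete, here is a minimal, hypothetical C++ sketch of the rule this merge implements (the enum and function names are invented for illustration and are not LLVM's actual APIs): on VI, a global access whose 64-bit pointer lives in VGPRs has to be selected as a FLAT instruction, because the MUBUF addr64 form no longer exists there.

// Hypothetical sketch of the selection rule, not the real AMDGPUDAGToDAGISel
// code; compiles standalone with any C++11 compiler.
#include <cstdio>

enum Generation { SOUTHERN_ISLANDS, SEA_ISLANDS, VOLCANIC_ISLANDS };

// Which machine instruction a global i32 load would be lowered to.
const char *selectGlobalLoad(Generation Gen, bool PtrInVGPRs) {
  if (PtrInVGPRs && Gen >= VOLCANIC_ISLANDS)
    return "flat_load_dword";              // addr64 removed: must use FLAT
  if (PtrInVGPRs)
    return "buffer_load_dword ... addr64"; // SI/CI keep the MUBUF addr64 form
  return "buffer_load_dword";              // SGPR pointer: other MUBUF/SMRD forms
}

int main() {
  std::printf("VI: %s\n", selectGlobalLoad(VOLCANIC_ISLANDS, true));
  std::printf("CI: %s\n", selectGlobalLoad(SEA_ISLANDS, true));
  return 0;
}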
Modified:
llvm/branches/release_37/ (props changed)
llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.h
llvm/branches/release_37/lib/Target/AMDGPU/VIInstructions.td
llvm/branches/release_37/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
llvm/branches/release_37/test/CodeGen/AMDGPU/global_atomics.ll
Propchange: llvm/branches/release_37/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Mon Jul 20 11:46:01 2015
@@ -1,3 +1,3 @@
/llvm/branches/Apple/Pertwee:110850,110961
/llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,242236,242239,242281,242288,242296,242331,242341,242410,242412,242433-242434,242442,242543
+/llvm/trunk:155241,242236,242239,242281,242288,242296,242331,242341,242410,242412,242433-242434,242442,242543,242673
Modified: llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=242685&r1=242684&r2=242685&view=diff
==============================================================================
--- llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Mon Jul 20 11:46:01 2015
@@ -1029,6 +1029,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr
SDValue &SLC, SDValue &TFE) const {
SDValue Ptr, Offen, Idxen, Addr64;
+ // The addr64 bit was removed for Volcanic Islands.
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return false;
+
SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
GLC, SLC, TFE);
Modified: llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.cpp?rev=242685&r1=242684&r2=242685&view=diff
==============================================================================
--- llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.cpp Mon Jul 20 11:46:01 2015
@@ -254,6 +254,12 @@ bool SITargetLowering::isShuffleMaskLega
return false;
}
+bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
+ // Flat instructions do not have offsets, and only have the register
+ // address.
+ return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
+}
+
bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
@@ -263,8 +269,21 @@ bool SITargetLowering::isLegalAddressing
switch (AS) {
case AMDGPUAS::GLOBAL_ADDRESS:
- case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ // Assume that we will use FLAT for all global memory accesses
+ // on VI.
+ // FIXME: This assumption is currently wrong. On VI we still use
+ // MUBUF instructions for the r + i addressing mode. As currently
+ // implemented, the MUBUF instructions only work on buffers < 4GB.
+ // It may be possible to support > 4GB buffers with MUBUF instructions,
+ // by setting the stride value in the resource descriptor which would
+ // increase the size limit to (stride * 4GB). However, this is risky,
+ // because it has never been validated.
+ return isLegalFlatAddressingMode(AM);
+ }
+ // fall-through
case AMDGPUAS::PRIVATE_ADDRESS:
+ case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: {
// MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
// additionally can do r + r + i with addr64. 32-bit has more addressing
@@ -324,11 +343,9 @@ bool SITargetLowering::isLegalAddressing
return false;
}
- case AMDGPUAS::FLAT_ADDRESS: {
- // Flat instructions do not have offsets, and only have the register
- // address.
- return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
- }
+ case AMDGPUAS::FLAT_ADDRESS:
+ return isLegalFlatAddressingMode(AM);
+
default:
llvm_unreachable("unhandled address space");
}
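The hunk above simply hoists the FLAT addressing-mode check into a helper so the new VI global-address case can reuse it. A minimal standalone sketch of that check, assuming a simplified AddrMode struct in place of LLVM's TargetLowering::AddrMode: FLAT instructions take only a register address, so any immediate offset or scaled index makes the mode illegal and leaves it to MUBUF-style addressing.

// Standalone sketch of the isLegalFlatAddressingMode logic above; the struct
// is a simplified stand-in for LLVM's TargetLowering::AddrMode.
#include <cassert>

struct AddrMode {
  long long BaseOffs; // immediate byte offset folded into the address
  int Scale;          // multiplier on an index register (0 = no index)
};

bool isLegalFlatAddressingMode(const AddrMode &AM) {
  // FLAT has no immediate offset and no scaled index: register address only.
  return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
}

int main() {
  assert(isLegalFlatAddressingMode({0, 0}));   // plain register address: legal
  assert(isLegalFlatAddressingMode({0, 1}));   // register + unscaled index: legal
  assert(!isLegalFlatAddressingMode({16, 0})); // base + 16: needs a MUBUF offset
  return 0;
}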
Modified: llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.h?rev=242685&r1=242684&r2=242685&view=diff
==============================================================================
--- llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.h (original)
+++ llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.h Mon Jul 20 11:46:01 2015
@@ -56,6 +56,7 @@ class SITargetLowering : public AMDGPUTa
SDValue performMin3Max3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ bool isLegalFlatAddressingMode(const AddrMode &AM) const;
public:
SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI);
Modified: llvm/branches/release_37/lib/Target/AMDGPU/VIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_37/lib/Target/AMDGPU/VIInstructions.td?rev=242685&r1=242684&r2=242685&view=diff
==============================================================================
--- llvm/branches/release_37/lib/Target/AMDGPU/VIInstructions.td (original)
+++ llvm/branches/release_37/lib/Target/AMDGPU/VIInstructions.td Mon Jul 20 11:46:01 2015
@@ -103,4 +103,46 @@ def : Pat <
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
>;
+// Patterns for global loads with no offset
+class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
+ (vt (node i64:$addr)),
+ (inst $addr, 0, 0, 0)
+>;
+
+def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_global, i32>;
+def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_global, i32>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_global, i32>;
+def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_global, i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORD, global_load, i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX2, global_load, v2i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX4, global_load, v4i32>;
+
+class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
+ (node vt:$data, i64:$addr),
+ (inst $data, $addr, 0, 0, 0)
+>;
+
+def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_global, i32>;
+def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_global, i32>;
+def : FlatStorePat <FLAT_STORE_DWORD, global_store, i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX2, global_store, v2i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX4, global_store, v4i32>;
+
+class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
+ (vt (node i64:$addr, vt:$data)),
+ (inst $addr, $data, 0, 0)
+>;
+
+def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
+
+
} // End Predicates = [isVI]
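The three TableGen classes above are shorthand for "match this SelectionDAG node on a 64-bit VGPR address and emit this FLAT opcode with glc/slc/tfe cleared." A rough conceptual C++ sketch of the node-to-instruction table those `def : ...Pat` lines spell out, using hypothetical enum names rather than the generated matcher tables:

// Conceptual illustration only; the real mapping is produced by TableGen.
#include <cstdio>

enum NodeKind { GLOBAL_LOAD_I32, SEXTLOAD_I8_GLOBAL, GLOBAL_STORE_I32, ATOMIC_ADD_GLOBAL };
enum FlatOpcode { FLAT_LOAD_DWORD, FLAT_LOAD_SBYTE, FLAT_STORE_DWORD, FLAT_ATOMIC_ADD_RTN };

struct FlatPattern {
  NodeKind Node;   // the DAG node the pattern matches
  FlatOpcode Inst; // the FLAT instruction it emits (glc/slc/tfe operands are 0)
};

// One row per 'def : FlatLoadPat/FlatStorePat/FlatAtomicPat<...>' above.
static const FlatPattern Patterns[] = {
    {GLOBAL_LOAD_I32, FLAT_LOAD_DWORD},
    {SEXTLOAD_I8_GLOBAL, FLAT_LOAD_SBYTE},
    {GLOBAL_STORE_I32, FLAT_STORE_DWORD},
    {ATOMIC_ADD_GLOBAL, FLAT_ATOMIC_ADD_RTN},
};

bool selectFlat(NodeKind N, FlatOpcode &Out) {
  for (const FlatPattern &P : Patterns)
    if (P.Node == N) { Out = P.Inst; return true; }
  return false; // no FLAT pattern matched; other patterns get a chance
}

int main() {
  FlatOpcode Op;
  if (selectFlat(GLOBAL_LOAD_I32, Op))
    std::printf("selected FLAT opcode %d\n", static_cast<int>(Op));
  return 0;
}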
Modified: llvm/branches/release_37/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_37/test/CodeGen/AMDGPU/cgp-addressing-modes.ll?rev=242685&r1=242684&r2=242685&view=diff
==============================================================================
--- llvm/branches/release_37/test/CodeGen/AMDGPU/cgp-addressing-modes.ll (original)
+++ llvm/branches/release_37/test/CodeGen/AMDGPU/cgp-addressing-modes.ll Mon Jul 20 11:46:01 2015
@@ -1,12 +1,15 @@
-; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=OPT %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN %s
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
declare i32 @llvm.r600.read.tidig.x() #0
; OPT-LABEL: @test_sink_global_small_offset_i32(
-; OPT-NOT: getelementptr i32, i32 addrspace(1)* %in
+; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
+; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
; OPT: br i1
-; OPT: ptrtoint
+; OPT-CI: ptrtoint
; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
; GCN: {{^}}BB0_2:
@@ -214,8 +217,11 @@ done:
}
; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
+; VI-DAG: s_movk_i32 flat_scratch_lo, 0x0
+; VI-DAG: s_movk_i32 flat_scratch_hi, 0x0
; GCN: s_and_saveexec_b64
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN: {{^}}BB7_2:
define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) {
entry:
Modified: llvm/branches/release_37/test/CodeGen/AMDGPU/global_atomics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_37/test/CodeGen/AMDGPU/global_atomics.ll?rev=242685&r1=242684&r2=242685&view=diff
==============================================================================
--- llvm/branches/release_37/test/CodeGen/AMDGPU/global_atomics.ll (original)
+++ llvm/branches/release_37/test/CodeGen/AMDGPU/global_atomics.ll Mon Jul 20 11:46:01 2015
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=VI --check-prefix=FUNC %s
+
; FUNC-LABEL: {{^}}atomic_add_i32_offset:
-; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -10,8 +12,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset:
-; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -22,6 +24,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+
define void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -32,7 +38,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -43,7 +52,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_add_i32:
-; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
@@ -51,8 +60,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_add_i32_ret:
-; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
@@ -62,6 +71,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_add_i32_addr64:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -71,7 +83,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64:
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -81,7 +96,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_and_i32_offset:
-; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -90,8 +105,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset:
-; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -102,6 +117,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -112,7 +130,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -123,7 +144,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_and_i32:
-; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
@@ -131,8 +152,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_and_i32_ret:
-; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
@@ -142,6 +163,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_and_i32_addr64:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -151,7 +175,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64:
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -161,7 +188,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_sub_i32_offset:
-; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -170,8 +197,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset:
-; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -182,6 +209,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -192,7 +222,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -203,7 +236,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_sub_i32:
-; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
@@ -211,8 +244,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_sub_i32_ret:
-; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
@@ -222,6 +255,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_sub_i32_addr64:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -231,7 +267,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64:
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -241,7 +280,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_max_i32_offset:
-; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -250,8 +289,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset:
-; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -262,6 +301,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -272,7 +314,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -283,7 +328,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_max_i32:
-; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
@@ -291,8 +336,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_max_i32_ret:
-; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
@@ -302,6 +347,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_max_i32_addr64:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -311,7 +359,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64:
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -321,7 +372,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umax_i32_offset:
-; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -330,8 +381,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset:
-; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -342,6 +393,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -352,7 +406,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -363,7 +420,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umax_i32:
-; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
@@ -371,8 +428,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umax_i32_ret:
-; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
@@ -382,6 +439,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_umax_i32_addr64:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -391,7 +451,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64:
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -401,7 +464,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_min_i32_offset:
-; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -410,8 +473,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset:
-; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -422,6 +485,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -432,7 +498,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -443,7 +512,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_min_i32:
-; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
@@ -451,8 +520,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_min_i32_ret:
-; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
@@ -462,6 +531,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_min_i32_addr64:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -471,7 +543,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64:
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -481,7 +556,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umin_i32_offset:
-; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -490,8 +565,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset:
-; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -502,6 +577,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -512,7 +590,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -523,7 +604,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umin_i32:
-; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
@@ -532,7 +613,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32_ret:
; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
@@ -542,6 +623,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32_addr64:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -551,7 +635,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64:
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -561,7 +648,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_or_i32_offset:
-; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -570,8 +657,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset:
-; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -582,6 +669,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -592,7 +682,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -603,7 +696,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_or_i32:
-; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
@@ -611,8 +704,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_or_i32_ret:
-; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
@@ -622,6 +715,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_or_i32_addr64:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -631,7 +727,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64:
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -641,7 +740,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_offset:
-; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -650,8 +749,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset:
-; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -672,7 +771,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -683,7 +785,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xchg_i32:
-; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
@@ -691,8 +793,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret:
-; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
@@ -702,6 +804,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64:
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -711,7 +816,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -721,7 +829,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xor_i32_offset:
-; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -730,8 +838,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset:
-; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -742,6 +850,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset:
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -752,7 +863,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -763,7 +877,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xor_i32:
-; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
@@ -771,8 +885,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xor_i32_ret:
-; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
@@ -782,6 +896,9 @@ entry:
; FUNC-LABEL: {{^}}atomic_xor_i32_addr64:
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -791,7 +908,10 @@ entry:
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64:
; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index