[llvm] r305201 - AMDGPU: Start selecting flat instruction offsets
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 12 09:53:51 PDT 2017
Author: arsenm
Date: Mon Jun 12 11:53:51 2017
New Revision: 305201
URL: http://llvm.org/viewvc/llvm-project?rev=305201&view=rev
Log:
AMDGPU: Start selecting flat instruction offsets
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td
llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-kernel-debug-props.ll
llvm/trunk/test/CodeGen/AMDGPU/flat-address-space.ll
llvm/trunk/test/CodeGen/AMDGPU/flat_atomics.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=305201&r1=305200&r2=305201&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Mon Jun 12 11:53:51 2017
@@ -138,8 +138,10 @@ private:
bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
SDValue &ImmOffset, SDValue &VOffset) const;
- bool SelectFlat(SDValue Addr, SDValue &VAddr,
- SDValue &Offset, SDValue &SLC) const;
+ bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
+ SDValue &Offset, SDValue &SLC) const;
+ bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
+ SDValue &Offset, SDValue &SLC) const;
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
bool &Imm) const;
@@ -1314,16 +1316,37 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFIntr
return true;
}
-bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
- SDValue &VAddr,
- SDValue &Offset,
- SDValue &SLC) const {
+bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
+ SDValue &VAddr,
+ SDValue &Offset,
+ SDValue &SLC) const {
+ int64_t OffsetVal = 0;
+
+ if (Subtarget->hasFlatInstOffsets() &&
+ CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getZExtValue();
+ if (isUInt<12>(COffsetVal)) {
+ Addr = N0;
+ OffsetVal = COffsetVal;
+ }
+ }
+
VAddr = Addr;
- Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i16);
+ Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
+
return true;
}
+bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
+ SDValue &VAddr,
+ SDValue &Offset,
+ SDValue &SLC) const {
+ return SelectFlatOffset(Addr, VAddr, Offset, SLC);
+}
+
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
SDValue &Offset, bool &Imm) const {
Modified: llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td?rev=305201&r1=305200&r2=305201&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td Mon Jun 12 11:53:51 2017
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
-def FLATAtomic : ComplexPattern<i64, 3, "SelectFlat">;
+def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [], -10>;
+def FLATOffset : ComplexPattern<i64, 3, "SelectFlat", [], [], -10>;
//===----------------------------------------------------------------------===//
// FLAT classes
@@ -338,31 +339,31 @@ def flat_truncstorei16 : flat_st <truncs
// Patterns for global loads with no offset.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
- (vt (node i64:$addr)),
- (inst $addr, 0, 0, 0)
+ (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
+ (inst $vaddr, $offset, 0, $slc)
>;
class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
- (vt (node i64:$addr)),
- (inst $addr, 0, 1, 0)
+ (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
+ (inst $vaddr, $offset, 1, $slc)
>;
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
- (node vt:$data, i64:$addr),
- (inst $addr, $data, 0, 0, 0)
+ (node vt:$data, (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc)),
+ (inst $vaddr, $data, $offset, 0, $slc)
>;
class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
// atomic store follows atomic binop convention so the address comes
// first.
- (node i64:$addr, vt:$data),
- (inst $addr, $data, 0, 1, 0)
+ (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
+ (inst $vaddr, $data, $offset, 1, $slc)
>;
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
ValueType data_vt = vt> : Pat <
- (vt (node i64:$addr, data_vt:$data)),
- (inst $addr, $data, 0, 0)
+ (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
+ (inst $vaddr, $data, $offset, $slc)
>;
let Predicates = [isCIVI] in {
Modified: llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-kernel-debug-props.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-kernel-debug-props.ll?rev=305201&r1=305200&r2=305201&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-kernel-debug-props.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-kernel-debug-props.ll Mon Jun 12 11:53:51 2017
@@ -12,7 +12,9 @@ declare void @llvm.dbg.declare(metadata,
; CHECK: DebugProps:
; CHECK: DebuggerABIVersion: [ 1, 0 ]
; CHECK: ReservedNumVGPRs: 4
-; CHECK: ReservedFirstVGPR: 11
+; GFX700: ReservedFirstVGPR: 11
+; GFX800: ReservedFirstVGPR: 11
+; GFX9: ReservedFirstVGPR: 14
; CHECK: PrivateSegmentBufferSGPR: 0
; CHECK: WavefrontPrivateSegmentOffsetSGPR: 11
define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !7 !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !15 {
Modified: llvm/trunk/test/CodeGen/AMDGPU/flat-address-space.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/flat-address-space.ll?rev=305201&r1=305200&r2=305201&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/flat-address-space.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/flat-address-space.ll Mon Jun 12 11:53:51 2017
@@ -1,6 +1,7 @@
-; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck %s
-; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefixes=CHECK,CIVI %s
+; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI %s
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA %s
+; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA,GFX9 %s
; Disable optimizations in case there are optimizations added that
; specialize away generic pointer accesses.
@@ -172,6 +173,55 @@ define amdgpu_kernel void @flat_scratch_
ret void
}
+; CHECK-LABEL: {{^}}store_flat_i8_max_offset:
+; CIVI: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
+; GFX9: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:4095{{$}}
+define amdgpu_kernel void @store_flat_i8_max_offset(i8 addrspace(4)* %fptr, i8 %x) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4095
+ store volatile i8 %x, i8 addrspace(4)* %fptr.offset
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_flat_i8_max_offset_p1:
+; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @store_flat_i8_max_offset_p1(i8 addrspace(4)* %fptr, i8 %x) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4096
+ store volatile i8 %x, i8 addrspace(4)* %fptr.offset
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_flat_i8_neg_offset:
+; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @store_flat_i8_neg_offset(i8 addrspace(4)* %fptr, i8 %x) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 -2
+ store volatile i8 %x, i8 addrspace(4)* %fptr.offset
+ ret void
+}
+
+; CHECK-LABEL: {{^}}load_flat_i8_max_offset:
+; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
+; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
+define amdgpu_kernel void @load_flat_i8_max_offset(i8 addrspace(4)* %fptr) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4095
+ %val = load volatile i8, i8 addrspace(4)* %fptr.offset
+ ret void
+}
+
+; CHECK-LABEL: {{^}}load_flat_i8_max_offset_p1:
+; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
+define amdgpu_kernel void @load_flat_i8_max_offset_p1(i8 addrspace(4)* %fptr) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4096
+ %val = load volatile i8, i8 addrspace(4)* %fptr.offset
+ ret void
+}
+
+; CHECK-LABEL: {{^}}load_flat_i8_neg_offset:
+; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
+define amdgpu_kernel void @load_flat_i8_neg_offset(i8 addrspace(4)* %fptr) #0 {
+ %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 -2
+ %val = load volatile i8, i8 addrspace(4)* %fptr.offset
+ ret void
+}
+
attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }
-attributes #3 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/flat_atomics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/flat_atomics.ll?rev=305201&r1=305200&r2=305201&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/flat_atomics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/flat_atomics.ll Mon Jun 12 11:53:51 2017
@@ -1,8 +1,10 @@
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; GCN-LABEL: {{^}}atomic_add_i32_offset:
-; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@@ -10,8 +12,28 @@ entry:
ret void
}
+; GCN-LABEL: {{^}}atomic_add_i32_max_offset:
+; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}}
+define amdgpu_kernel void @atomic_add_i32_max_offset(i32 addrspace(4)* %out, i32 %in) {
+entry:
+ %gep = getelementptr i32, i32 addrspace(4)* %out, i32 1023
+ %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_add_i32_max_offset_p1:
+; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @atomic_add_i32_max_offset_p1(i32 addrspace(4)* %out, i32 %in) {
+entry:
+ %gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024
+ %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst
+ ret void
+}
+
; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
-; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@@ -22,7 +44,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
-; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; CIVI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@@ -32,7 +55,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
-; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@@ -82,7 +106,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_and_i32_offset:
-; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@@ -91,7 +116,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
-; GCN: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@@ -102,7 +128,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
-; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@@ -112,7 +139,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
-; GCN: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@@ -162,7 +190,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_sub_i32_offset:
-; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@@ -171,7 +200,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
-; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@@ -182,7 +212,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
-; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@@ -192,7 +223,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
-; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@@ -242,7 +274,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_max_i32_offset:
-; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@@ -251,7 +284,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
-; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@@ -262,7 +296,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
-; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@@ -272,7 +307,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
-; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@@ -322,7 +358,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_umax_i32_offset:
-; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@@ -331,7 +368,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
-; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@@ -342,7 +380,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
-; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@@ -352,7 +391,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
-; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@@ -402,7 +442,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_min_i32_offset:
-; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@@ -411,7 +452,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
-; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@@ -422,7 +464,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
-; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@@ -432,7 +475,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
-; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@@ -482,7 +526,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_umin_i32_offset:
-; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@@ -491,7 +536,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
-; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@@ -502,7 +548,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
-; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@@ -512,7 +559,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
-; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@@ -562,7 +610,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_or_i32_offset:
-; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}}
+; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@@ -571,7 +620,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
-; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@@ -582,7 +632,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
-; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}}
+; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@@ -592,7 +643,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
-; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@@ -642,7 +694,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
-; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}}
+; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@@ -651,7 +704,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
-; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@@ -662,7 +716,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
-; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}}
+; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@@ -672,7 +727,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
-; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@@ -724,7 +780,8 @@ entry:
; CMP_SWAP
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(4)* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@@ -733,7 +790,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
-; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i32 %old) {
entry:
@@ -745,7 +803,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index, i32 %old) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@@ -755,7 +814,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
-; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index, i32 %old) {
entry:
@@ -808,7 +868,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_xor_i32_offset:
-; GCN: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CIVI: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GFX9: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(4)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@@ -817,7 +878,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset:
-; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
entry:
@@ -828,7 +890,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
-; GCN: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; CIVI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
+; GFX9: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@@ -838,7 +901,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
-; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) {
entry:
@@ -888,7 +952,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_load_i32_offset:
-; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
entry:
@@ -909,7 +974,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:
-; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(4)* %in, i32 addrspace(4)* %out, i64 %index) {
entry:
@@ -932,7 +998,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_store_i32_offset:
-; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16 glc{{$}}
define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(4)* %out) {
entry:
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@@ -949,7 +1016,8 @@ entry:
}
; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
-; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16 glc{{$}}
define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(4)* %out, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
More information about the llvm-commits
mailing list