[llvm] r343978 - AMDGPU: Future-proof {raw, struct}.buffer.atomic intrinsics
Nicolai Haehnle via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 8 09:53:48 PDT 2018
Author: nha
Date: Mon Oct 8 09:53:48 2018
New Revision: 343978
URL: http://llvm.org/viewvc/llvm-project?rev=343978&view=rev
Log:
AMDGPU: Future-proof {raw,struct}.buffer.atomic intrinsics
Summary:
The ISA is really supposed to support 64-bit atomics as well,
so the data type should be an overload.
Mesa doesn't use these atomics yet, in fact I noticed this
issue while trying to use the atomics from Mesa.
Change-Id: I77f58317a085a0d3eb933cc7e99308c48a19f83e
Reviewers: tpr
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D52291
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td?rev=343978&r1=343977&r2=343978&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td Mon Oct 8 09:53:48 2018
@@ -879,8 +879,8 @@ def int_amdgcn_struct_buffer_store_forma
def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;
class AMDGPURawBufferAtomic : Intrinsic <
- [llvm_i32_ty],
- [llvm_i32_ty, // vdata(VGPR)
+ [llvm_anyint_ty],
+ [LLVMMatchType<0>, // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
@@ -898,9 +898,9 @@ def int_amdgcn_raw_buffer_atomic_and : A
def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
- [llvm_i32_ty],
- [llvm_i32_ty, // src(VGPR)
- llvm_i32_ty, // cmp(VGPR)
+ [llvm_anyint_ty],
+ [LLVMMatchType<0>, // src(VGPR)
+ LLVMMatchType<0>, // cmp(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
@@ -909,8 +909,8 @@ def int_amdgcn_raw_buffer_atomic_cmpswap
AMDGPURsrcIntrinsic<2, 0>;
class AMDGPUStructBufferAtomic : Intrinsic <
- [llvm_i32_ty],
- [llvm_i32_ty, // vdata(VGPR)
+ [llvm_anyint_ty],
+ [LLVMMatchType<0>, // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
@@ -929,9 +929,9 @@ def int_amdgcn_struct_buffer_atomic_and
def int_amdgcn_struct_buffer_atomic_or : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_xor : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
- [llvm_i32_ty],
- [llvm_i32_ty, // src(VGPR)
- llvm_i32_ty, // cmp(VGPR)
+ [llvm_anyint_ty],
+ [LLVMMatchType<0>, // src(VGPR)
+ LLVMMatchType<0>, // cmp(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll?rev=343978&r1=343977&r2=343978&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll Mon Oct 8 09:53:48 2018
@@ -15,12 +15,12 @@
;CHECK: buffer_atomic_swap v0, off, s[0:3], 0{{$}}
define amdgpu_ps float @test1(<4 x i32> inreg %rsrc, i32 %data, i32 %voffset) {
main_body:
- %o1 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
- %o3 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap(i32 %o1, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
+ %o1 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
+ %o3 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %o1, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
%off5 = add i32 %voffset, 42
- %o5 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap(i32 %o3, <4 x i32> %rsrc, i32 %off5, i32 0, i32 0)
- %o6 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap(i32 %o5, <4 x i32> %rsrc, i32 4, i32 8188, i32 0)
- %unused = call i32 @llvm.amdgcn.raw.buffer.atomic.swap(i32 %o6, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
+ %o5 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %o3, <4 x i32> %rsrc, i32 %off5, i32 0, i32 0)
+ %o6 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %o5, <4 x i32> %rsrc, i32 4, i32 8188, i32 0)
+ %unused = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %o6, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
%out = bitcast i32 %o6 to float
ret float %out
}
@@ -46,15 +46,15 @@ main_body:
;CHECK: buffer_atomic_xor v0, v1, s[0:3], 0 offen glc
define amdgpu_ps float @test2(<4 x i32> inreg %rsrc, i32 %data, i32 %voffset) {
main_body:
- %t1 = call i32 @llvm.amdgcn.raw.buffer.atomic.add(i32 %data, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
- %t2 = call i32 @llvm.amdgcn.raw.buffer.atomic.sub(i32 %t1, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 2)
- %t3 = call i32 @llvm.amdgcn.raw.buffer.atomic.smin(i32 %t2, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
- %t4 = call i32 @llvm.amdgcn.raw.buffer.atomic.umin(i32 %t3, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 2)
- %t5 = call i32 @llvm.amdgcn.raw.buffer.atomic.smax(i32 %t4, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
- %t6 = call i32 @llvm.amdgcn.raw.buffer.atomic.umax(i32 %t5, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 2)
- %t7 = call i32 @llvm.amdgcn.raw.buffer.atomic.and(i32 %t6, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
- %t8 = call i32 @llvm.amdgcn.raw.buffer.atomic.or(i32 %t7, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 2)
- %t9 = call i32 @llvm.amdgcn.raw.buffer.atomic.xor(i32 %t8, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
+ %t1 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %data, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
+ %t2 = call i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32(i32 %t1, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 2)
+ %t3 = call i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32(i32 %t2, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
+ %t4 = call i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32(i32 %t3, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 2)
+ %t5 = call i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32(i32 %t4, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
+ %t6 = call i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32(i32 %t5, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 2)
+ %t7 = call i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(i32 %t6, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
+ %t8 = call i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(i32 %t7, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 2)
+ %t9 = call i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(i32 %t8, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
%out = bitcast i32 %t9 to float
ret float %out
}
@@ -75,18 +75,18 @@ main_body:
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[SOFS]] offset:4 glc
define amdgpu_ps float @test3(<4 x i32> inreg %rsrc, i32 %data, i32 %cmp, i32 %vindex, i32 %voffset) {
main_body:
- %o1 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
- %o3 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap(i32 %o1, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
+ %o1 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
+ %o3 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %o1, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
%ofs.5 = add i32 %voffset, 44
- %o5 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap(i32 %o3, i32 %cmp, <4 x i32> %rsrc, i32 %ofs.5, i32 0, i32 0)
- %o6 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap(i32 %o5, i32 %cmp, <4 x i32> %rsrc, i32 4, i32 8188, i32 0)
+ %o5 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %o3, i32 %cmp, <4 x i32> %rsrc, i32 %ofs.5, i32 0, i32 0)
+ %o6 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %o5, i32 %cmp, <4 x i32> %rsrc, i32 4, i32 8188, i32 0)
; Detecting the no-return variant doesn't work right now because of how the
; intrinsic is replaced by an instruction that feeds into an EXTRACT_SUBREG.
; Since there probably isn't a reasonable use-case of cmpswap that discards
; the return value, that seems okay.
;
-; %unused = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap(i32 %o6, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
+; %unused = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %o6, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
%out = bitcast i32 %o6 to float
ret float %out
}
@@ -95,21 +95,21 @@ main_body:
;CHECK: buffer_atomic_add v0,
define amdgpu_ps float @test4() {
main_body:
- %v = call i32 @llvm.amdgcn.raw.buffer.atomic.add(i32 1, <4 x i32> undef, i32 4, i32 0, i32 0)
+ %v = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 1, <4 x i32> undef, i32 4, i32 0, i32 0)
%v.float = bitcast i32 %v to float
ret float %v.float
}
-declare i32 @llvm.amdgcn.raw.buffer.atomic.swap(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.add(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.sub(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.smin(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.umin(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.smax(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.umax(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.and(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.or(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.xor(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32) #0
attributes #0 = { nounwind }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll?rev=343978&r1=343977&r2=343978&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll Mon Oct 8 09:53:48 2018
@@ -19,14 +19,14 @@
;CHECK: buffer_atomic_swap v0, {{v[0-9]+}}, s[0:3], 0 idxen{{$}}
define amdgpu_ps float @test1(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex, i32 %voffset) {
main_body:
- %o1 = call i32 @llvm.amdgcn.struct.buffer.atomic.swap(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
- %o2 = call i32 @llvm.amdgcn.struct.buffer.atomic.swap(i32 %o1, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
- %o3 = call i32 @llvm.amdgcn.struct.buffer.atomic.swap(i32 %o2, <4 x i32> %rsrc, i32 0, i32 %voffset, i32 0, i32 0)
- %o4 = call i32 @llvm.amdgcn.struct.buffer.atomic.swap(i32 %o3, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 0, i32 0)
+ %o1 = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
+ %o2 = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32 %o1, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
+ %o3 = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32 %o2, <4 x i32> %rsrc, i32 0, i32 %voffset, i32 0, i32 0)
+ %o4 = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32 %o3, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 0, i32 0)
%ofs.5 = add i32 %voffset, 42
- %o5 = call i32 @llvm.amdgcn.struct.buffer.atomic.swap(i32 %o4, <4 x i32> %rsrc, i32 0, i32 %ofs.5, i32 0, i32 0)
- %o6 = call i32 @llvm.amdgcn.struct.buffer.atomic.swap(i32 %o5, <4 x i32> %rsrc, i32 0, i32 4, i32 8188, i32 0)
- %unused = call i32 @llvm.amdgcn.struct.buffer.atomic.swap(i32 %o6, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
+ %o5 = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32 %o4, <4 x i32> %rsrc, i32 0, i32 %ofs.5, i32 0, i32 0)
+ %o6 = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32 %o5, <4 x i32> %rsrc, i32 0, i32 4, i32 8188, i32 0)
+ %unused = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32 %o6, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
%out = bitcast i32 %o6 to float
ret float %out
}
@@ -52,15 +52,15 @@ main_body:
;CHECK: buffer_atomic_xor v0, v1, s[0:3], 0 idxen glc
define amdgpu_ps float @test2(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) {
main_body:
- %t1 = call i32 @llvm.amdgcn.struct.buffer.atomic.add(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
- %t2 = call i32 @llvm.amdgcn.struct.buffer.atomic.sub(i32 %t1, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 2)
- %t3 = call i32 @llvm.amdgcn.struct.buffer.atomic.smin(i32 %t2, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
- %t4 = call i32 @llvm.amdgcn.struct.buffer.atomic.umin(i32 %t3, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 2)
- %t5 = call i32 @llvm.amdgcn.struct.buffer.atomic.smax(i32 %t4, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
- %t6 = call i32 @llvm.amdgcn.struct.buffer.atomic.umax(i32 %t5, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 2)
- %t7 = call i32 @llvm.amdgcn.struct.buffer.atomic.and(i32 %t6, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
- %t8 = call i32 @llvm.amdgcn.struct.buffer.atomic.or(i32 %t7, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 2)
- %t9 = call i32 @llvm.amdgcn.struct.buffer.atomic.xor(i32 %t8, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
+ %t1 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
+ %t2 = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 %t1, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 2)
+ %t3 = call i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32(i32 %t2, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
+ %t4 = call i32 @llvm.amdgcn.struct.buffer.atomic.umin.i32(i32 %t3, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 2)
+ %t5 = call i32 @llvm.amdgcn.struct.buffer.atomic.smax.i32(i32 %t4, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
+ %t6 = call i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32(i32 %t5, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 2)
+ %t7 = call i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(i32 %t6, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
+ %t8 = call i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(i32 %t7, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 2)
+ %t9 = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 %t8, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
%out = bitcast i32 %t9 to float
ret float %out
}
@@ -85,20 +85,20 @@ main_body:
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], [[SOFS]] idxen offset:4 glc
define amdgpu_ps float @test3(<4 x i32> inreg %rsrc, i32 %data, i32 %cmp, i32 %vindex, i32 %voffset) {
main_body:
- %o1 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
- %o2 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap(i32 %o1, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
- %o3 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap(i32 %o2, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 %voffset, i32 0, i32 0)
- %o4 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap(i32 %o3, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 0, i32 0)
+ %o1 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
+ %o2 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %o1, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
+ %o3 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %o2, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 %voffset, i32 0, i32 0)
+ %o4 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %o3, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 0, i32 0)
%offs.5 = add i32 %voffset, 44
- %o5 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap(i32 %o4, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 %offs.5, i32 0, i32 0)
- %o6 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap(i32 %o5, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 4, i32 8188, i32 0)
+ %o5 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %o4, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 %offs.5, i32 0, i32 0)
+ %o6 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %o5, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 4, i32 8188, i32 0)
; Detecting the no-return variant doesn't work right now because of how the
; intrinsic is replaced by an instruction that feeds into an EXTRACT_SUBREG.
; Since there probably isn't a reasonable use-case of cmpswap that discards
; the return value, that seems okay.
;
-; %unused = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap(i32 %o6, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
+; %unused = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %o6, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
%out = bitcast i32 %o6 to float
ret float %out
}
@@ -107,21 +107,21 @@ main_body:
;CHECK: buffer_atomic_add v0,
define amdgpu_ps float @test4() {
main_body:
- %v = call i32 @llvm.amdgcn.struct.buffer.atomic.add(i32 1, <4 x i32> undef, i32 0, i32 4, i32 0, i32 0)
+ %v = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 1, <4 x i32> undef, i32 0, i32 4, i32 0, i32 0)
%v.float = bitcast i32 %v to float
ret float %v.float
}
-declare i32 @llvm.amdgcn.struct.buffer.atomic.swap(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.add(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.sub(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.smin(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.umin(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.smax(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.umax(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.and(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.or(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.xor(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32) #0
attributes #0 = { nounwind }
More information about the llvm-commits
mailing list