[PATCH] D85187: [AMDGPU] add buffer_atomic_swap for float
Ruiling, Song via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 5 18:46:17 PDT 2020
This revision was automatically updated to reflect the committed changes.
Closed by commit rG5ddc8b49babb: [AMDGPU] add buffer_atomic_swap for float (authored by ruiling).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D85187/new/
https://reviews.llvm.org/D85187
Files:
llvm/lib/Target/AMDGPU/BUFInstructions.td
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll
@@ -17,6 +17,7 @@
;CHECK: buffer_atomic_swap v0, {{v[0-9]+}}, s[0:3], [[SOFS]] idxen offset:4 glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_swap v0, {{v[0-9]+}}, s[0:3], 0 idxen{{$}}
+;CHECK: buffer_atomic_swap v0, {{v[0-9]+}}, s[0:3], 0 idxen glc
define amdgpu_ps float @test1(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex, i32 %voffset) {
main_body:
%o1 = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
@@ -27,7 +28,8 @@
%o5 = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32 %o4, <4 x i32> %rsrc, i32 0, i32 %ofs.5, i32 0, i32 0)
%o6 = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32 %o5, <4 x i32> %rsrc, i32 0, i32 4, i32 8188, i32 0)
%unused = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32 %o6, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
- %out = bitcast i32 %o6 to float
+ %o7 = bitcast i32 %o6 to float
+ %out = call float @llvm.amdgcn.struct.buffer.atomic.swap.f32(float %o7, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
ret float %out
}
@@ -119,6 +121,7 @@
}
declare i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare float @llvm.amdgcn.struct.buffer.atomic.swap.f32(float, <4 x i32>, i32, i32, i32, i32) #0
declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
declare i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
declare i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll
@@ -13,6 +13,7 @@
;CHECK: buffer_atomic_swap v0, off, s[0:3], [[SOFS]] offset:4 glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_swap v0, off, s[0:3], 0{{$}}
+;CHECK: buffer_atomic_swap v0, off, s[0:3], 0 glc
define amdgpu_ps float @test1(<4 x i32> inreg %rsrc, i32 %data, i32 %voffset) {
main_body:
%o1 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
@@ -21,7 +22,8 @@
%o5 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %o3, <4 x i32> %rsrc, i32 %off5, i32 0, i32 0)
%o6 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %o5, <4 x i32> %rsrc, i32 4, i32 8188, i32 0)
%unused = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %o6, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
- %out = bitcast i32 %o6 to float
+ %o7 = bitcast i32 %o6 to float
+ %out = call float @llvm.amdgcn.raw.buffer.atomic.swap.f32(float %o7, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
ret float %out
}
@@ -107,6 +109,7 @@
}
declare i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare float @llvm.amdgcn.raw.buffer.atomic.swap.f32(float, <4 x i32>, i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32) #0
Index: llvm/lib/Target/AMDGPU/BUFInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1368,6 +1368,7 @@
}
defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i32, "BUFFER_ATOMIC_SWAP">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_swap, f32, "BUFFER_ATOMIC_SWAP">;
defm : BufferAtomicPatterns<SIbuffer_atomic_add, i32, "BUFFER_ATOMIC_ADD">;
defm : BufferAtomicPatterns<SIbuffer_atomic_sub, i32, "BUFFER_ATOMIC_SUB">;
defm : BufferAtomicPatterns<SIbuffer_atomic_smin, i32, "BUFFER_ATOMIC_SMIN">;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D85187.283468.patch
Type: text/x-patch
Size: 4144 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200806/76bf2108/attachment.bin>
More information about the llvm-commits
mailing list