[PATCH] D64411: [AMDGPU] Simplify the exclusive scan used for optimized atomics
Jay Foad via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 19 01:40:22 PDT 2019
This revision was automatically updated to reflect the committed changes.
Closed by commit rL366543: [AMDGPU] Simplify the exclusive scan used for optimized atomics (authored by foad).
Changed prior to commit:
https://reviews.llvm.org/D64411?vs=208661&id=210771#toc
Repository:
rL LLVM
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D64411/new/
https://reviews.llvm.org/D64411
Files:
llvm/trunk/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
llvm/trunk/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
Index: llvm/trunk/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
@@ -47,7 +47,6 @@
; GFX8MORE: v_mov_b32_dpp v[[wave_shr1:[0-9]+]], v{{[0-9]+}} wave_shr:1 row_mask:0xf bank_mask:0xf
; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:1 row_mask:0xf bank_mask:0xf
; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:3 row_mask:0xf bank_mask:0xf
; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_shr:4 row_mask:0xf bank_mask:0xe
; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_shr:8 row_mask:0xf bank_mask:0xc
; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_bcast:15 row_mask:0xa bank_mask:0xf
@@ -115,7 +114,6 @@
; GFX8MORE: v_mov_b32_dpp v[[wave_shr1:[0-9]+]], v{{[0-9]+}} wave_shr:1 row_mask:0xf bank_mask:0xf
; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:1 row_mask:0xf bank_mask:0xf
; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:3 row_mask:0xf bank_mask:0xf
; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_shr:4 row_mask:0xf bank_mask:0xe
; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_shr:8 row_mask:0xf bank_mask:0xc
; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_bcast:15 row_mask:0xa bank_mask:0xf
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -376,26 +376,24 @@
CallInst *const SetInactive =
B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity});
- CallInst *const FirstDPP =
+ ExclScan =
B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, Ty,
{Identity, SetInactive, B.getInt32(DPP_WF_SR1),
B.getInt32(0xf), B.getInt32(0xf), B.getFalse()});
- ExclScan = FirstDPP;
- const unsigned Iters = 7;
- const unsigned DPPCtrl[Iters] = {
- DPP_ROW_SR1, DPP_ROW_SR2, DPP_ROW_SR3, DPP_ROW_SR4,
- DPP_ROW_SR8, DPP_ROW_BCAST15, DPP_ROW_BCAST31};
- const unsigned RowMask[Iters] = {0xf, 0xf, 0xf, 0xf, 0xf, 0xa, 0xc};
- const unsigned BankMask[Iters] = {0xf, 0xf, 0xf, 0xe, 0xc, 0xf, 0xf};
+ const unsigned Iters = 6;
+ const unsigned DPPCtrl[Iters] = {DPP_ROW_SR1, DPP_ROW_SR2,
+ DPP_ROW_SR4, DPP_ROW_SR8,
+ DPP_ROW_BCAST15, DPP_ROW_BCAST31};
+ const unsigned RowMask[Iters] = {0xf, 0xf, 0xf, 0xf, 0xa, 0xc};
+ const unsigned BankMask[Iters] = {0xf, 0xf, 0xe, 0xc, 0xf, 0xf};
// This loop performs an exclusive scan across the wavefront, with all lanes
// active (by using the WWM intrinsic).
for (unsigned Idx = 0; Idx < Iters; Idx++) {
- Value *const UpdateValue = Idx < 3 ? FirstDPP : ExclScan;
CallInst *const DPP = B.CreateIntrinsic(
Intrinsic::amdgcn_update_dpp, Ty,
- {Identity, UpdateValue, B.getInt32(DPPCtrl[Idx]),
+ {Identity, ExclScan, B.getInt32(DPPCtrl[Idx]),
B.getInt32(RowMask[Idx]), B.getInt32(BankMask[Idx]), B.getFalse()});
ExclScan = buildNonAtomicBinOp(B, Op, ExclScan, DPP);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D64411.210771.patch
Type: text/x-patch
Size: 3598 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190719/6cc81738/attachment.bin>
More information about the llvm-commits mailing list