[PATCH] D64207: [AMDGPU] DPP combiner: recognize identities for more opcodes
Jay Foad via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 4 07:05:52 PDT 2019
foad created this revision.
foad added reviewers: arsenm, vpykhtin.
Herald added subscribers: jfb, MaskRay, kbarton, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, nemanjai, kzhuravl.
Herald added a project: LLVM.
This allows the DPP combiner to kick in more often. For example the
exclusive scan generated by the atomic optimizer for a divergent atomic
add used to look like this:
v_mov_b32_e32 v3, v1
v_mov_b32_e32 v5, v1
v_mov_b32_e32 v6, v1
v_mov_b32_dpp v3, v2 wave_shr:1 row_mask:0xf bank_mask:0xf
s_nop 1
v_add_u32_dpp v4, v3, v3 row_shr:1 row_mask:0xf bank_mask:0xf bound_ctrl:0
v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf
v_mov_b32_dpp v6, v3 row_shr:3 row_mask:0xf bank_mask:0xf
v_add3_u32 v3, v4, v5, v6
v_mov_b32_e32 v4, v1
s_nop 1
v_mov_b32_dpp v4, v3 row_shr:4 row_mask:0xf bank_mask:0xe
v_add_u32_e32 v3, v3, v4
v_mov_b32_e32 v4, v1
s_nop 1
v_mov_b32_dpp v4, v3 row_shr:8 row_mask:0xf bank_mask:0xc
v_add_u32_e32 v3, v3, v4
v_mov_b32_e32 v4, v1
s_nop 1
v_mov_b32_dpp v4, v3 row_bcast:15 row_mask:0xa bank_mask:0xf
v_add_u32_e32 v3, v3, v4
s_nop 1
v_mov_b32_dpp v1, v3 row_bcast:31 row_mask:0xc bank_mask:0xf
v_add_u32_e32 v1, v3, v1
v_add_u32_e32 v1, v2, v1
v_readlane_b32 s0, v1, 63
But now most of the dpp movs are combined into adds:
v_mov_b32_e32 v3, v1
v_mov_b32_e32 v5, v1
s_nop 0
v_mov_b32_dpp v3, v2 wave_shr:1 row_mask:0xf bank_mask:0xf
s_nop 1
v_add_u32_dpp v4, v3, v3 row_shr:1 row_mask:0xf bank_mask:0xf bound_ctrl:0
v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf
v_mov_b32_dpp v1, v3 row_shr:3 row_mask:0xf bank_mask:0xf
v_add3_u32 v1, v4, v5, v1
s_nop 1
v_add_u32_dpp v1, v1, v1 row_shr:4 row_mask:0xf bank_mask:0xe
s_nop 1
v_add_u32_dpp v1, v1, v1 row_shr:8 row_mask:0xf bank_mask:0xc
s_nop 1
v_add_u32_dpp v1, v1, v1 row_bcast:15 row_mask:0xa bank_mask:0xf
s_nop 1
v_add_u32_dpp v1, v1, v1 row_bcast:31 row_mask:0xc bank_mask:0xf
v_add_u32_e32 v1, v2, v1
v_readlane_b32 s0, v1, 63
Also fix some typos in comments and debug output.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D64207
Files:
llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
Index: llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -32,9 +32,9 @@
// -> $combined_old = src1,
// $combined_bound_ctrl = DPP_BOUND_OFF
//
-// Othervise cancel.
+// Otherwise cancel.
//
-// The mov_dpp instruction should recide in the same BB as all it's uses
+// The mov_dpp instruction should reside in the same BB as all its uses
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
@@ -253,33 +253,46 @@
switch (OrigMIOp) {
default: break;
case AMDGPU::V_ADD_U32_e32:
+ case AMDGPU::V_ADD_U32_e64:
case AMDGPU::V_ADD_I32_e32:
+ case AMDGPU::V_ADD_I32_e64:
case AMDGPU::V_OR_B32_e32:
+ case AMDGPU::V_OR_B32_e64:
case AMDGPU::V_SUBREV_U32_e32:
+ case AMDGPU::V_SUBREV_U32_e64:
case AMDGPU::V_SUBREV_I32_e32:
+ case AMDGPU::V_SUBREV_I32_e64:
case AMDGPU::V_MAX_U32_e32:
+ case AMDGPU::V_MAX_U32_e64:
case AMDGPU::V_XOR_B32_e32:
+ case AMDGPU::V_XOR_B32_e64:
if (OldOpnd->getImm() == 0)
return true;
break;
case AMDGPU::V_AND_B32_e32:
+ case AMDGPU::V_AND_B32_e64:
case AMDGPU::V_MIN_U32_e32:
+ case AMDGPU::V_MIN_U32_e64:
if (static_cast<uint32_t>(OldOpnd->getImm()) ==
std::numeric_limits<uint32_t>::max())
return true;
break;
case AMDGPU::V_MIN_I32_e32:
+ case AMDGPU::V_MIN_I32_e64:
if (static_cast<int32_t>(OldOpnd->getImm()) ==
std::numeric_limits<int32_t>::max())
return true;
break;
case AMDGPU::V_MAX_I32_e32:
+ case AMDGPU::V_MAX_I32_e64:
if (static_cast<int32_t>(OldOpnd->getImm()) ==
std::numeric_limits<int32_t>::min())
return true;
break;
case AMDGPU::V_MUL_I32_I24_e32:
+ case AMDGPU::V_MUL_I32_I24_e64:
case AMDGPU::V_MUL_U32_U24_e32:
+ case AMDGPU::V_MUL_U32_U24_e64:
if (OldOpnd->getImm() == 1)
return true;
break;
@@ -300,7 +313,7 @@
return nullptr;
}
if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
- LLVM_DEBUG(dbgs() << " failed: old immediate ins't an identity\n");
+ LLVM_DEBUG(dbgs() << " failed: old immediate isn't an identity\n");
return nullptr;
}
CombOldVGPR = getRegSubRegPair(*Src1);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D64207.208037.patch
Type: text/x-patch
Size: 2358 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190704/dbb770c3/attachment.bin>
More information about the llvm-commits
mailing list