[PATCH] D80256: [AMDGPU] Process V_MOV_B32_indirect in SET_GPR_IDX optimization
Stanislav Mekhanoshin via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue May 19 16:00:22 PDT 2020
rampitec created this revision.
rampitec added a reviewer: arsenm.
Herald added subscribers: kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl.
Herald added a project: LLVM.
https://reviews.llvm.org/D80256
Files:
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
Index: llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
+++ llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
@@ -336,3 +336,23 @@
$vgpr15 = V_MOV_B32_e32 undef $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
S_SET_GPR_IDX_OFF
...
+
+---
+name: indirect_mov
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: indirect_mov
+ ; GCN: S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $m0, implicit undef $m0
+ ; GCN: $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+ ; GCN: V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
+ ; GCN: V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
+ ; GCN: S_SET_GPR_IDX_OFF
+ S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $m0, implicit undef $m0
+ $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+ V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
+ S_SET_GPR_IDX_OFF
+ S_SET_GPR_IDX_ON killed $sgpr2, 1, implicit-def $m0, implicit undef $m0
+ V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
+ S_SET_GPR_IDX_OFF
+...
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
@@ -1066,8 +1066,6 @@
; GPRIDX-NEXT: s_lshl_b32 s0, s18, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v2, v0
-; GPRIDX-NEXT: s_set_gpr_idx_off
-; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v3, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
@@ -1138,8 +1136,6 @@
; GPRIDX-NEXT: s_lshl_b32 s0, s4, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v0, s2
-; GPRIDX-NEXT: s_set_gpr_idx_off
-; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v1, s3
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
@@ -1422,8 +1418,6 @@
; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v16
-; GPRIDX-NEXT: s_set_gpr_idx_off
-; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v1, v17
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
@@ -3046,8 +3040,6 @@
; GPRIDX-NEXT: v_mov_b32_e32 v2, s0
; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v2, v0
-; GPRIDX-NEXT: s_set_gpr_idx_off
-; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v3, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2
@@ -3262,8 +3254,6 @@
; GPRIDX-NEXT: v_mov_b32_e32 v2, s0
; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v2, v0
-; GPRIDX-NEXT: s_set_gpr_idx_off
-; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v3, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2
Index: llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -177,10 +177,12 @@
return MO.isReg() &&
TRI->isVectorRegister(MRI, MO.getReg());
})) {
- // The only exception allowed here is another indirect V_MOV_B32_e32
+ // The only exception allowed here is another indirect vector move
// with the same mode.
- if (!IdxOn || I->getOpcode() != AMDGPU::V_MOV_B32_e32 ||
- !I->hasRegisterImplicitUseOperand(AMDGPU::M0))
+ if (!IdxOn ||
+ !((I->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
+ I->hasRegisterImplicitUseOperand(AMDGPU::M0)) ||
+ I->getOpcode() == AMDGPU::V_MOV_B32_indirect))
return false;
}
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D80256.265072.patch
Type: text/x-patch
Size: 4983 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200519/47d71c1b/attachment.bin>
More information about the llvm-commits mailing list