[llvm] 677929e - [AMDGPU] Process V_MOV_B32_indirect in SET_GPR_IDX optimization
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Tue May 19 21:49:42 PDT 2020
Author: Stanislav Mekhanoshin
Date: 2020-05-19T21:37:14-07:00
New Revision: 677929e35218f74bacdf0244e3197321abf2a4b3
URL: https://github.com/llvm/llvm-project/commit/677929e35218f74bacdf0244e3197321abf2a4b3
DIFF: https://github.com/llvm/llvm-project/commit/677929e35218f74bacdf0244e3197321abf2a4b3.diff
LOG: [AMDGPU] Process V_MOV_B32_indirect in SET_GPR_IDX optimization
Differential Revision: https://reviews.llvm.org/D80256
Added:
Modified:
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index dc8f27cda5fd..62df8ef488c6 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -177,10 +177,12 @@ bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,
return MO.isReg() &&
TRI->isVectorRegister(MRI, MO.getReg());
})) {
- // The only exception allowed here is another indirect V_MOV_B32_e32
+ // The only exception allowed here is another indirect vector move
// with the same mode.
- if (!IdxOn || I->getOpcode() != AMDGPU::V_MOV_B32_e32 ||
- !I->hasRegisterImplicitUseOperand(AMDGPU::M0))
+ if (!IdxOn ||
+ !((I->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
+ I->hasRegisterImplicitUseOperand(AMDGPU::M0)) ||
+ I->getOpcode() == AMDGPU::V_MOV_B32_indirect))
return false;
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
index f1196bd34b60..5fb0ef97932f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
@@ -1066,8 +1066,6 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, do
; GPRIDX-NEXT: s_lshl_b32 s0, s18, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v2, v0
-; GPRIDX-NEXT: s_set_gpr_idx_off
-; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v3, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
@@ -1138,8 +1136,6 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double i
; GPRIDX-NEXT: s_lshl_b32 s0, s4, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v0, s2
-; GPRIDX-NEXT: s_set_gpr_idx_off
-; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v1, s3
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
@@ -1422,8 +1418,6 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double %
; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v16
-; GPRIDX-NEXT: s_set_gpr_idx_off
-; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v1, v17
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
@@ -3046,8 +3040,6 @@ define amdgpu_ps <16 x i64> @dyn_insertelement_v16i64_s_v_s(<16 x i64> inreg %ve
; GPRIDX-NEXT: v_mov_b32_e32 v2, s0
; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v2, v0
-; GPRIDX-NEXT: s_set_gpr_idx_off
-; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v3, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2
@@ -3262,8 +3254,6 @@ define amdgpu_ps <16 x double> @dyn_insertelement_v16f64_s_v_s(<16 x double> inr
; GPRIDX-NEXT: v_mov_b32_e32 v2, s0
; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v2, v0
-; GPRIDX-NEXT: s_set_gpr_idx_off
-; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v3, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
index 4dfa6eee34f1..3f2344cad20a 100644
--- a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
+++ b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
@@ -336,3 +336,23 @@ body: |
$vgpr15 = V_MOV_B32_e32 undef $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
S_SET_GPR_IDX_OFF
...
+
+---
+name: indirect_mov
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: indirect_mov
+ ; GCN: S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $m0, implicit undef $m0
+ ; GCN: $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+ ; GCN: V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
+ ; GCN: V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
+ ; GCN: S_SET_GPR_IDX_OFF
+ S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $m0, implicit undef $m0
+ $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+ V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
+ S_SET_GPR_IDX_OFF
+ S_SET_GPR_IDX_ON killed $sgpr2, 1, implicit-def $m0, implicit undef $m0
+ V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
+ S_SET_GPR_IDX_OFF
+...
More information about the llvm-commits mailing list