[llvm] 677929e - [AMDGPU] Process V_MOV_B32_indirect in SET_GPR_IDX optimization

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Tue May 19 21:49:42 PDT 2020


Author: Stanislav Mekhanoshin
Date: 2020-05-19T21:37:14-07:00
New Revision: 677929e35218f74bacdf0244e3197321abf2a4b3

URL: https://github.com/llvm/llvm-project/commit/677929e35218f74bacdf0244e3197321abf2a4b3
DIFF: https://github.com/llvm/llvm-project/commit/677929e35218f74bacdf0244e3197321abf2a4b3.diff

LOG: [AMDGPU] Process V_MOV_B32_indirect in SET_GPR_IDX optimization

Differential Revision: https://reviews.llvm.org/D80256

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
    llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index dc8f27cda5fd..62df8ef488c6 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -177,10 +177,12 @@ bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,
                          return MO.isReg() &&
                                 TRI->isVectorRegister(MRI, MO.getReg());
                        })) {
-        // The only exception allowed here is another indirect V_MOV_B32_e32
+        // The only exception allowed here is another indirect vector move
         // with the same mode.
-        if (!IdxOn || I->getOpcode() != AMDGPU::V_MOV_B32_e32 ||
-            !I->hasRegisterImplicitUseOperand(AMDGPU::M0))
+        if (!IdxOn ||
+            !((I->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
+               I->hasRegisterImplicitUseOperand(AMDGPU::M0)) ||
+              I->getOpcode() == AMDGPU::V_MOV_B32_indirect))
           return false;
       }
     }

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
index f1196bd34b60..5fb0ef97932f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
@@ -1066,8 +1066,6 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, do
 ; GPRIDX-NEXT:    s_lshl_b32 s0, s18, 1
 ; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v2, v0
-; GPRIDX-NEXT:    s_set_gpr_idx_off
-; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v3, v1
 ; GPRIDX-NEXT:    s_set_gpr_idx_off
 ; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
@@ -1138,8 +1136,6 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double i
 ; GPRIDX-NEXT:    s_lshl_b32 s0, s4, 1
 ; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v0, s2
-; GPRIDX-NEXT:    s_set_gpr_idx_off
-; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v1, s3
 ; GPRIDX-NEXT:    s_set_gpr_idx_off
 ; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
@@ -1422,8 +1418,6 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double %
 ; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
 ; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v0, v16
-; GPRIDX-NEXT:    s_set_gpr_idx_off
-; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v1, v17
 ; GPRIDX-NEXT:    s_set_gpr_idx_off
 ; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
@@ -3046,8 +3040,6 @@ define amdgpu_ps <16 x i64> @dyn_insertelement_v16i64_s_v_s(<16 x i64> inreg %ve
 ; GPRIDX-NEXT:    v_mov_b32_e32 v2, s0
 ; GPRIDX-NEXT:    s_set_gpr_idx_on s33, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v2, v0
-; GPRIDX-NEXT:    s_set_gpr_idx_off
-; GPRIDX-NEXT:    s_set_gpr_idx_on s33, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v3, v1
 ; GPRIDX-NEXT:    s_set_gpr_idx_off
 ; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v2
@@ -3262,8 +3254,6 @@ define amdgpu_ps <16 x double> @dyn_insertelement_v16f64_s_v_s(<16 x double> inr
 ; GPRIDX-NEXT:    v_mov_b32_e32 v2, s0
 ; GPRIDX-NEXT:    s_set_gpr_idx_on s33, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v2, v0
-; GPRIDX-NEXT:    s_set_gpr_idx_off
-; GPRIDX-NEXT:    s_set_gpr_idx_on s33, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v3, v1
 ; GPRIDX-NEXT:    s_set_gpr_idx_off
 ; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v2

diff  --git a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
index 4dfa6eee34f1..3f2344cad20a 100644
--- a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
+++ b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
@@ -336,3 +336,23 @@ body:             |
   $vgpr15 = V_MOV_B32_e32 undef $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
   S_SET_GPR_IDX_OFF
 ...
+
+---
+name:            indirect_mov
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: indirect_mov
+    ; GCN: S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $m0, implicit undef $m0
+    ; GCN: $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    ; GCN: V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
+    ; GCN: V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
+    ; GCN: S_SET_GPR_IDX_OFF
+  S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $m0, implicit undef $m0
+  $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+  V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
+  S_SET_GPR_IDX_OFF
+  S_SET_GPR_IDX_ON killed $sgpr2, 1, implicit-def $m0, implicit undef $m0
+  V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
+  S_SET_GPR_IDX_OFF
+...


        


More information about the llvm-commits mailing list