[PATCH] D80256: [AMDGPU] Process V_MOV_B32_indirect in SET_GPR_IDX optimization

Stanislav Mekhanoshin via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue May 19 21:58:30 PDT 2020


This revision was automatically updated to reflect the committed changes.
Closed by commit rG677929e35218: [AMDGPU] Process V_MOV_B32_indirect in SET_GPR_IDX optimization (authored by rampitec).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80256/new/

https://reviews.llvm.org/D80256

Files:
  llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
  llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
  llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir


Index: llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
+++ llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
@@ -336,3 +336,23 @@
   $vgpr15 = V_MOV_B32_e32 undef $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
   S_SET_GPR_IDX_OFF
 ...
+
+---
+name:            indirect_mov
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: indirect_mov
+    ; GCN: S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $m0, implicit undef $m0
+    ; GCN: $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    ; GCN: V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
+    ; GCN: V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
+    ; GCN: S_SET_GPR_IDX_OFF
+  S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $m0, implicit undef $m0
+  $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+  V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
+  S_SET_GPR_IDX_OFF
+  S_SET_GPR_IDX_ON killed $sgpr2, 1, implicit-def $m0, implicit undef $m0
+  V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
+  S_SET_GPR_IDX_OFF
+...
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
@@ -1066,8 +1066,6 @@
 ; GPRIDX-NEXT:    s_lshl_b32 s0, s18, 1
 ; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v2, v0
-; GPRIDX-NEXT:    s_set_gpr_idx_off
-; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v3, v1
 ; GPRIDX-NEXT:    s_set_gpr_idx_off
 ; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
@@ -1138,8 +1136,6 @@
 ; GPRIDX-NEXT:    s_lshl_b32 s0, s4, 1
 ; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v0, s2
-; GPRIDX-NEXT:    s_set_gpr_idx_off
-; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v1, s3
 ; GPRIDX-NEXT:    s_set_gpr_idx_off
 ; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
@@ -1422,8 +1418,6 @@
 ; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
 ; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v0, v16
-; GPRIDX-NEXT:    s_set_gpr_idx_off
-; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v1, v17
 ; GPRIDX-NEXT:    s_set_gpr_idx_off
 ; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
@@ -3046,8 +3040,6 @@
 ; GPRIDX-NEXT:    v_mov_b32_e32 v2, s0
 ; GPRIDX-NEXT:    s_set_gpr_idx_on s33, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v2, v0
-; GPRIDX-NEXT:    s_set_gpr_idx_off
-; GPRIDX-NEXT:    s_set_gpr_idx_on s33, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v3, v1
 ; GPRIDX-NEXT:    s_set_gpr_idx_off
 ; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v2
@@ -3262,8 +3254,6 @@
 ; GPRIDX-NEXT:    v_mov_b32_e32 v2, s0
 ; GPRIDX-NEXT:    s_set_gpr_idx_on s33, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v2, v0
-; GPRIDX-NEXT:    s_set_gpr_idx_off
-; GPRIDX-NEXT:    s_set_gpr_idx_on s33, gpr_idx(DST)
 ; GPRIDX-NEXT:    v_mov_b32_e32 v3, v1
 ; GPRIDX-NEXT:    s_set_gpr_idx_off
 ; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v2
Index: llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -177,10 +177,12 @@
                          return MO.isReg() &&
                                 TRI->isVectorRegister(MRI, MO.getReg());
                        })) {
-        // The only exception allowed here is another indirect V_MOV_B32_e32
+        // The only exception allowed here is another indirect vector move
         // with the same mode.
-        if (!IdxOn || I->getOpcode() != AMDGPU::V_MOV_B32_e32 ||
-            !I->hasRegisterImplicitUseOperand(AMDGPU::M0))
+        if (!IdxOn ||
+            !((I->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
+               I->hasRegisterImplicitUseOperand(AMDGPU::M0)) ||
+              I->getOpcode() == AMDGPU::V_MOV_B32_indirect))
           return false;
       }
     }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D80256.265134.patch
Type: text/x-patch
Size: 4983 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200520/1d0fdad4/attachment.bin>


More information about the llvm-commits mailing list