[llvm] b76c090 - [AMDGPU] Allow index optimisation in SIPreEmitPeephole for bundles

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 19 18:26:51 PDT 2021


Author: Carl Ritson
Date: 2021-03-20T10:26:23+09:00
New Revision: b76c09023d9a341353d7bcae1782154d80121838

URL: https://github.com/llvm/llvm-project/commit/b76c09023d9a341353d7bcae1782154d80121838
DIFF: https://github.com/llvm/llvm-project/commit/b76c09023d9a341353d7bcae1782154d80121838.diff

LOG: [AMDGPU] Allow index optimisation in SIPreEmitPeephole for bundles

Add code so duplication index register changes can be removed from
inside bundles.

Reviewed By: rampitec, foad

Differential Revision: https://reviews.llvm.org/D98940

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
    llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 9ca43512cd91..5f10fefa469f 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -219,8 +219,11 @@ bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,
     return false;
 
   // Scan back to find an identical S_SET_GPR_IDX_ON
-  for (MachineBasicBlock::iterator I = std::next(First.getIterator()),
-       E = MI.getIterator(); I != E; ++I) {
+  for (MachineBasicBlock::instr_iterator I = std::next(First.getIterator()),
+                                         E = MI.getIterator();
+       I != E; ++I) {
+    if (I->isBundle())
+      continue;
     switch (I->getOpcode()) {
     case AMDGPU::S_SET_GPR_IDX_MODE:
       return false;
@@ -249,9 +252,9 @@ bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,
     }
   }
 
-  MI.eraseFromParent();
+  MI.eraseFromBundle();
   for (MachineInstr *RI : ToRemove)
-    RI->eraseFromParent();
+    RI->eraseFromBundle();
   return true;
 }
 
@@ -315,7 +318,10 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
     // Scan the block for two S_SET_GPR_IDX_ON instructions to see if a
     // second is not needed. Do expensive checks in the optimizeSetGPR()
     // and limit the distance to 20 instructions for compile time purposes.
-    for (MachineBasicBlock::iterator MBBI = MBB.begin(); MBBI != MBBE; ) {
+    // Note: this needs to work on bundles as S_SET_GPR_IDX* instructions
+    // may be bundled with the instructions they modify.
+    for (MachineBasicBlock::instr_iterator MBBI = MBB.instr_begin();
+         MBBI != MBBE;) {
       MachineInstr &MI = *MBBI;
       ++MBBI;
 

diff  --git a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
index c423f757c38d..95f62e5e7cd3 100644
--- a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
+++ b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
@@ -356,3 +356,113 @@ body:             |
   V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
   S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
 ...
+
+---
+name:            simple_bundle
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: simple_bundle
+    ; GCN: BUNDLE implicit-def $m0, implicit-def $m0_lo16, implicit-def $m0_hi16, implicit-def $mode, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit $sgpr2, implicit $m0, implicit $mode, implicit undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 {
+    ; GCN:   S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $mode, implicit-def $m0, implicit $mode, implicit undef $m0
+    ; GCN:   $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    ; GCN: }
+    ; GCN: BUNDLE implicit-def $m0, implicit-def $m0_lo16, implicit-def $m0_hi16, implicit-def $mode, implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit $sgpr2, implicit $m0, implicit $mode, implicit undef $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 {
+    ; GCN:   $vgpr15 = V_MOV_B32_e32 undef $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    ; GCN:   S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
+    ; GCN: }
+  BUNDLE implicit-def $m0, implicit-def $m0_lo16, implicit-def $m0_hi16, implicit-def $mode, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit $sgpr2, implicit $m0, implicit $mode, implicit undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 {
+    S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $mode, implicit-def $m0, implicit $mode, implicit undef $m0
+    $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
+  }
+  BUNDLE implicit-def $m0, implicit-def $m0_lo16, implicit-def $m0_hi16, implicit-def $mode, implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit $sgpr2, implicit $m0, implicit $mode, implicit undef $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 {
+    S_SET_GPR_IDX_ON killed $sgpr2, 1, implicit-def $mode, implicit-def  $m0, implicit $mode, implicit undef $m0
+    $vgpr15 = V_MOV_B32_e32 undef $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
+  }
+...
+
+---
+name:            salu_in_between_bundle
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: salu_in_between_bundle
+    ; GCN: BUNDLE implicit-def $m0, implicit-def $m0_lo16, implicit-def $m0_hi16, implicit-def $mode, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit $sgpr2, implicit $m0, implicit $mode, implicit undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 {
+    ; GCN:   S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $mode, implicit-def $m0, implicit $mode, implicit undef $m0
+    ; GCN:   $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    ; GCN: }
+    ; GCN: $sgpr0 = S_MOV_B32 $sgpr2
+    ; GCN: BUNDLE implicit-def $m0, implicit-def $m0_lo16, implicit-def $m0_hi16, implicit-def $mode, implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit $sgpr2, implicit $m0, implicit $mode, implicit undef $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 {
+    ; GCN:   $vgpr15 = V_MOV_B32_e32 undef $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    ; GCN:   S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
+    ; GCN: }
+  BUNDLE implicit-def $m0, implicit-def $m0_lo16, implicit-def $m0_hi16, implicit-def $mode, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit $sgpr2, implicit $m0, implicit $mode, implicit undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 {
+    S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $mode, implicit-def $m0, implicit $mode, implicit undef $m0
+    $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
+  }
+  $sgpr0 = S_MOV_B32 $sgpr2
+  BUNDLE implicit-def $m0, implicit-def $m0_lo16, implicit-def $m0_hi16, implicit-def $mode, implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit $sgpr2, implicit $m0, implicit $mode, implicit undef $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 {
+    S_SET_GPR_IDX_ON killed $sgpr2, 1, implicit-def $mode, implicit-def  $m0, implicit $mode, implicit undef $m0
+    $vgpr15 = V_MOV_B32_e32 undef $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
+  }
+...
+
+---
+name:            valu_in_between_bundle
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: valu_in_between_bundle
+    ; GCN: BUNDLE implicit-def $m0, implicit-def $m0_lo16, implicit-def $m0_hi16, implicit-def $mode, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit $sgpr2, implicit $m0, implicit $mode, implicit undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 {
+    ; GCN:   S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $mode, implicit-def $m0, implicit $mode, implicit undef $m0
+    ; GCN:   $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    ; GCN:   S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
+    ; GCN: }
+    ; GCN: $vgpr20 = V_MOV_B32_e32 1, implicit $exec
+    ; GCN: BUNDLE implicit-def $m0, implicit-def $m0_lo16, implicit-def $m0_hi16, implicit-def $mode, implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit $sgpr2, implicit $m0, implicit $mode, implicit undef $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 {
+    ; GCN:   S_SET_GPR_IDX_ON killed $sgpr2, 1, implicit-def $mode, implicit-def $m0, implicit $mode, implicit undef $m0
+    ; GCN:   $vgpr15 = V_MOV_B32_e32 undef $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    ; GCN:   S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
+    ; GCN: }
+  BUNDLE implicit-def $m0, implicit-def $m0_lo16, implicit-def $m0_hi16, implicit-def $mode, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit $sgpr2, implicit $m0, implicit $mode, implicit undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 {
+    S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $mode, implicit-def $m0, implicit $mode, implicit undef $m0
+    $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
+  }
+  $vgpr20 = V_MOV_B32_e32 1, implicit $exec
+  BUNDLE implicit-def $m0, implicit-def $m0_lo16, implicit-def $m0_hi16, implicit-def $mode, implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit $sgpr2, implicit $m0, implicit $mode, implicit undef $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 {
+    S_SET_GPR_IDX_ON killed $sgpr2, 1, implicit-def $mode, implicit-def  $m0, implicit $mode, implicit undef $m0
+    $vgpr15 = V_MOV_B32_e32 undef $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
+  }
+...
+
+---
+name:            changed_index_bundle
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: changed_index_bundle
+    ; GCN: BUNDLE implicit-def $m0, implicit-def $m0_lo16, implicit-def $m0_hi16, implicit-def $mode, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit $sgpr2, implicit $m0, implicit $mode, implicit undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 {
+    ; GCN:   S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $mode, implicit-def $m0, implicit $mode, implicit undef $m0
+    ; GCN:   $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    ; GCN:   S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
+    ; GCN: }
+    ; GCN: $sgpr2 = S_MOV_B32 1
+    ; GCN: BUNDLE implicit-def $m0, implicit-def $m0_lo16, implicit-def $m0_hi16, implicit-def $mode, implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit $sgpr2, implicit $m0, implicit $mode, implicit undef $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 {
+    ; GCN:   S_SET_GPR_IDX_ON killed $sgpr2, 1, implicit-def $mode, implicit-def $m0, implicit $mode, implicit undef $m0
+    ; GCN:   $vgpr15 = V_MOV_B32_e32 undef $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    ; GCN:   S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
+    ; GCN: }
+  BUNDLE implicit-def $m0, implicit-def $m0_lo16, implicit-def $m0_hi16, implicit-def $mode, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit $sgpr2, implicit $m0, implicit $mode, implicit undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 {
+    S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $mode, implicit-def $m0, implicit $mode, implicit undef $m0
+    $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
+  }
+  $sgpr2 = S_MOV_B32 1
+  BUNDLE implicit-def $m0, implicit-def $m0_lo16, implicit-def $m0_hi16, implicit-def $mode, implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit $sgpr2, implicit $m0, implicit $mode, implicit undef $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 {
+    S_SET_GPR_IDX_ON killed $sgpr2, 1, implicit-def $mode, implicit-def  $m0, implicit $mode, implicit undef $m0
+    $vgpr15 = V_MOV_B32_e32 undef $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+    S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
+  }
+...


        


More information about the llvm-commits mailing list