[llvm] r371671 - AMDGPU: Move m0 initializations earlier

Austin Kerbow via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 11 14:28:42 PDT 2019


Author: kerbowa
Date: Wed Sep 11 14:28:41 2019
New Revision: 371671

URL: http://llvm.org/viewvc/llvm-project?rev=371671&view=rev
Log:
AMDGPU: Move m0 initializations earlier

Summary:
After hoisting and merging m0 initializations, schedule them as early as
possible in the MBB. This helps the scheduler avoid hazards in some
cases.

Reviewers: rampitec, arsenm

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, arphaman, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D67450

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
    llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll
    llvm/trunk/test/CodeGen/AMDGPU/merge-m0.mir

Modified: llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp?rev=371671&r1=371670&r2=371671&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp Wed Sep 11 14:28:41 2019
@@ -466,6 +466,7 @@ getFirstNonPrologue(MachineBasicBlock *M
 // executioon.
 static bool hoistAndMergeSGPRInits(unsigned Reg,
                                    const MachineRegisterInfo &MRI,
+                                   const TargetRegisterInfo *TRI,
                                    MachineDominatorTree &MDT,
                                    const TargetInstrInfo *TII) {
   // List of inits by immediate value.
@@ -480,7 +481,7 @@ static bool hoistAndMergeSGPRInits(unsig
 
   for (auto &MI : MRI.def_instructions(Reg)) {
     MachineOperand *Imm = nullptr;
-    for (auto &MO: MI.operands()) {
+    for (auto &MO : MI.operands()) {
       if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) ||
           (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
         Imm = nullptr;
@@ -585,8 +586,41 @@ static bool hoistAndMergeSGPRInits(unsig
     }
   }
 
-  for (auto MI : MergedInstrs)
-    MI->removeFromParent();
+  // Remove initializations that were merged into another.
+  for (auto &Init : Inits) {
+    auto &Defs = Init.second;
+    for (auto I = Defs.begin(); I != Defs.end(); ++I)
+      if (MergedInstrs.count(*I)) {
+        (*I)->eraseFromParent();
+        I = Defs.erase(I);
+      }
+  }
+
+  // Try to schedule SGPR initializations as early as possible in the MBB.
+  for (auto &Init : Inits) {
+    auto &Defs = Init.second;
+    for (auto MI : Defs) {
+      auto MBB = MI->getParent();
+      MachineInstr &BoundaryMI = *getFirstNonPrologue(MBB, TII);
+      MachineBasicBlock::reverse_iterator B(BoundaryMI);
+      // Check if B should actually be a boundary. If not, set the previous
+      // instruction as the boundary instead.
+      if (!TII->isBasicBlockPrologue(*B))
+        B++;
+
+      auto R = std::next(MI->getReverseIterator());
+      const unsigned Threshold = 50;
+      // Search until B or Threshold for a place to insert the initialization.
+      for (unsigned I = 0; R != B && I < Threshold; ++R, ++I)
+        if (R->readsRegister(Reg, TRI) || R->definesRegister(Reg, TRI) ||
+            TII->isSchedulingBoundary(*R, MBB, *MBB->getParent()))
+          break;
+
+      // Move to directly after R.
+      if (&*--R != MI)
+        MBB->splice(*R, MBB, MI);
+    }
+  }
 
   if (Changed)
     MRI.clearKillFlags(Reg);
@@ -755,7 +789,7 @@ bool SIFixSGPRCopies::runOnMachineFuncti
   }
 
   if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
-    hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT, TII);
+    hoistAndMergeSGPRInits(AMDGPU::M0, MRI, TRI, *MDT, TII);
 
   return true;
 }

Modified: llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll?rev=371671&r1=371670&r2=371671&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll Wed Sep 11 14:28:41 2019
@@ -26,12 +26,12 @@ define void @func_mov_fi_i32() #0 {
 
 ; CI: s_sub_u32 [[SUB0:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
 ; CI-NEXT: s_sub_u32 [[SUB1:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
-; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[SUB1]], 6
-; CI-NEXT: v_lshr_b32_e64 v0, [[SUB0]], 6
-; CI-NEXT: v_add_i32_e64 v1, s{{\[[0-9]+:[0-9]+\]}}, 4, [[SCALED]]
+; CI-DAG: v_lshr_b32_e64 v0, [[SUB0]], 6
+; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[SUB1]], 6
 ; CI-NOT: v_mov
 ; CI: ds_write_b32 v0, v0
-; CI-NEXT: ds_write_b32 v0, v1
+; CI-NEXT: v_add_i32_e64 v0, s{{\[[0-9]+:[0-9]+\]}}, 4, [[SCALED]]
+; CI-NEXT: ds_write_b32 v0, v0
 
 ; GFX9: s_sub_u32 [[SUB0:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
 ; GFX9-NEXT: s_sub_u32 [[SUB1:s[0-9]+|vcc_lo|vcc_hi]], s32, s33

Modified: llvm/trunk/test/CodeGen/AMDGPU/merge-m0.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/merge-m0.mir?rev=371671&r1=371670&r2=371671&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/merge-m0.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/merge-m0.mir Wed Sep 11 14:28:41 2019
@@ -1,7 +1,10 @@
 # RUN: llc -march=amdgcn -amdgpu-enable-merge-m0 -verify-machineinstrs -run-pass si-fix-sgpr-copies %s -o - | FileCheck -check-prefix=GCN %s
 
+# GCN-LABEL: name: merge-m0-many-init
 # GCN:    bb.0.entry:
 # GCN:      SI_INIT_M0 -1
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
 # GCN-NEXT: DS_WRITE_B32
 # GCN-NEXT: DS_WRITE_B32
 # GCN-NEXT: SI_INIT_M0 65536
@@ -45,9 +48,8 @@
 # GCN-NEXT: DS_WRITE_B32
 # GCN-NEXT: SI_INIT_M0 -1
 # GCN-NEXT: DS_WRITE_B32
-
 ---
-name:            merge-m0-many-init
+name: merge-m0-many-init
 registers:
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
@@ -124,22 +126,24 @@ body:             |
 
 ...
 
+# GCN-LABEL: name: merge-m0-dont-hoist-past-init-with-different-initializer
 # GCN:    bb.0.entry:
 # GCN:      SI_INIT_M0 65536
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
 # GCN-NEXT: DS_WRITE_B32
 
-#GCN:     bb.1:
-#GCN-NOT:   SI_INIT_M0 65536
-#GCN-NOT:   SI_INIT_M0 -1
-
-#GCN:     bb.2:
-#GCN:       SI_INIT_M0 -1
+# GCN:    bb.1:
+# GCN-NOT:  SI_INIT_M0 65536
+# GCN-NOT:  SI_INIT_M0 -1
 
-#GCN:     bb.3:
-#GCN:       SI_INIT_M0 -1
+# GCN:    bb.2:
+# GCN:      SI_INIT_M0 -1
 
+# GCN:    bb.3:
+# GCN:      SI_INIT_M0 -1
 ---
-name:            merge-m0-dont-hoist-past-init-with-different-initializer
+name: merge-m0-dont-hoist-past-init-with-different-initializer
 registers:
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
@@ -179,19 +183,19 @@ body:             |
     S_ENDPGM 0
 ...
 
+# GCN-LABEL: name: merge-m0-after-prologue
 # GCN:    bb.0.entry:
 # GCN-NOT:  SI_INIT_M0
 # GCN:      S_OR_B64
 # GCN-NEXT: SI_INIT_M0
 
-#GCN:     bb.1:
-#GCN-NOT:   SI_INIT_M0 -1
-
-#GCN:     bb.2:
-#GCN-NOT:   SI_INIT_MO -1
+# GCN:     bb.1:
+# GCN-NOT:   SI_INIT_M0 -1
 
+# GCN:     bb.2:
+# GCN-NOT:   SI_INIT_M0 -1
 ---
-name:            merge-m0-after-prologue
+name: merge-m0-after-prologue
 registers:
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
@@ -223,3 +227,71 @@ body:             |
   bb.3:
     S_ENDPGM 0
 ...
+
+# GCN-LABEL: name: move-m0-avoid-hazard
+# GCN: $m0 = S_MOV_B32 -1
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+# GCN-NEXT: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
+---
+name: move-m0-avoid-hazard
+body:             |
+  bb.0:
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $m0 = S_MOV_B32 -1
+    DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
+...
+
+# GCN-LABEL: name: move-m0-with-prologue
+# GCN: $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
+# GCN: $m0 = S_MOV_B32 -1
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+# GCN-NEXT: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
+---
+name: move-m0-with-prologue
+body:             |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $m0 = S_MOV_B32 -1
+    DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
+...
+
+# GCN-LABEL: name: move-m0-different-initializer
+# GCN: SI_INIT_M0 -1
+# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
+# GCN: SI_INIT_M0 65536
+# GCN-NEXT: S_NOP
+---
+name: move-m0-different-initializer
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+body:             |
+  bb.0:
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_NOP 0
+    SI_INIT_M0 65536, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+...
+
+# GCN-LABEL: name: move-m0-schedule-boundary
+# GCN: S_SETREG
+# GCN-NEXT: SI_INIT_M0 -1
+---
+name: move-m0-schedule-boundary
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+body:             |
+  bb.0:
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    S_SETREG_IMM32_B32 0, 1
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+...




More information about the llvm-commits mailing list