[llvm] r366135 - [AMDGPU] Enable merging m0 initializations.

Austin Kerbow via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 15 15:07:05 PDT 2019


Author: kerbowa
Date: Mon Jul 15 15:07:05 2019
New Revision: 366135

URL: http://llvm.org/viewvc/llvm-project?rev=366135&view=rev
Log:
[AMDGPU] Enable merging m0 initializations.

Summary:
Enable hoisting and merging m0 defs that are initialized with the same
immediate value. Fix a bug where removed instructions were not considered
to interfere with other inits, and make sure not to hoist inits before
block prologues.

Reviewers: rampitec, arsenm

Reviewed By: rampitec

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64766

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
    llvm/trunk/test/CodeGen/AMDGPU/merge-m0.mir

Modified: llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp?rev=366135&r1=366134&r2=366135&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp Mon Jul 15 15:07:05 2019
@@ -103,7 +103,7 @@ using namespace llvm;
 static cl::opt<bool> EnableM0Merge(
   "amdgpu-enable-merge-m0",
   cl::desc("Merge and hoist M0 initializations"),
-  cl::init(false));
+  cl::init(true));
 
 namespace {
 
@@ -452,18 +452,32 @@ static bool isReachable(const MachineIns
            (const MachineBasicBlock *MBB) { return MBB == MBBFrom; });
 }
 
+// Return the first non-prologue instruction in the block.
+static MachineBasicBlock::iterator
+getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII) {
+  MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
+  while (I != MBB->end() && TII->isBasicBlockPrologue(*I))
+    ++I;
+
+  return I;
+}
+
 // Hoist and merge identical SGPR initializations into a common predecessor.
 // This is intended to combine M0 initializations, but can work with any
 // SGPR. A VGPR cannot be processed since we cannot guarantee vector
 // executioon.
 static bool hoistAndMergeSGPRInits(unsigned Reg,
                                    const MachineRegisterInfo &MRI,
-                                   MachineDominatorTree &MDT) {
+                                   MachineDominatorTree &MDT,
+                                   const TargetInstrInfo *TII) {
   // List of inits by immediate value.
   using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
   InitListMap Inits;
   // List of clobbering instructions.
   SmallVector<MachineInstr*, 8> Clobbers;
+  // List of instructions marked for deletion.
+  SmallSet<MachineInstr*, 8> MergedInstrs;
+
   bool Changed = false;
 
   for (auto &MI : MRI.def_instructions(Reg)) {
@@ -492,8 +506,8 @@ static bool hoistAndMergeSGPRInits(unsig
         MachineInstr *MI2 = *I2;
 
         // Check any possible interference
-        auto intereferes = [&](MachineBasicBlock::iterator From,
-                               MachineBasicBlock::iterator To) -> bool {
+        auto interferes = [&](MachineBasicBlock::iterator From,
+                              MachineBasicBlock::iterator To) -> bool {
 
           assert(MDT.dominates(&*To, &*From));
 
@@ -525,23 +539,23 @@ static bool hoistAndMergeSGPRInits(unsig
         };
 
         if (MDT.dominates(MI1, MI2)) {
-          if (!intereferes(MI2, MI1)) {
+          if (!interferes(MI2, MI1)) {
             LLVM_DEBUG(dbgs()
                        << "Erasing from "
                        << printMBBReference(*MI2->getParent()) << " " << *MI2);
-            MI2->eraseFromParent();
-            Defs.erase(I2++);
+            MergedInstrs.insert(MI2);
             Changed = true;
+            ++I2;
             continue;
           }
         } else if (MDT.dominates(MI2, MI1)) {
-          if (!intereferes(MI1, MI2)) {
+          if (!interferes(MI1, MI2)) {
             LLVM_DEBUG(dbgs()
                        << "Erasing from "
                        << printMBBReference(*MI1->getParent()) << " " << *MI1);
-            MI1->eraseFromParent();
-            Defs.erase(I1++);
+            MergedInstrs.insert(MI1);
             Changed = true;
+            ++I1;
             break;
           }
         } else {
@@ -552,8 +566,8 @@ static bool hoistAndMergeSGPRInits(unsig
             continue;
           }
 
-          MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
-          if (!intereferes(MI1, I) && !intereferes(MI2, I)) {
+          MachineBasicBlock::iterator I = getFirstNonPrologue(MBB, TII);
+          if (!interferes(MI1, I) && !interferes(MI2, I)) {
             LLVM_DEBUG(dbgs()
                        << "Erasing from "
                        << printMBBReference(*MI1->getParent()) << " " << *MI1
@@ -561,9 +575,9 @@ static bool hoistAndMergeSGPRInits(unsig
                        << printMBBReference(*MI2->getParent()) << " to "
                        << printMBBReference(*I->getParent()) << " " << *MI2);
             I->getParent()->splice(I, MI2->getParent(), MI2);
-            MI1->eraseFromParent();
-            Defs.erase(I1++);
+            MergedInstrs.insert(MI1);
             Changed = true;
+            ++I1;
             break;
           }
         }
@@ -573,6 +587,9 @@ static bool hoistAndMergeSGPRInits(unsig
     }
   }
 
+  for (auto MI : MergedInstrs)
+    MI->removeFromParent();
+
   if (Changed)
     MRI.clearKillFlags(Reg);
 
@@ -723,7 +740,7 @@ bool SIFixSGPRCopies::runOnMachineFuncti
   }
 
   if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
-    hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT);
+    hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT, TII);
 
   return true;
 }

Modified: llvm/trunk/test/CodeGen/AMDGPU/merge-m0.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/merge-m0.mir?rev=366135&r1=366134&r2=366135&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/merge-m0.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/merge-m0.mir Mon Jul 15 15:07:05 2019
@@ -47,13 +47,7 @@
 # GCN-NEXT: DS_WRITE_B32
 
 ---
-name:            test
-alignment:       0
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
+name:            merge-m0-many-init
 registers:
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
@@ -129,3 +123,103 @@ body:             |
     S_BRANCH %bb.0.entry
 
 ...
+
+# GCN:    bb.0.entry:
+# GCN:      SI_INIT_M0 65536
+# GCN-NEXT: DS_WRITE_B32
+
+#GCN:     bb.1:
+#GCN-NOT:   SI_INIT_M0 65536
+#GCN-NOT:   SI_INIT_M0 -1
+
+#GCN:     bb.2:
+#GCN:       SI_INIT_M0 -1
+
+#GCN:     bb.3:
+#GCN:       SI_INIT_M0 -1
+
+---
+name:            merge-m0-dont-hoist-past-init-with-different-initializer
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    SI_INIT_M0 65536, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2, %bb.3
+
+    SI_INIT_M0 65536, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_CBRANCH_VCCZ %bb.2, implicit undef $vcc
+    S_BRANCH %bb.3
+
+  bb.2:
+    successors: %bb.4
+
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.4
+
+  bb.3:
+    successors: %bb.4
+
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.4
+
+  bb.4:
+    S_ENDPGM 0
+...
+
+# GCN:    bb.0.entry:
+# GCN-NOT:  SI_INIT_M0
+# GCN:      S_OR_B64
+# GCN-NEXT: SI_INIT_M0
+
+#GCN:     bb.1:
+#GCN-NOT:   SI_INIT_M0 -1
+
+#GCN:     bb.2:
+#GCN-NOT:   SI_INIT_M0 -1
+
+---
+name:            merge-m0-after-prologue
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+body:             |
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+    liveins: $sgpr0_sgpr1
+
+    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
+    S_BRANCH %bb.2
+
+  bb.1:
+    successors: %bb.3
+
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    successors: %bb.3
+
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_ENDPGM 0
+...




More information about the llvm-commits mailing list