[llvm] r313213 - Revert r312719 "[MachineCombiner] Update instruction depths incrementally for large BBs."

Hans Wennborg via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 13 16:23:09 PDT 2017


Author: hans
Date: Wed Sep 13 16:23:09 2017
New Revision: 313213

URL: http://llvm.org/viewvc/llvm-project?rev=313213&view=rev
Log:
Revert r312719 "[MachineCombiner] Update instruction depths incrementally for large BBs."

This caused PR34596.

> [MachineCombiner] Update instruction depths incrementally for large BBs.
>
> Summary:
> For large basic blocks with lots of combinable instructions, the
> MachineTraceMetrics computations in MachineCombiner can dominate the compile
> time, as computing the trace information is quadratic in the number of
> instructions in a BB and it's relevant successors/predecessors.
>
> In most cases, knowing the instruction depth should be enough to make
> combination decisions. As we already iterate over all instructions in a basic
> block, the instruction depth can be computed incrementally. This reduces the
> cost of machine-combine drastically in cases where lots of instructions
> are combined. The major drawback is that AFAIK, computing the critical path
> length cannot be done incrementally. Therefore we only compute
> instruction depths incrementally, for basic blocks with more
> instructions than inc_threshold. The -machine-combiner-inc-threshold
> option can be used to set the threshold and allows for easier
> experimenting and checking if using incremental updates for all basic
> blocks has any impact on the performance.
>
> Reviewers: sanjoy, Gerolf, MatzeB, efriedma, fhahn
>
> Reviewed By: fhahn
>
> Subscribers: kiranchandramohan, javed.absar, efriedma, llvm-commits
>
> Differential Revision: https://reviews.llvm.org/D36619

Modified:
    llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h
    llvm/trunk/lib/CodeGen/MachineCombiner.cpp
    llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp
    llvm/trunk/test/CodeGen/AArch64/machine-combiner.ll
    llvm/trunk/test/CodeGen/X86/machine-combiner.ll
    llvm/trunk/test/CodeGen/X86/mul-constant-result.ll

Modified: llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h?rev=313213&r1=313212&r2=313213&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h (original)
+++ llvm/trunk/include/llvm/CodeGen/MachineTraceMetrics.h Wed Sep 13 16:23:09 2017
@@ -366,12 +366,6 @@ public:
                      SparseSet<LiveRegUnit> &RegUnits);
     void updateDepth(const MachineBasicBlock *, const MachineInstr&,
                      SparseSet<LiveRegUnit> &RegUnits);
-
-    /// Updates the depth of the instructions from Start to End.
-    void updateDepths(MachineBasicBlock::iterator Start,
-                      MachineBasicBlock::iterator End,
-                      SparseSet<LiveRegUnit> &RegUnits);
-
   };
 
   /// Strategies for selecting traces.

Modified: llvm/trunk/lib/CodeGen/MachineCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineCombiner.cpp?rev=313213&r1=313212&r2=313213&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineCombiner.cpp Wed Sep 13 16:23:09 2017
@@ -22,7 +22,6 @@
 #include "llvm/CodeGen/MachineTraceMetrics.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -35,11 +34,6 @@ using namespace llvm;
 
 STATISTIC(NumInstCombined, "Number of machineinst combined");
 
-static cl::opt<unsigned>
-inc_threshold("machine-combiner-inc-threshold", cl::Hidden,
-              cl::desc("Incremental depth computation will be used for basic "
-                       "blocks with more instructions."), cl::init(500));
-
 namespace {
 class MachineCombiner : public MachineFunctionPass {
   const TargetInstrInfo *TII;
@@ -79,7 +73,7 @@ private:
                           SmallVectorImpl<MachineInstr *> &InsInstrs,
                           SmallVectorImpl<MachineInstr *> &DelInstrs,
                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
-                          MachineCombinerPattern Pattern, bool SlackIsAccurate);
+                          MachineCombinerPattern Pattern);
   bool preservesResourceLen(MachineBasicBlock *MBB,
                             MachineTraceMetrics::Trace BlockTrace,
                             SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -253,8 +247,7 @@ bool MachineCombiner::improvesCriticalPa
     SmallVectorImpl<MachineInstr *> &InsInstrs,
     SmallVectorImpl<MachineInstr *> &DelInstrs,
     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
-    MachineCombinerPattern Pattern,
-    bool SlackIsAccurate) {
+    MachineCombinerPattern Pattern) {
   assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
          "Missing machine model\n");
   // NewRoot is the last instruction in the \p InsInstrs vector.
@@ -265,7 +258,7 @@ bool MachineCombiner::improvesCriticalPa
   unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
   unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
 
-  DEBUG(dbgs() << "DEPENDENCE DATA FOR " << *Root << "\n";
+  DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
         dbgs() << " NewRootDepth: " << NewRootDepth << "\n";
         dbgs() << " RootDepth: " << RootDepth << "\n");
 
@@ -288,18 +281,17 @@ bool MachineCombiner::improvesCriticalPa
     RootLatency += TSchedModel.computeInstrLatency(I);
 
   unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
-  unsigned NewCycleCount = NewRootDepth + NewRootLatency;
-  unsigned OldCycleCount = RootDepth + RootLatency +
-                           (SlackIsAccurate ? RootSlack : 0);
+
   DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n";
         dbgs() << " RootLatency: " << RootLatency << "\n";
-        dbgs() << " RootSlack: " << RootSlack << " SlackIsAccurate="
-               << SlackIsAccurate << "\n";
+        dbgs() << " RootSlack: " << RootSlack << "\n";
         dbgs() << " NewRootDepth + NewRootLatency = "
-               << NewCycleCount << "\n";
+               << NewRootDepth + NewRootLatency << "\n";
         dbgs() << " RootDepth + RootLatency + RootSlack = "
-               << OldCycleCount << "\n";
-        );
+               << RootDepth + RootLatency + RootSlack << "\n";);
+
+  unsigned NewCycleCount = NewRootDepth + NewRootLatency;
+  unsigned OldCycleCount = RootDepth + RootLatency + RootSlack;
 
   return NewCycleCount <= OldCycleCount;
 }
@@ -362,44 +354,17 @@ bool MachineCombiner::doSubstitute(unsig
   return false;
 }
 
-/// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction
-/// depths if requested.
-///
-/// \param MBB basic block to insert instructions in
-/// \param MI current machine instruction
-/// \param InsInstrs new instructions to insert in \p MBB
-/// \param DelInstrs instruction to delete from \p MBB
-/// \param MinInstr is a pointer to the machine trace information
-/// \param RegUnits set of live registers, needed to compute instruction depths
-/// \param IncrementalUpdate if true, compute instruction depths incrementally,
-///                          otherwise invalidate the trace
 static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
                                      SmallVector<MachineInstr *, 16> InsInstrs,
                                      SmallVector<MachineInstr *, 16> DelInstrs,
-                                     MachineTraceMetrics::Ensemble *MinInstr,
-                                     SparseSet<LiveRegUnit> &RegUnits,
-                                     bool IncrementalUpdate) {
+                                     MachineTraceMetrics *Traces) {
   for (auto *InstrPtr : InsInstrs)
     MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
-
-  for (auto *InstrPtr : DelInstrs) {
+  for (auto *InstrPtr : DelInstrs)
     InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
-    // Erase all LiveRegs defined by the removed instruction
-    for (auto I = RegUnits.begin(); I != RegUnits.end(); ) {
-      if (I->MI == InstrPtr)
-        I = RegUnits.erase(I);
-      else
-        I++;
-    }
-  }
-
-  if (IncrementalUpdate)
-    for (auto *InstrPtr : InsInstrs)
-      MinInstr->updateDepth(MBB, *InstrPtr, RegUnits);
-  else
-    MinInstr->invalidate(MBB);
-
-  NumInstCombined++;
+  ++NumInstCombined;
+  Traces->invalidate(MBB);
+  Traces->verifyAnalysis();
 }
 
 /// Substitute a slow code sequence with a faster one by
@@ -413,16 +378,9 @@ bool MachineCombiner::combineInstruction
   bool Changed = false;
   DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
 
-  bool IncrementalUpdate = false;
   auto BlockIter = MBB->begin();
-  auto LastUpdate = BlockIter;
   // Check if the block is in a loop.
   const MachineLoop *ML = MLI->getLoopFor(MBB);
-  if (!MinInstr)
-    MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
-
-  SparseSet<LiveRegUnit> RegUnits;
-  RegUnits.setUniverse(TRI->getNumRegUnits());
 
   while (BlockIter != MBB->end()) {
     auto &MI = *BlockIter++;
@@ -461,6 +419,9 @@ bool MachineCombiner::combineInstruction
       SmallVector<MachineInstr *, 16> InsInstrs;
       SmallVector<MachineInstr *, 16> DelInstrs;
       DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
+      if (!MinInstr)
+        MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+      Traces->verifyAnalysis();
       TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
                                       InstrIdxForVirtReg);
       unsigned NewInstCount = InsInstrs.size();
@@ -475,41 +436,23 @@ bool MachineCombiner::combineInstruction
       if (ML && TII->isThroughputPattern(P))
         SubstituteAlways = true;
 
-      if (IncrementalUpdate) {
-        // Update depths since the last incremental update.
-        MinInstr->updateDepths(LastUpdate, std::next(BlockIter), RegUnits);
-        LastUpdate = BlockIter;
-      }
-
       // Substitute when we optimize for codesize and the new sequence has
       // fewer instructions OR
       // the new sequence neither lengthens the critical path nor increases
       // resource pressure.
       if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) {
-        insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
-                                 RegUnits, IncrementalUpdate);
+        insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
         // Eagerly stop after the first pattern fires.
         Changed = true;
         break;
       } else {
-        // For big basic blocks, we only compute the full trace the first time
-        // we hit this. We do not invalidate the trace, but instead update the
-        // instruction depths incrementally.
-        // NOTE: Only the instruction depths up to MI are accurate. All other
-        // trace information is not updated.
+        // Calculating the trace metrics may be expensive,
+        // so only do this when necessary.
         MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
-        Traces->verifyAnalysis();
         if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
-                                    InstrIdxForVirtReg, P,
-                                    !IncrementalUpdate) &&
+                                    InstrIdxForVirtReg, P) &&
             preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
-          if (MBB->size() > inc_threshold)
-            // Use incremental depth updates for basic blocks above treshold
-            IncrementalUpdate = true;
-
-          insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
-                                   RegUnits, IncrementalUpdate);
-
+          insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
           // Eagerly stop after the first pattern fires.
           Changed = true;
           break;
@@ -524,8 +467,6 @@ bool MachineCombiner::combineInstruction
     }
   }
 
-  if (Changed && IncrementalUpdate)
-    Traces->invalidate(MBB);
   return Changed;
 }
 

Modified: llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp?rev=313213&r1=313212&r2=313213&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineTraceMetrics.cpp Wed Sep 13 16:23:09 2017
@@ -824,14 +824,6 @@ updateDepth(const MachineBasicBlock *MBB
   updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
 }
 
-void MachineTraceMetrics::Ensemble::
-updateDepths(MachineBasicBlock::iterator Start,
-             MachineBasicBlock::iterator End,
-             SparseSet<LiveRegUnit> &RegUnits) {
-    for (; Start != End; Start++)
-      updateDepth(Start->getParent(), *Start, RegUnits);
-}
-
 /// Compute instruction depths for all instructions above or in MBB in its
 /// trace. This assumes that the trace through MBB has already been computed.
 void MachineTraceMetrics::Ensemble::

Modified: llvm/trunk/test/CodeGen/AArch64/machine-combiner.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/machine-combiner.ll?rev=313213&r1=313212&r2=313213&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/machine-combiner.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/machine-combiner.ll Wed Sep 13 16:23:09 2017
@@ -1,10 +1,5 @@
 ; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math -disable-post-ra < %s | FileCheck %s
 
-; Incremental updates of the instruction depths should be enough for this test
-; case.
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math \
-; RUN:     -disable-post-ra -machine-combiner-inc-threshold=0 < %s | FileCheck %s
-
 ; Verify that the first two adds are independent regardless of how the inputs are
 ; commuted. The destination registers are used as source registers for the third add.
 

Modified: llvm/trunk/test/CodeGen/X86/machine-combiner.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/machine-combiner.ll?rev=313213&r1=313212&r2=313213&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/machine-combiner.ll (original)
+++ llvm/trunk/test/CodeGen/X86/machine-combiner.ll Wed Sep 13 16:23:09 2017
@@ -1,11 +1,6 @@
 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=SSE
 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=AVX
 
-; Incremental updates of the instruction depths should be enough for this test
-; case.
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=AVX
-
 ; Verify that the first two adds are independent regardless of how the inputs are
 ; commuted. The destination registers are used as source registers for the third add.
 

Modified: llvm/trunk/test/CodeGen/X86/mul-constant-result.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-result.ll?rev=313213&r1=313212&r2=313213&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-result.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-result.ll Wed Sep 13 16:23:09 2017
@@ -1,9 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
-
-; Incremental updates of the instruction depths should be enough for this test
-; case.
-; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell -machine-combiner-inc-threshold=0| FileCheck %s --check-prefix=X64-HSW
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW
 
 ; Function Attrs: norecurse nounwind readnone uwtable
 define i32 @mult(i32, i32) local_unnamed_addr #0 {




More information about the llvm-commits mailing list