[llvm-branch-commits] [llvm-branch] r86910 - in /llvm/branches/Apple/Leela: lib/CodeGen/ lib/Target/Alpha/ lib/Target/Blackfin/ lib/Target/CellSPU/ lib/Target/MSP430/ lib/Target/Sparc/ lib/Target/XCore/ lib/Transforms/Utils/ test/CodeGen/Thumb2/ test/CodeGen/X86/ test/Transforms/LCSSA/

Dan Gohman gohman at apple.com
Wed Nov 11 16:39:53 PST 2009


Author: djg
Date: Wed Nov 11 18:39:53 2009
New Revision: 86910

URL: http://llvm.org/viewvc/llvm-project?rev=86910&view=rev
Log:
$ svn merge -c 86569 https://djg@llvm.org/svn/llvm-project/llvm/trunk
--- Merging r86569 into '.':
A    test/Transforms/LCSSA/indirectbr.ll
U    lib/Transforms/Utils/LCSSA.cpp
$ svn merge -c 86641 https://djg@llvm.org/svn/llvm-project/llvm/trunk
--- Merging r86641 into '.':
U    lib/CodeGen/BranchFolding.cpp
$ svn merge -c 86642 https://djg@llvm.org/svn/llvm-project/llvm/trunk
--- Merging r86642 into '.':
G    lib/CodeGen/BranchFolding.cpp
$ svn merge -c 86732 https://djg@llvm.org/svn/llvm-project/llvm/trunk
--- Merging r86732 into '.':
U    lib/Target/CellSPU/SPUInstrInfo.td
$ svn merge -c 86851 https://djg@llvm.org/svn/llvm-project/llvm/trunk
--- Merging r86851 into '.':
U    lib/Target/XCore/XCoreInstrInfo.td
U    lib/Target/Alpha/AlphaInstrInfo.td
U    lib/Target/MSP430/MSP430InstrInfo.td
U    lib/Target/Sparc/SparcInstrInfo.td
U    lib/Target/Blackfin/BlackfinInstrInfo.td
$ svn merge -c 86853 https://djg@llvm.org/svn/llvm-project/llvm/trunk
--- Merging r86853 into '.':
G    lib/CodeGen/BranchFolding.cpp
$ svn merge -c 86854 https://djg@llvm.org/svn/llvm-project/llvm/trunk
--- Merging r86854 into '.':
G    lib/CodeGen/BranchFolding.cpp
$ svn merge -c 86855 https://djg@llvm.org/svn/llvm-project/llvm/trunk
--- Merging r86855 into '.':
G    lib/CodeGen/BranchFolding.cpp
$ svn merge -c 86856 https://djg@llvm.org/svn/llvm-project/llvm/trunk
--- Merging r86856 into '.':
G    lib/CodeGen/BranchFolding.cpp
$ svn merge -c 86871 https://djg@llvm.org/svn/llvm-project/llvm/trunk
--- Merging r86871 into '.':
U    test/CodeGen/X86/loop-blocks.ll
A    test/CodeGen/X86/tail-opts.ll
U    test/CodeGen/Thumb2/thumb2-cbnz.ll
U    test/CodeGen/Thumb2/thumb2-ifcvt3.ll
G    lib/CodeGen/BranchFolding.cpp
U    lib/CodeGen/MachineBasicBlock.cpp
$ svn merge -c 86873 https://djg@llvm.org/svn/llvm-project/llvm/trunk
--- Merging r86873 into '.':
U    lib/CodeGen/BranchFolding.h
$ svn merge -c 86875 https://djg@llvm.org/svn/llvm-project/llvm/trunk
--- Merging r86875 into '.':
G    lib/CodeGen/BranchFolding.cpp

Added:
    llvm/branches/Apple/Leela/test/CodeGen/X86/tail-opts.ll
      - copied unchanged from r86871, llvm/trunk/test/CodeGen/X86/tail-opts.ll
    llvm/branches/Apple/Leela/test/Transforms/LCSSA/indirectbr.ll
      - copied unchanged from r86569, llvm/trunk/test/Transforms/LCSSA/indirectbr.ll
Modified:
    llvm/branches/Apple/Leela/lib/CodeGen/BranchFolding.cpp
    llvm/branches/Apple/Leela/lib/CodeGen/BranchFolding.h
    llvm/branches/Apple/Leela/lib/CodeGen/MachineBasicBlock.cpp
    llvm/branches/Apple/Leela/lib/Target/Alpha/AlphaInstrInfo.td
    llvm/branches/Apple/Leela/lib/Target/Blackfin/BlackfinInstrInfo.td
    llvm/branches/Apple/Leela/lib/Target/CellSPU/SPUInstrInfo.td
    llvm/branches/Apple/Leela/lib/Target/MSP430/MSP430InstrInfo.td
    llvm/branches/Apple/Leela/lib/Target/Sparc/SparcInstrInfo.td
    llvm/branches/Apple/Leela/lib/Target/XCore/XCoreInstrInfo.td
    llvm/branches/Apple/Leela/lib/Transforms/Utils/LCSSA.cpp
    llvm/branches/Apple/Leela/test/CodeGen/Thumb2/thumb2-cbnz.ll
    llvm/branches/Apple/Leela/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
    llvm/branches/Apple/Leela/test/CodeGen/X86/loop-blocks.ll

Modified: llvm/branches/Apple/Leela/lib/CodeGen/BranchFolding.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Leela/lib/CodeGen/BranchFolding.cpp?rev=86910&r1=86909&r2=86910&view=diff

==============================================================================
--- llvm/branches/Apple/Leela/lib/CodeGen/BranchFolding.cpp (original)
+++ llvm/branches/Apple/Leela/lib/CodeGen/BranchFolding.cpp Wed Nov 11 18:39:53 2009
@@ -32,6 +32,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 #include <algorithm>
@@ -40,18 +41,24 @@
 STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
 STATISTIC(NumBranchOpts, "Number of branches optimized");
 STATISTIC(NumTailMerge , "Number of block tails merged");
-static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge", 
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
                               cl::init(cl::BOU_UNSET), cl::Hidden);
 // Throttle for huge numbers of predecessors (compile speed problems)
 static cl::opt<unsigned>
-TailMergeThreshold("tail-merge-threshold", 
+TailMergeThreshold("tail-merge-threshold",
           cl::desc("Max number of predecessors to consider tail merging"),
           cl::init(150), cl::Hidden);
 
+// Heuristic for tail merging (and, inversely, tail duplication).
+// TODO: This should be replaced with a target query.
+static cl::opt<unsigned>
+TailMergeSize("tail-merge-size", 
+          cl::desc("Min number of instructions to consider tail merging"),
+                              cl::init(3), cl::Hidden);
 
 char BranchFolderPass::ID = 0;
 
-FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) { 
+FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
   return new BranchFolderPass(DefaultEnableTailMerge);
 }
 
@@ -63,7 +70,6 @@
 }
 
 
-
 BranchFolder::BranchFolder(bool defaultEnableTailMerge) {
   switch (FlagEnableTailMerge) {
   case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
@@ -77,12 +83,12 @@
 void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
   assert(MBB->pred_empty() && "MBB must be dead!");
   DEBUG(errs() << "\nRemoving MBB: " << *MBB);
-  
+
   MachineFunction *MF = MBB->getParent();
   // drop all successors.
   while (!MBB->succ_empty())
     MBB->removeSuccessor(MBB->succ_end()-1);
-  
+
   // If there are any labels in the basic block, unregister them from
   // MachineModuleInfo.
   if (MMI && !MBB->empty()) {
@@ -93,7 +99,7 @@
         MMI->InvalidateLabel(I->getOperand(0).getImm());
     }
   }
-  
+
   // Remove the block.
   MF->erase(MBB);
 }
@@ -190,7 +196,7 @@
     // Figure out how these jump tables should be merged.
     std::vector<unsigned> JTMapping;
     JTMapping.reserve(JTs.size());
-    
+
     // We always keep the 0th jump table.
     JTMapping.push_back(0);
 
@@ -202,7 +208,7 @@
       else
         JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs));
     }
-    
+
     // If a jump table was merge with another one, walk the function rewriting
     // references to jump tables to reference the new JT ID's.  Keep track of
     // whether we see a jump table idx, if not, we can delete the JT.
@@ -221,7 +227,7 @@
           JTIsLive.set(NewIdx);
         }
     }
-   
+
     // Finally, remove dead jump tables.  This happens either because the
     // indirect jump was unreachable (and thus deleted) or because the jump
     // table was merged with some other one.
@@ -245,7 +251,7 @@
   unsigned Hash = MI->getOpcode();
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &Op = MI->getOperand(i);
-    
+
     // Merge in bits from the operand if easy.
     unsigned OperandHash = 0;
     switch (Op.getType()) {
@@ -267,31 +273,30 @@
       break;
     default: break;
     }
-    
+
     Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
   }
   return Hash;
 }
 
 /// HashEndOfMBB - Hash the last few instructions in the MBB.  For blocks
-/// with no successors, we hash two instructions, because cross-jumping 
-/// only saves code when at least two instructions are removed (since a 
+/// with no successors, we hash two instructions, because cross-jumping
+/// only saves code when at least two instructions are removed (since a
 /// branch must be inserted).  For blocks with a successor, one of the
 /// two blocks to be tail-merged will end with a branch already, so
 /// it gains to cross-jump even for one instruction.
-
 static unsigned HashEndOfMBB(const MachineBasicBlock *MBB,
                              unsigned minCommonTailLength) {
   MachineBasicBlock::const_iterator I = MBB->end();
   if (I == MBB->begin())
     return 0;   // Empty MBB.
-  
+
   --I;
   unsigned Hash = HashMachineInstr(I);
-    
+
   if (I == MBB->begin() || minCommonTailLength == 1)
     return Hash;   // Single instr MBB.
-  
+
   --I;
   // Hash in the second-to-last instruction.
   Hash ^= HashMachineInstr(I) << 2;
@@ -307,11 +312,11 @@
                                         MachineBasicBlock::iterator &I2) {
   I1 = MBB1->end();
   I2 = MBB2->end();
-  
+
   unsigned TailLen = 0;
   while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
     --I1; --I2;
-    if (!I1->isIdenticalTo(I2) || 
+    if (!I1->isIdenticalTo(I2) ||
         // FIXME: This check is dubious. It's used to get around a problem where
         // people incorrectly expect inline asm directives to remain in the same
         // relative order. This is untenable because normal compiler
@@ -332,11 +337,11 @@
 void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
                                            MachineBasicBlock *NewDest) {
   MachineBasicBlock *OldBB = OldInst->getParent();
-  
+
   // Remove all the old successors of OldBB from the CFG.
   while (!OldBB->succ_empty())
     OldBB->removeSuccessor(OldBB->succ_begin());
-  
+
   // Remove all the dead instructions from the end of OldBB.
   OldBB->erase(OldInst, OldBB->end());
 
@@ -361,10 +366,10 @@
 
   // Move all the successors of this block to the specified block.
   NewMBB->transferSuccessors(&CurMBB);
- 
+
   // Add an edge from CurMBB to NewMBB for the fall-through.
   CurMBB.addSuccessor(NewMBB);
-  
+
   // Splice the code over.
   NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
 
@@ -404,7 +409,6 @@
 // branches temporarily for tail merging).  In the case where CurMBB ends
 // with a conditional branch to the next block, optimize by reversing the
 // test and conditionally branching to SuccMBB instead.
-
 static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB,
                     const TargetInstrInfo *TII) {
   MachineFunction *MF = CurMBB->getParent();
@@ -445,6 +449,25 @@
     }
 }
 
+/// CountTerminators - Count the number of terminators in the given
+/// block and set I to the position of the first non-terminator, if there
+/// is one, or MBB->end() otherwise.
+static unsigned CountTerminators(MachineBasicBlock *MBB,
+                                 MachineBasicBlock::iterator &I) {
+  I = MBB->end();
+  unsigned NumTerms = 0;
+  for (;;) {
+    if (I == MBB->begin()) {
+      I = MBB->end();
+      break;
+    }
+    --I;
+    if (!I->getDesc().isTerminator()) break;
+    ++NumTerms;
+  }
+  return NumTerms;
+}
+
 /// ProfitableToMerge - Check if two machine basic blocks have a common tail
 /// and decide if it would be profitable to merge those tails.  Return the
 /// length of the common tail and iterators to the first common instruction
@@ -454,16 +477,35 @@
                               unsigned minCommonTailLength,
                               unsigned &CommonTailLen,
                               MachineBasicBlock::iterator &I1,
-                              MachineBasicBlock::iterator &I2) {
+                              MachineBasicBlock::iterator &I2,
+                              MachineBasicBlock *SuccBB,
+                              MachineBasicBlock *PredBB) {
   CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
   MachineFunction *MF = MBB1->getParent();
 
-  if (CommonTailLen >= minCommonTailLength)
-    return true;
-
   if (CommonTailLen == 0)
     return false;
 
+  // It's almost always profitable to merge any number of non-terminator
+  // instructions with the block that falls through into the common successor.
+  if (MBB1 == PredBB || MBB2 == PredBB) {
+    MachineBasicBlock::iterator I;
+    unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
+    if (CommonTailLen > NumTerms)
+      return true;
+  }
+
+  // If both blocks have an unconditional branch temporarily stripped out,
+  // treat that as an additional common instruction.
+  if (MBB1 != PredBB && MBB2 != PredBB && 
+      !MBB1->back().getDesc().isBarrier() &&
+      !MBB2->back().getDesc().isBarrier())
+    --minCommonTailLength;
+
+  // Check if the common tail is long enough to be worthwhile.
+  if (CommonTailLen >= minCommonTailLength)
+    return true;
+
   // If we are optimizing for code size, 1 instruction in common is enough if
   // we don't have to split a block.  At worst we will be replacing a
   // fallthrough into the common tail with a branch, which at worst breaks
@@ -476,29 +518,32 @@
 }
 
 /// ComputeSameTails - Look through all the blocks in MergePotentials that have
-/// hash CurHash (guaranteed to match the last element).   Build the vector 
+/// hash CurHash (guaranteed to match the last element).  Build the vector
 /// SameTails of all those that have the (same) largest number of instructions
 /// in common of any pair of these blocks.  SameTails entries contain an
-/// iterator into MergePotentials (from which the MachineBasicBlock can be 
-/// found) and a MachineBasicBlock::iterator into that MBB indicating the 
+/// iterator into MergePotentials (from which the MachineBasicBlock can be
+/// found) and a MachineBasicBlock::iterator into that MBB indicating the
 /// instruction where the matching code sequence begins.
 /// Order of elements in SameTails is the reverse of the order in which
 /// those blocks appear in MergePotentials (where they are not necessarily
 /// consecutive).
-unsigned BranchFolder::ComputeSameTails(unsigned CurHash, 
-                                        unsigned minCommonTailLength) {
+unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
+                                        unsigned minCommonTailLength,
+                                        MachineBasicBlock *SuccBB,
+                                        MachineBasicBlock *PredBB) {
   unsigned maxCommonTailLength = 0U;
   SameTails.clear();
   MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
   MPIterator HighestMPIter = prior(MergePotentials.end());
   for (MPIterator CurMPIter = prior(MergePotentials.end()),
-                  B = MergePotentials.begin(); 
-       CurMPIter!=B && CurMPIter->first==CurHash;
+                  B = MergePotentials.begin();
+       CurMPIter!=B && CurMPIter->first == CurHash;
        --CurMPIter) {
-    for (MPIterator I = prior(CurMPIter); I->first==CurHash ; --I) {
+    for (MPIterator I = prior(CurMPIter); I->first == CurHash ; --I) {
       unsigned CommonTailLen;
       if (ProfitableToMerge(CurMPIter->second, I->second, minCommonTailLength,
-                            CommonTailLen, TrialBBI1, TrialBBI2)) {
+                            CommonTailLen, TrialBBI1, TrialBBI2,
+                            SuccBB, PredBB)) {
         if (CommonTailLen > maxCommonTailLength) {
           SameTails.clear();
           maxCommonTailLength = CommonTailLen;
@@ -509,7 +554,7 @@
             CommonTailLen == maxCommonTailLength)
           SameTails.push_back(std::make_pair(I, TrialBBI2));
       }
-      if (I==B)
+      if (I == B)
         break;
     }
   }
@@ -518,18 +563,18 @@
 
 /// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
 /// MergePotentials, restoring branches at ends of blocks as appropriate.
-void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, 
+void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
                                         MachineBasicBlock* SuccBB,
                                         MachineBasicBlock* PredBB) {
   MPIterator CurMPIter, B;
-  for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin(); 
-       CurMPIter->first==CurHash;
+  for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin();
+       CurMPIter->first == CurHash;
        --CurMPIter) {
     // Put the unconditional branch back, if we need one.
     MachineBasicBlock *CurMBB = CurMPIter->second;
     if (SuccBB && CurMBB != PredBB)
       FixTail(CurMBB, SuccBB, TII);
-    if (CurMPIter==B)
+    if (CurMPIter == B)
       break;
   }
   if (CurMPIter->first!=CurHash)
@@ -545,15 +590,15 @@
   unsigned TimeEstimate = ~0U;
   for (i=0, commonTailIndex=0; i<SameTails.size(); i++) {
     // Use PredBB if possible; that doesn't require a new branch.
-    if (SameTails[i].first->second==PredBB) {
+    if (SameTails[i].first->second == PredBB) {
       commonTailIndex = i;
       break;
     }
     // Otherwise, make a (fairly bogus) choice based on estimate of
     // how long it will take the various blocks to execute.
-    unsigned t = EstimateRuntime(SameTails[i].first->second->begin(), 
+    unsigned t = EstimateRuntime(SameTails[i].first->second->begin(),
                                  SameTails[i].second);
-    if (t<=TimeEstimate) {
+    if (t <= TimeEstimate) {
       TimeEstimate = t;
       commonTailIndex = i;
     }
@@ -562,14 +607,15 @@
   MachineBasicBlock::iterator BBI = SameTails[commonTailIndex].second;
   MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
 
-  DEBUG(errs() << "\nSplitting " << MBB->getNumber() << ", size "
+  DEBUG(errs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
                << maxCommonTailLength);
 
   MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI);
   SameTails[commonTailIndex].first->second = newMBB;
   SameTails[commonTailIndex].second = newMBB->begin();
+
   // If we split PredBB, newMBB is the new predecessor.
-  if (PredBB==MBB)
+  if (PredBB == MBB)
     PredBB = newMBB;
 
   return commonTailIndex;
@@ -579,20 +625,66 @@
 // successor, or all have no successor) can be tail-merged.  If there is a
 // successor, any blocks in MergePotentials that are not tail-merged and
 // are not immediately before Succ must have an unconditional branch to
-// Succ added (but the predecessor/successor lists need no adjustment).  
+// Succ added (but the predecessor/successor lists need no adjustment).
 // The lone predecessor of Succ that falls through into Succ,
 // if any, is given in PredBB.
 
-bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
-                                  MachineBasicBlock* PredBB) {
+bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
+                                      MachineBasicBlock* PredBB) {
   bool MadeChange = false;
 
-  // It doesn't make sense to save a single instruction since tail merging
-  // will add a jump.
-  // FIXME: Ask the target to provide the threshold?
-  unsigned minCommonTailLength = (SuccBB ? 1 : 2) + 1;
-  
-  DEBUG(errs() << "\nTryMergeBlocks " << MergePotentials.size() << '\n');
+  // Except for the special cases below, tail-merge if there are at least
+  // this many instructions in common.
+  unsigned minCommonTailLength = TailMergeSize;
+
+  // If there's a successor block, there are some cases which don't require
+  // new branching and as such are very likely to be profitable.
+  if (SuccBB) {
+    if (SuccBB->pred_size() == MergePotentials.size() &&
+        !MergePotentials[0].second->empty()) {
+      // If all the predecessors have at least one tail instruction in common,
+      // merging is very likely to be a win since it won't require an increase
+      // in static branches, and it will decrease the static instruction count.
+      bool AllPredsMatch = true;
+      MachineBasicBlock::iterator FirstNonTerm;
+      unsigned MinNumTerms = CountTerminators(MergePotentials[0].second,
+                                              FirstNonTerm);
+      if (FirstNonTerm != MergePotentials[0].second->end()) {
+        for (unsigned i = 1, e = MergePotentials.size(); i != e; ++i) {
+          MachineBasicBlock::iterator OtherFirstNonTerm;
+          unsigned NumTerms = CountTerminators(MergePotentials[0].second,
+                                               OtherFirstNonTerm);
+          if (NumTerms < MinNumTerms)
+            MinNumTerms = NumTerms;
+          if (OtherFirstNonTerm == MergePotentials[i].second->end() ||
+              OtherFirstNonTerm->isIdenticalTo(FirstNonTerm)) {
+            AllPredsMatch = false;
+            break;
+          }
+        }
+
+        // If they all have an instruction in common, do any amount of merging.
+        if (AllPredsMatch)
+          minCommonTailLength = MinNumTerms + 1;
+      }
+    }
+  }
+
+  DEBUG(errs() << "\nTryTailMergeBlocks: ";
+        for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+          errs() << "BB#" << MergePotentials[i].second->getNumber()
+                 << (i == e-1 ? "" : ", ");
+        errs() << "\n";
+        if (SuccBB) {
+          errs() << "  with successor BB#" << SuccBB->getNumber() << '\n';
+          if (PredBB)
+            errs() << "  which has fall-through from BB#"
+                   << PredBB->getNumber() << "\n";
+        }
+        errs() << "Looking for common tails of at least "
+               << minCommonTailLength << " instruction"
+               << (minCommonTailLength == 1 ? "" : "s") << '\n';
+       );
 
   // Sort by hash value so that blocks with identical end sequences sort
   // together.
@@ -600,14 +692,15 @@
 
   // Walk through equivalence sets looking for actual exact matches.
   while (MergePotentials.size() > 1) {
-    unsigned CurHash  = prior(MergePotentials.end())->first;
-    
+    unsigned CurHash  = MergePotentials.back().first;
+
     // Build SameTails, identifying the set of blocks with this hash code
     // and with the maximum number of instructions in common.
-    unsigned maxCommonTailLength = ComputeSameTails(CurHash, 
-                                                    minCommonTailLength);
+    unsigned maxCommonTailLength = ComputeSameTails(CurHash,
+                                                    minCommonTailLength,
+                                                    SuccBB, PredBB);
 
-    // If we didn't find any pair that has at least minCommonTailLength 
+    // If we didn't find any pair that has at least minCommonTailLength
     // instructions in common, remove all blocks with this hash code and retry.
     if (SameTails.empty()) {
       RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
@@ -623,27 +716,35 @@
     unsigned int commonTailIndex, i;
     for (commonTailIndex=SameTails.size(), i=0; i<SameTails.size(); i++) {
       MachineBasicBlock *MBB = SameTails[i].first->second;
-      if (MBB->begin() == SameTails[i].second && MBB != EntryBB) {
+      if (MBB == EntryBB)
+        continue;
+      if (MBB == PredBB) {
         commonTailIndex = i;
-        if (MBB==PredBB)
-          break;
+        break;
       }
+      if (MBB->begin() == SameTails[i].second)
+        commonTailIndex = i;
     }
 
-    if (commonTailIndex==SameTails.size()) {
+    if (commonTailIndex == SameTails.size() ||
+        (SameTails[commonTailIndex].first->second == PredBB &&
+         SameTails[commonTailIndex].first->second->begin() !=
+           SameTails[i].second)) {
       // None of the blocks consist entirely of the common tail.
       // Split a block so that one does.
-      commonTailIndex = CreateCommonTailOnlyBlock(PredBB,  maxCommonTailLength);
+      commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength);
     }
 
     MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
     // MBB is common tail.  Adjust all other BB's to jump to this one.
     // Traversal must be forwards so erases work.
-    DEBUG(errs() << "\nUsing common tail " << MBB->getNumber() << " for ");
-    for (unsigned int i=0; i<SameTails.size(); ++i) {
-      if (commonTailIndex==i)
+    DEBUG(errs() << "\nUsing common tail in BB#" << MBB->getNumber()
+                 << " for ");
+    for (unsigned int i=0, e = SameTails.size(); i != e; ++i) {
+      if (commonTailIndex == i)
         continue;
-      DEBUG(errs() << SameTails[i].first->second->getNumber() << ",");
+      DEBUG(errs() << "BB#" << SameTails[i].first->second->getNumber()
+                   << (i == e-1 ? "" : ", "));
       // Hack the end off BB i, making it jump to BB commonTailIndex instead.
       ReplaceTailWithBranchTo(SameTails[i].second, MBB);
       // BB i is no longer a predecessor of SuccBB; remove it from the worklist.
@@ -660,7 +761,7 @@
 bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
 
   if (!EnableTailMerge) return false;
- 
+
   bool MadeChange = false;
 
   // First find blocks with no successors.
@@ -669,17 +770,18 @@
     if (I->succ_empty())
       MergePotentials.push_back(std::make_pair(HashEndOfMBB(I, 2U), I));
   }
+
   // See if we can do any tail merging on those.
   if (MergePotentials.size() < TailMergeThreshold &&
       MergePotentials.size() >= 2)
-    MadeChange |= TryMergeBlocks(NULL, NULL);
+    MadeChange |= TryTailMergeBlocks(NULL, NULL);
 
   // Look at blocks (IBB) with multiple predecessors (PBB).
   // We change each predecessor to a canonical form, by
   // (1) temporarily removing any unconditional branch from the predecessor
   // to IBB, and
   // (2) alter conditional branches so they branch to the other block
-  // not IBB; this may require adding back an unconditional branch to IBB 
+  // not IBB; this may require adding back an unconditional branch to IBB
   // later, where there wasn't one coming in.  E.g.
   //   Bcc IBB
   //   fallthrough to QBB
@@ -693,18 +795,19 @@
   // a compile-time infinite loop repeatedly doing and undoing the same
   // transformations.)
 
-  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+  for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+       I != E; ++I) {
     if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) {
       SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
       MachineBasicBlock *IBB = I;
       MachineBasicBlock *PredBB = prior(I);
       MergePotentials.clear();
-      for (MachineBasicBlock::pred_iterator P = I->pred_begin(), 
+      for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
                                             E2 = I->pred_end();
            P != E2; ++P) {
         MachineBasicBlock* PBB = *P;
         // Skip blocks that loop to themselves, can't tail merge these.
-        if (PBB==IBB)
+        if (PBB == IBB)
           continue;
         // Visit each predecessor only once.
         if (!UniquePreds.insert(PBB))
@@ -715,7 +818,7 @@
           // Failing case:  IBB is the target of a cbr, and
           // we cannot reverse the branch.
           SmallVector<MachineOperand, 4> NewCond(Cond);
-          if (!Cond.empty() && TBB==IBB) {
+          if (!Cond.empty() && TBB == IBB) {
             if (TII->ReverseBranchCondition(NewCond))
               continue;
             // This is the QBB case described above
@@ -730,7 +833,7 @@
             MachineBasicBlock* PredNextBB = NULL;
             if (IP!=MF.end())
               PredNextBB = IP;
-            if (TBB==NULL) {
+            if (TBB == NULL) {
               if (IBB!=PredNextBB)      // fallthrough
                 continue;
             } else if (FBB) {
@@ -749,19 +852,19 @@
             TII->RemoveBranch(*PBB);
             if (!Cond.empty())
               // reinsert conditional branch only, for now
-              TII->InsertBranch(*PBB, (TBB==IBB) ? FBB : TBB, 0, NewCond);
+              TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond);
           }
           MergePotentials.push_back(std::make_pair(HashEndOfMBB(PBB, 1U), *P));
         }
       }
-    if (MergePotentials.size() >= 2)
-      MadeChange |= TryMergeBlocks(I, PredBB);
-    // Reinsert an unconditional branch if needed.
-    // The 1 below can occur as a result of removing blocks in TryMergeBlocks.
-    PredBB = prior(I);      // this may have been changed in TryMergeBlocks
-    if (MergePotentials.size()==1 && 
-        MergePotentials.begin()->second != PredBB)
-      FixTail(MergePotentials.begin()->second, I, TII);
+      if (MergePotentials.size() >= 2)
+        MadeChange |= TryTailMergeBlocks(IBB, PredBB);
+      // Reinsert an unconditional branch if needed.
+      // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks.
+      PredBB = prior(I);      // this may have been changed in TryTailMergeBlocks
+      if (MergePotentials.size() == 1 &&
+          MergePotentials.begin()->second != PredBB)
+        FixTail(MergePotentials.begin()->second, IBB, TII);
     }
   }
   return MadeChange;
@@ -773,14 +876,14 @@
 
 bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
   bool MadeChange = false;
-  
+
   // Make sure blocks are numbered in order
   MF.RenumberBlocks();
 
   for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
     MachineBasicBlock *MBB = I++;
     MadeChange |= OptimizeBlock(MBB);
-    
+
     // If it is dead, remove it.
     if (MBB->pred_empty()) {
       RemoveDeadBlock(MBB);
@@ -801,7 +904,7 @@
 ///
 bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
                                   bool BranchUnAnalyzable,
-                                  MachineBasicBlock *TBB, 
+                                  MachineBasicBlock *TBB,
                                   MachineBasicBlock *FBB,
                                   const SmallVectorImpl<MachineOperand> &Cond) {
   MachineFunction::iterator Fallthrough = CurBB;
@@ -809,13 +912,21 @@
   // If FallthroughBlock is off the end of the function, it can't fall through.
   if (Fallthrough == CurBB->getParent()->end())
     return false;
-  
+
   // If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible.
   if (!CurBB->isSuccessor(Fallthrough))
     return false;
-  
-  // If we couldn't analyze the branch, assume it could fall through.
-  if (BranchUnAnalyzable) return true;
+
+  // If we couldn't analyze the branch, examine the last instruction.
+  // If the block doesn't end in a known control barrier, assume fallthrough
+  // is possible. The isPredicable check is needed because this code can be
+  // called during IfConversion, where an instruction which is normally a
+  // Barrier is predicated and thus no longer an actual control barrier. This
+  // is over-conservative though, because if an instruction isn't actually
+  // predicated we could still treat it like a barrier.
+  if (BranchUnAnalyzable)
+    return CurBB->empty() || !CurBB->back().getDesc().isBarrier() ||
+           CurBB->back().getDesc().isPredicable();
   
   // If there is no branch, control always falls through.
   if (TBB == 0) return true;
@@ -825,11 +936,11 @@
   if (MachineFunction::iterator(TBB) == Fallthrough ||
       MachineFunction::iterator(FBB) == Fallthrough)
     return true;
-  
-  // If it's an unconditional branch to some block not the fall through, it 
+
+  // If it's an unconditional branch to some block not the fall through, it
   // doesn't fall through.
   if (Cond.empty()) return false;
-  
+
   // Otherwise, if it is conditional and has no explicit false block, it falls
   // through.
   return FBB == 0;
@@ -853,14 +964,14 @@
 /// fall-through to MBB1 than to fall through into MBB2.  This has to return
 /// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
 /// result in infinite loops.
-static bool IsBetterFallthrough(MachineBasicBlock *MBB1, 
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
                                 MachineBasicBlock *MBB2) {
   // Right now, we use a simple heuristic.  If MBB2 ends with a call, and
   // MBB1 doesn't, we prefer to fall through into MBB1.  This allows us to
   // optimize branches that branch to either a return block or an assert block
   // into a fallthrough to the return.
   if (MBB1->empty() || MBB2->empty()) return false;
- 
+
   // If there is a clear successor ordering we make sure that one block
   // will fall through to the next
   if (MBB1->isSuccessor(MBB2)) return true;
@@ -871,14 +982,112 @@
   return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
 }
 
+/// TailDuplicate - If it is profitable, duplicate TailBB's contents into each
+/// predecessor whose only successor is TailBB, eliminating the branch to it.
+bool BranchFolder::TailDuplicate(MachineBasicBlock *TailBB,
+                                 bool PrevFallsThrough,
+                                 MachineFunction &MF) {
+  // Don't try to tail-duplicate single-block loops.
+  if (TailBB->isSuccessor(TailBB))
+    return false;
+
+  // Don't tail-duplicate a block which will soon be folded into its successor.
+  if (TailBB->succ_size() == 1 &&
+      TailBB->succ_begin()[0]->pred_size() == 1)
+    return false;
+
+  // Duplicate up to one less than the tail-merge threshold, so that we don't
+  // get into an infinite loop between duplicating and merging. When optimizing
+  // for size, duplicate only one, because one branch instruction can be
+  // eliminated to compensate for the duplication.
+  unsigned MaxDuplicateCount = 
+    MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) ?
+      1 : (TailMergeSize - 1);
+
+  // Check the instructions in the block to determine whether tail-duplication
+  // is invalid or unlikely to be profitable.
+  unsigned i = 0;
+  bool HasCall = false;
+  for (MachineBasicBlock::iterator I = TailBB->begin();
+       I != TailBB->end(); ++I, ++i) {
+    // Non-duplicable things shouldn't be tail-duplicated.
+    if (I->getDesc().isNotDuplicable()) return false;
+    // Don't duplicate more than the threshold.
+    if (i == MaxDuplicateCount) return false;
+    // Remember if we saw a call.
+    if (I->getDesc().isCall()) HasCall = true;
+  }
+  // Heuristically, don't tail-duplicate calls if it would expand code size,
+  // as it's less likely to be worth the extra cost.
+  if (i > 1 && HasCall)
+    return false;
+
+  // Iterate through all the unique predecessors and tail-duplicate this
+  // block into them, if possible. Copying the list ahead of time also
+  // avoids trouble with the predecessor list reallocating.
+  bool Changed = false;
+  SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
+                                               TailBB->pred_end());
+  for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+       PE = Preds.end(); PI != PE; ++PI) {
+    MachineBasicBlock *PredBB = *PI;
+
+    assert(TailBB != PredBB &&
+           "Single-block loop should have been rejected earlier!");
+    if (PredBB->succ_size() > 1) continue;
+
+    MachineBasicBlock *PredTBB, *PredFBB;
+    SmallVector<MachineOperand, 4> PredCond;
+    if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+      continue;
+    if (!PredCond.empty())
+      continue;
+    // EH edges are ignored by AnalyzeBranch.
+    if (PredBB->succ_size() != 1)
+      continue;
+    // Don't duplicate into a fall-through predecessor unless it's the
+    // only predecessor.
+    if (&*next(MachineFunction::iterator(PredBB)) == TailBB &&
+        PrevFallsThrough &&
+        TailBB->pred_size() != 1)
+      continue;
+
+    DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB
+                 << "From Succ: " << *TailBB);
+
+    // Remove PredBB's unconditional branch.
+    TII->RemoveBranch(*PredBB);
+    // Clone the contents of TailBB into PredBB.
+    for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
+         I != E; ++I) {
+      MachineInstr *NewMI = MF.CloneMachineInstr(I);
+      PredBB->insert(PredBB->end(), NewMI);
+    }
+
+    // Update the CFG.
+    PredBB->removeSuccessor(PredBB->succ_begin());
+    assert(PredBB->succ_empty() &&
+           "TailDuplicate called on block with multiple successors!");
+    for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
+         E = TailBB->succ_end(); I != E; ++I)
+       PredBB->addSuccessor(*I);
+
+    Changed = true;
+  }
+
+  return Changed;
+}
+
 /// OptimizeBlock - Analyze and optimize control flow related to the specified
 /// block.  This is never called on the entry block.
 bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
   bool MadeChange = false;
+  MachineFunction &MF = *MBB->getParent();
+ReoptimizeBlock:
 
   MachineFunction::iterator FallThrough = MBB;
   ++FallThrough;
-  
+
   // If this block is empty, make everyone use its fall-through, not the block
   // explicitly.  Landing pads should not do this since the landing-pad table
   // points to this block.  Blocks with their addresses taken shouldn't be
@@ -886,8 +1095,8 @@
   if (MBB->empty() && !MBB->isLandingPad() && !MBB->hasAddressTaken()) {
     // Dead block?  Leave for cleanup later.
     if (MBB->pred_empty()) return MadeChange;
-    
-    if (FallThrough == MBB->getParent()->end()) {
+
+    if (FallThrough == MF.end()) {
       // TODO: Simplify preds to not branch here if possible!
     } else {
       // Rewrite all predecessors of the old block to go to the fallthrough
@@ -898,8 +1107,7 @@
       }
       // If MBB was the target of a jump table, update jump tables to go to the
       // fallthrough instead.
-      MBB->getParent()->getJumpTableInfo()->
-        ReplaceMBBInJumpTables(MBB, FallThrough);
+      MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, FallThrough);
       MadeChange = true;
     }
     return MadeChange;
@@ -917,18 +1125,38 @@
     // If the CFG for the prior block has extra edges, remove them.
     MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
                                               !PriorCond.empty());
-    
+
     // If the previous branch is conditional and both conditions go to the same
     // destination, remove the branch, replacing it with an unconditional one or
     // a fall-through.
     if (PriorTBB && PriorTBB == PriorFBB) {
       TII->RemoveBranch(PrevBB);
-      PriorCond.clear(); 
+      PriorCond.clear();
       if (PriorTBB != MBB)
         TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
       MadeChange = true;
       ++NumBranchOpts;
-      return OptimizeBlock(MBB);
+      goto ReoptimizeBlock;
+    }
+
+    // If the previous block unconditionally falls through to this block and
+    // this block has no other predecessors, move the contents of this block
+    // into the prior block. This doesn't usually happen when SimplifyCFG
+    // has been used, but it can happen if tail duplication eliminates all the
+    // non-branch predecessors of a block leaving only the fall-through edge.
+    // This has to check PrevBB->succ_size() because EH edges are ignored by
+    // AnalyzeBranch.
+    if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
+        PrevBB.succ_size() == 1 &&
+        !MBB->hasAddressTaken()) {
+      DEBUG(errs() << "\nMerging into block: " << PrevBB
+                   << "From MBB: " << *MBB);
+      PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
+      PrevBB.removeSuccessor(PrevBB.succ_begin());;
+      assert(PrevBB.succ_empty());
+      PrevBB.transferSuccessors(MBB);
+      MadeChange = true;
+      return MadeChange;
     }
     
     // If the previous branch *only* branches to *this* block (conditional or
@@ -937,9 +1165,9 @@
       TII->RemoveBranch(PrevBB);
       MadeChange = true;
       ++NumBranchOpts;
-      return OptimizeBlock(MBB);
+      goto ReoptimizeBlock;
     }
-    
+
     // If the prior block branches somewhere else on the condition and here if
     // the condition is false, remove the uncond second branch.
     if (PriorFBB == MBB) {
@@ -947,9 +1175,9 @@
       TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
       MadeChange = true;
       ++NumBranchOpts;
-      return OptimizeBlock(MBB);
+      goto ReoptimizeBlock;
     }
-    
+
     // If the prior block branches here on true and somewhere else on false, and
     // if the branch condition is reversible, reverse the branch to create a
     // fall-through.
@@ -960,10 +1188,10 @@
         TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond);
         MadeChange = true;
         ++NumBranchOpts;
-        return OptimizeBlock(MBB);
+        goto ReoptimizeBlock;
       }
     }
-    
+
     // If this block has no successors (e.g. it is a return block or ends with
     // a call to a no-return function like abort or __cxa_throw) and if the pred
     // falls through into this block, and if it would otherwise fall through
@@ -976,13 +1204,13 @@
         MachineFunction::iterator(PriorTBB) == FallThrough &&
         !CanFallThrough(MBB)) {
       bool DoTransform = true;
-      
+
       // We have to be careful that the succs of PredBB aren't both no-successor
       // blocks.  If neither have successors and if PredBB is the second from
       // last block in the function, we'd just keep swapping the two blocks for
       // last.  Only do the swap if one is clearly better to fall through than
       // the other.
-      if (FallThrough == --MBB->getParent()->end() &&
+      if (FallThrough == --MF.end() &&
           !IsBetterFallthrough(PriorTBB, MBB))
         DoTransform = false;
 
@@ -1000,20 +1228,20 @@
       if (DoTransform && !MBB->succ_empty() &&
           (!CanFallThrough(PriorTBB) || PriorTBB->empty()))
         DoTransform = false;
-      
-      
+
+
       if (DoTransform) {
         // Reverse the branch so we will fall through on the previous true cond.
         SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
         if (!TII->ReverseBranchCondition(NewPriorCond)) {
           DEBUG(errs() << "\nMoving MBB: " << *MBB
                        << "To make fallthrough to: " << *PriorTBB << "\n");
-          
+
           TII->RemoveBranch(PrevBB);
           TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond);
 
           // Move this block to the end of the function.
-          MBB->moveAfter(--MBB->getParent()->end());
+          MBB->moveAfter(--MF.end());
           MadeChange = true;
           ++NumBranchOpts;
           return MadeChange;
@@ -1021,7 +1249,7 @@
       }
     }
   }
-  
+
   // Analyze the branch in the current block.
   MachineBasicBlock *CurTBB = 0, *CurFBB = 0;
   SmallVector<MachineOperand, 4> CurCond;
@@ -1030,7 +1258,7 @@
     // If the CFG for the prior block has extra edges, remove them.
     MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
 
-    // If this is a two-way branch, and the FBB branches to this block, reverse 
+    // If this is a two-way branch, and the FBB branches to this block, reverse
     // the condition so the single-basic-block loop is faster.  Instead of:
     //    Loop: xxx; jcc Out; jmp Loop
     // we want:
@@ -1042,14 +1270,14 @@
         TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond);
         MadeChange = true;
         ++NumBranchOpts;
-        return OptimizeBlock(MBB);
+        goto ReoptimizeBlock;
       }
     }
-    
-    
+
+
     // If this branch is the only thing in its block, see if we can forward
     // other blocks across it.
-    if (CurTBB && CurCond.empty() && CurFBB == 0 && 
+    if (CurTBB && CurCond.empty() && CurFBB == 0 &&
         MBB->begin()->getDesc().isBranch() && CurTBB != MBB &&
         !MBB->hasAddressTaken()) {
       // This block may contain just an unconditional branch.  Because there can
@@ -1068,7 +1296,7 @@
             !PrevBB.isSuccessor(MBB)) {
           // If the prior block falls through into us, turn it into an
           // explicit branch to us to make updates simpler.
-          if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) && 
+          if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
               PriorTBB != MBB && PriorFBB != MBB) {
             if (PriorTBB == 0) {
               assert(PriorCond.empty() && PriorFBB == 0 &&
@@ -1104,18 +1332,17 @@
                       NewCurFBB, NewCurCond, true);
               if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
                 TII->RemoveBranch(*PMBB);
-                NewCurCond.clear(); 
+                NewCurCond.clear();
                 TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond);
                 MadeChange = true;
                 ++NumBranchOpts;
-                PMBB->CorrectExtraCFGEdges(NewCurTBB, NewCurFBB, false);
+                PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
               }
             }
           }
 
           // Change any jumptables to go to the new MBB.
-          MBB->getParent()->getJumpTableInfo()->
-            ReplaceMBBInJumpTables(MBB, CurTBB);
+          MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, CurTBB);
           if (DidChange) {
             ++NumBranchOpts;
             MadeChange = true;
@@ -1123,22 +1350,32 @@
           }
         }
       }
-      
+
       // Add the branch back if the block is more than just an uncond branch.
       TII->InsertBranch(*MBB, CurTBB, 0, CurCond);
     }
   }
 
+  // Determine whether this block can fall through into its successor, and
+  // whether the previous block can fall through into this block.
+  bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB, 
+                                     CurCond);
+  bool PrevFallsThru = CanFallThrough(&PrevBB, PriorUnAnalyzable,
+                                      PriorTBB, PriorFBB, PriorCond);
+
+  // If this block is small, unconditionally branched to, and does not
+  // fall through, tail-duplicate its instructions into its predecessors
+  // to eliminate a (dynamic) branch.
+  if (!CurFallsThru)
+    if (TailDuplicate(MBB, PrevFallsThru, MF)) {
+      MadeChange = true;
+      return MadeChange;
+    }
+
   // If the prior block doesn't fall through into this block, and if this
   // block doesn't fall through into some other block, see if we can find a
   // place to move this block where a fall-through will happen.
-  if (!CanFallThrough(&PrevBB, PriorUnAnalyzable,
-                      PriorTBB, PriorFBB, PriorCond)) {
-    // Now we know that there was no fall-through into this block, check to
-    // see if it has a fall-through into its successor.
-    bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB, 
-                                       CurCond);
-
+  if (!PrevFallsThru) {
     if (!MBB->isLandingPad()) {
       // Check all the predecessors of this block.  If one of them has no fall
       // throughs, move this block right after it.
@@ -1147,12 +1384,15 @@
         // Analyze the branch at the end of the pred.
         MachineBasicBlock *PredBB = *PI;
         MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
-        if (PredBB != MBB && !CanFallThrough(PredBB)
+        MachineBasicBlock *PredTBB, *PredFBB;
+        SmallVector<MachineOperand, 4> PredCond;
+        if (PredBB != MBB && !CanFallThrough(PredBB) &&
+            !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)
             && (!CurFallsThru || !CurTBB || !CurFBB)
             && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
           // If the current block doesn't fall through, just move it.
           // If the current block can fall through and does not end with a
-          // conditional branch, we need to append an unconditional jump to 
+          // conditional branch, we need to append an unconditional jump to
           // the (current) next block.  To avoid a possible compile-time
           // infinite loop, move blocks only backward in this case.
           // Also, if there are already 2 branches here, we cannot add a third;
@@ -1167,11 +1407,11 @@
           }
           MBB->moveAfter(PredBB);
           MadeChange = true;
-          return OptimizeBlock(MBB);
+          goto ReoptimizeBlock;
         }
       }
     }
-        
+
     if (!CurFallsThru) {
       // Check all successors to see if we can move this block before it.
       for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
@@ -1179,26 +1419,29 @@
         // Analyze the branch at the end of the block before the succ.
         MachineBasicBlock *SuccBB = *SI;
         MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
-        std::vector<MachineOperand> SuccPrevCond;
-        
+
         // If this block doesn't already fall-through to that successor, and if
         // the succ doesn't already have a block that can fall through into it,
         // and if the successor isn't an EH destination, we can arrange for the
         // fallthrough to happen.
-        if (SuccBB != MBB && !CanFallThrough(SuccPrev) &&
+        if (SuccBB != MBB && &*SuccPrev != MBB &&
+            !CanFallThrough(SuccPrev) && !CurUnAnalyzable &&
             !SuccBB->isLandingPad()) {
           MBB->moveBefore(SuccBB);
           MadeChange = true;
-          return OptimizeBlock(MBB);
+          goto ReoptimizeBlock;
         }
       }
-      
+
       // Okay, there is no really great place to put this block.  If, however,
       // the block before this one would be a fall-through if this block were
       // removed, move this block to the end of the function.
-      if (FallThrough != MBB->getParent()->end() &&
+      MachineBasicBlock *PrevTBB, *PrevFBB;
+      SmallVector<MachineOperand, 4> PrevCond;
+      if (FallThrough != MF.end() &&
+          !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
           PrevBB.isSuccessor(FallThrough)) {
-        MBB->moveAfter(--MBB->getParent()->end());
+        MBB->moveAfter(--MF.end());
         MadeChange = true;
         return MadeChange;
       }

Modified: llvm/branches/Apple/Leela/lib/CodeGen/BranchFolding.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Leela/lib/CodeGen/BranchFolding.h?rev=86910&r1=86909&r2=86910&view=diff

==============================================================================
--- llvm/branches/Apple/Leela/lib/CodeGen/BranchFolding.h (original)
+++ llvm/branches/Apple/Leela/lib/CodeGen/BranchFolding.h Wed Nov 11 18:39:53 2009
@@ -44,18 +44,24 @@
     RegScavenger *RS;
 
     bool TailMergeBlocks(MachineFunction &MF);
-    bool TryMergeBlocks(MachineBasicBlock* SuccBB,
-                        MachineBasicBlock* PredBB);
+    bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
+                       MachineBasicBlock* PredBB);
     void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
                                  MachineBasicBlock *NewDest);
     MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
                                   MachineBasicBlock::iterator BBI1);
-    unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength);
+    unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength,
+                              MachineBasicBlock *SuccBB,
+                              MachineBasicBlock *PredBB);
     void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
                                                 MachineBasicBlock* PredBB);
     unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
                                        unsigned maxCommonTailLength);
 
+    bool TailDuplicate(MachineBasicBlock *TailBB,
+                       bool PrevFallsThrough,
+                       MachineFunction &MF);
+    
     bool OptimizeBranches(MachineFunction &MF);
     bool OptimizeBlock(MachineBasicBlock *MBB);
     void RemoveDeadBlock(MachineBasicBlock *MBB);

Modified: llvm/branches/Apple/Leela/lib/CodeGen/MachineBasicBlock.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Leela/lib/CodeGen/MachineBasicBlock.cpp?rev=86910&r1=86909&r2=86910&view=diff

==============================================================================
--- llvm/branches/Apple/Leela/lib/CodeGen/MachineBasicBlock.cpp (original)
+++ llvm/branches/Apple/Leela/lib/CodeGen/MachineBasicBlock.cpp Wed Nov 11 18:39:53 2009
@@ -371,10 +371,7 @@
   MachineBasicBlock::succ_iterator SI = succ_begin();
   MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB;
   while (SI != succ_end()) {
-    if (*SI == DestA && DestA == DestB) {
-      DestA = DestB = 0;
-      ++SI;
-    } else if (*SI == DestA) {
+    if (*SI == DestA) {
       DestA = 0;
       ++SI;
     } else if (*SI == DestB) {

Modified: llvm/branches/Apple/Leela/lib/Target/Alpha/AlphaInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Leela/lib/Target/Alpha/AlphaInstrInfo.td?rev=86910&r1=86909&r2=86910&view=diff

==============================================================================
--- llvm/branches/Apple/Leela/lib/Target/Alpha/AlphaInstrInfo.td (original)
+++ llvm/branches/Apple/Leela/lib/Target/Alpha/AlphaInstrInfo.td Wed Nov 11 18:39:53 2009
@@ -391,7 +391,7 @@
 def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQ GPRC:$X, immUExt8:$Y), 0)>;
 
 
-let isReturn = 1, isTerminator = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in {
+let isReturn = 1, isTerminator = 1, isBarrier = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in {
   def RETDAG : MbrForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", s_jsr>; //Return from subroutine
   def RETDAGp : MbrpForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine
 }

Modified: llvm/branches/Apple/Leela/lib/Target/Blackfin/BlackfinInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Leela/lib/Target/Blackfin/BlackfinInstrInfo.td?rev=86910&r1=86909&r2=86910&view=diff

==============================================================================
--- llvm/branches/Apple/Leela/lib/Target/Blackfin/BlackfinInstrInfo.td (original)
+++ llvm/branches/Apple/Leela/lib/Target/Blackfin/BlackfinInstrInfo.td Wed Nov 11 18:39:53 2009
@@ -174,6 +174,7 @@
 
 let isReturn     = 1,
     isTerminator = 1,
+    isBarrier    = 1,
     Uses         = [RETS] in
 def RTS: F1<(outs), (ins), "rts;", [(BfinRet)]>;
 

Modified: llvm/branches/Apple/Leela/lib/Target/CellSPU/SPUInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Leela/lib/Target/CellSPU/SPUInstrInfo.td?rev=86910&r1=86909&r2=86910&view=diff

==============================================================================
--- llvm/branches/Apple/Leela/lib/Target/CellSPU/SPUInstrInfo.td (original)
+++ llvm/branches/Apple/Leela/lib/Target/CellSPU/SPUInstrInfo.td Wed Nov 11 18:39:53 2009
@@ -3601,21 +3601,23 @@
           (BRASL texternalsym:$func)>;
 
 // Unconditional branches:
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
-  def BR :
-    UncondBranch<0b001001100, (outs), (ins brtarget:$dest),
-      "br\t$dest",
-      [(br bb:$dest)]>;
-
-  // Unconditional, absolute address branch
-  def BRA:
-    UncondBranch<0b001100000, (outs), (ins brtarget:$dest),
-      "bra\t$dest",
-      [/* no pattern */]>;
-
-  // Indirect branch
-  def BI:
-    BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+  let isBarrier = 1 in {
+    def BR :
+      UncondBranch<0b001001100, (outs), (ins brtarget:$dest),
+        "br\t$dest",
+        [(br bb:$dest)]>;
+
+    // Unconditional, absolute address branch
+    def BRA:
+      UncondBranch<0b001100000, (outs), (ins brtarget:$dest),
+        "bra\t$dest",
+        [/* no pattern */]>;
+
+    // Indirect branch
+    def BI:
+      BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
+  }
 
   // Conditional branches:
   class BRNZInst<dag IOL, list<dag> pattern>:

Modified: llvm/branches/Apple/Leela/lib/Target/MSP430/MSP430InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Leela/lib/Target/MSP430/MSP430InstrInfo.td?rev=86910&r1=86909&r2=86910&view=diff

==============================================================================
--- llvm/branches/Apple/Leela/lib/Target/MSP430/MSP430InstrInfo.td (original)
+++ llvm/branches/Apple/Leela/lib/Target/MSP430/MSP430InstrInfo.td Wed Nov 11 18:39:53 2009
@@ -127,7 +127,7 @@
 //
 
 // FIXME: Provide proper encoding!
-let isReturn = 1, isTerminator = 1 in {
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
   def RET : Pseudo<(outs), (ins), "ret", [(MSP430retflag)]>;
 }
 

Modified: llvm/branches/Apple/Leela/lib/Target/Sparc/SparcInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Leela/lib/Target/Sparc/SparcInstrInfo.td?rev=86910&r1=86909&r2=86910&view=diff

==============================================================================
--- llvm/branches/Apple/Leela/lib/Target/Sparc/SparcInstrInfo.td (original)
+++ llvm/branches/Apple/Leela/lib/Target/Sparc/SparcInstrInfo.td Wed Nov 11 18:39:53 2009
@@ -277,7 +277,7 @@
 
 // Section A.3 - Synthetic Instructions, p. 85
 // special cases of JMPL:
-let isReturn = 1, isTerminator = 1, hasDelaySlot = 1 in {
+let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
   let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in
     def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>;
 }

Modified: llvm/branches/Apple/Leela/lib/Target/XCore/XCoreInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Leela/lib/Target/XCore/XCoreInstrInfo.td?rev=86910&r1=86909&r2=86910&view=diff

==============================================================================
--- llvm/branches/Apple/Leela/lib/Target/XCore/XCoreInstrInfo.td (original)
+++ llvm/branches/Apple/Leela/lib/Target/XCore/XCoreInstrInfo.td Wed Nov 11 18:39:53 2009
@@ -617,7 +617,7 @@
 let mayStore = 1 in
 defm ENTSP : FU6_LU6_np<"entsp">;
 
-let isReturn = 1, isTerminator = 1, mayLoad = 1 in {
+let isReturn = 1, isTerminator = 1, mayLoad = 1, isBarrier = 1 in {
 defm RETSP : FU6_LU6<"retsp", XCoreRetsp>;
 }
 }

Modified: llvm/branches/Apple/Leela/lib/Transforms/Utils/LCSSA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Leela/lib/Transforms/Utils/LCSSA.cpp?rev=86910&r1=86909&r2=86910&view=diff

==============================================================================
--- llvm/branches/Apple/Leela/lib/Transforms/Utils/LCSSA.cpp (original)
+++ llvm/branches/Apple/Leela/lib/Transforms/Utils/LCSSA.cpp Wed Nov 11 18:39:53 2009
@@ -63,6 +63,9 @@
     ///
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
+
+      // LCSSA doesn't actually require LoopSimplify, but the PassManager
+      // doesn't know how to schedule LoopSimplify by itself.
       AU.addRequiredID(LoopSimplifyID);
       AU.addPreservedID(LoopSimplifyID);
       AU.addRequiredTransitive<LoopInfo>();
@@ -214,7 +217,7 @@
   SSAUpdate.Initialize(Inst);
   
   // Insert the LCSSA phi's into all of the exit blocks dominated by the
-  // value., and add them to the Phi's map.
+  // value, and add them to the Phi's map.
   for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(),
       BBE = ExitBlocks.end(); BBI != BBE; ++BBI) {
     BasicBlock *ExitBB = *BBI;
@@ -228,8 +231,17 @@
     PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB));
 
     // Add inputs from inside the loop for this PHI.
-    for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI)
+    for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) {
       PN->addIncoming(Inst, *PI);
+
+      // If the exit block has a predecessor not within the loop, arrange for
+      // the incoming value use corresponding to that predecessor to be
+      // rewritten in terms of a different LCSSA PHI.
+      if (!inLoop(*PI))
+        UsesToRewrite.push_back(
+          &PN->getOperandUse(
+            PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
+    }
     
     // Remember that this phi makes the value alive in this block.
     SSAUpdate.AddAvailableValue(ExitBB, PN);

Modified: llvm/branches/Apple/Leela/test/CodeGen/Thumb2/thumb2-cbnz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Leela/test/CodeGen/Thumb2/thumb2-cbnz.ll?rev=86910&r1=86909&r2=86910&view=diff

==============================================================================
--- llvm/branches/Apple/Leela/test/CodeGen/Thumb2/thumb2-cbnz.ll (original)
+++ llvm/branches/Apple/Leela/test/CodeGen/Thumb2/thumb2-cbnz.ll Wed Nov 11 18:39:53 2009
@@ -20,7 +20,8 @@
   br i1 %a, label %bb11, label %bb9
 
 bb9:                                              ; preds = %bb7
-; CHECK:      @ BB#2:
+; CHECK:      cmp r0, #0
+; CHECK-NEXT: cmp r0, #0
 ; CHECK-NEXT: cbnz
   %0 = tail call arm_apcscc  double @floor(double %b) nounwind readnone ; <double> [#uses=0]
   br label %bb11

Modified: llvm/branches/Apple/Leela/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Leela/test/CodeGen/Thumb2/thumb2-ifcvt3.ll?rev=86910&r1=86909&r2=86910&view=diff

==============================================================================
--- llvm/branches/Apple/Leela/test/CodeGen/Thumb2/thumb2-ifcvt3.ll (original)
+++ llvm/branches/Apple/Leela/test/CodeGen/Thumb2/thumb2-ifcvt3.ll Wed Nov 11 18:39:53 2009
@@ -23,7 +23,7 @@
 ; CHECK: movne
 ; CHECK: moveq
 ; CHECK: pop
-; CHECK-NEXT: LBB1_2:
+; CHECK-NEXT: LBB1_1:
   %0 = load i64* @posed, align 4                  ; <i64> [#uses=3]
   %1 = sub i64 %0, %.reload78                     ; <i64> [#uses=1]
   %2 = ashr i64 %1, 1                             ; <i64> [#uses=3]

Modified: llvm/branches/Apple/Leela/test/CodeGen/X86/loop-blocks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Leela/test/CodeGen/X86/loop-blocks.ll?rev=86910&r1=86909&r2=86910&view=diff

==============================================================================
--- llvm/branches/Apple/Leela/test/CodeGen/X86/loop-blocks.ll (original)
+++ llvm/branches/Apple/Leela/test/CodeGen/X86/loop-blocks.ll Wed Nov 11 18:39:53 2009
@@ -74,16 +74,16 @@
 ; CHECK: yet_more_involved:
 ;      CHECK:   jmp .LBB3_1
 ; CHECK-NEXT:   align
-; CHECK-NEXT: .LBB3_3:
+; CHECK-NEXT: .LBB3_4:
 ; CHECK-NEXT:   call bar99
 ; CHECK-NEXT:   call get
 ; CHECK-NEXT:   cmpl $2999, %eax
-; CHECK-NEXT:   jg .LBB3_5
+; CHECK-NEXT:   jg .LBB3_6
 ; CHECK-NEXT:   call block_a_true_func
-; CHECK-NEXT:   jmp .LBB3_6
-; CHECK-NEXT: .LBB3_5:
-; CHECK-NEXT:   call block_a_false_func
+; CHECK-NEXT:   jmp .LBB3_7
 ; CHECK-NEXT: .LBB3_6:
+; CHECK-NEXT:   call block_a_false_func
+; CHECK-NEXT: .LBB3_7:
 ; CHECK-NEXT:   call block_a_merge_func
 ; CHECK-NEXT: .LBB3_1:
 ; CHECK-NEXT:   call body





More information about the llvm-branch-commits mailing list