[llvm] r265790 - Revert r265547 "Recommit r265309 after fixed an invalid memory reference bug happened"

Hans Wennborg via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 8 08:17:44 PDT 2016


Author: hans
Date: Fri Apr  8 10:17:43 2016
New Revision: 265790

URL: http://llvm.org/viewvc/llvm-project?rev=265790&view=rev
Log:
Revert r265547 "Recommit r265309 after fixed an invalid memory reference bug happened"

It caused PR27275: "ARM: Bad machine code: Using an undefined physical register"

Also reverting the following commits that were landed on top:
r265610 "Fix the compare-clang diff error introduced by r265547."
r265639 "Fix the sanitizer bootstrap error in r265547."
r265657 "InlineSpiller.cpp: Escap \@ in r265547. [-Wdocumentation]"

Added:
    llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll
Removed:
    llvm/trunk/test/CodeGen/X86/hoist-spill.ll
    llvm/trunk/test/CodeGen/X86/new-remat.ll
Modified:
    llvm/trunk/include/llvm/CodeGen/LiveRangeEdit.h
    llvm/trunk/lib/CodeGen/InlineSpiller.cpp
    llvm/trunk/lib/CodeGen/LiveRangeEdit.cpp
    llvm/trunk/lib/CodeGen/RegAllocBase.cpp
    llvm/trunk/lib/CodeGen/RegAllocBase.h
    llvm/trunk/lib/CodeGen/RegAllocBasic.cpp
    llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp
    llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp
    llvm/trunk/lib/CodeGen/Spiller.h
    llvm/trunk/lib/CodeGen/SplitKit.cpp
    llvm/trunk/lib/CodeGen/SplitKit.h
    llvm/trunk/test/CodeGen/X86/fp128-compare.ll
    llvm/trunk/test/CodeGen/X86/ragreedy-hoist-spill.ll

Modified: llvm/trunk/include/llvm/CodeGen/LiveRangeEdit.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/LiveRangeEdit.h?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/LiveRangeEdit.h (original)
+++ llvm/trunk/include/llvm/CodeGen/LiveRangeEdit.h Fri Apr  8 10:17:43 2016
@@ -72,10 +72,6 @@ private:
   /// ScannedRemattable - true when remattable values have been identified.
   bool ScannedRemattable;
 
-  /// DeadRemats - The saved instructions which have already been dead after
-  /// rematerialization but not deleted yet -- to be done in postOptimization.
-  SmallPtrSet<MachineInstr *, 32> *DeadRemats;
-
   /// Remattable - Values defined by remattable instructions as identified by
   /// tii.isTriviallyReMaterializable().
   SmallPtrSet<const VNInfo*,4> Remattable;
@@ -120,16 +116,13 @@ public:
   /// @param vrm Map of virtual registers to physical registers for this
   ///            function.  If NULL, no virtual register map updates will
   ///            be done.  This could be the case if called before Regalloc.
-  /// @param deadRemats The collection of all the instructions defining an
-  ///                   original reg and are dead after remat.
   LiveRangeEdit(LiveInterval *parent, SmallVectorImpl<unsigned> &newRegs,
                 MachineFunction &MF, LiveIntervals &lis, VirtRegMap *vrm,
-                Delegate *delegate = nullptr,
-                SmallPtrSet<MachineInstr *, 32> *deadRemats = nullptr)
+                Delegate *delegate = nullptr)
       : Parent(parent), NewRegs(newRegs), MRI(MF.getRegInfo()), LIS(lis),
-        VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()), TheDelegate(delegate),
-        FirstNew(newRegs.size()), ScannedRemattable(false),
-        DeadRemats(deadRemats) {
+        VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()),
+        TheDelegate(delegate), FirstNew(newRegs.size()),
+        ScannedRemattable(false) {
     MRI.setDelegate(this);
   }
 
@@ -149,16 +142,6 @@ public:
   bool empty() const { return size() == 0; }
   unsigned get(unsigned idx) const { return NewRegs[idx+FirstNew]; }
 
-  /// pop_back - It allows LiveRangeEdit users to drop new registers.
-  /// The context is when an original def instruction of a register is
-  /// dead after rematerialization, we still want to keep it for following
-  /// rematerializations. We save the def instruction in DeadRemats,
-  /// and replace the original dst register with a new dummy register so
-  /// the live range of original dst register can be shrinked normally.
-  /// We don't want to allocate phys register for the dummy register, so
-  /// we want to drop it from the NewRegs set.
-  void pop_back() { NewRegs.pop_back(); }
-
   ArrayRef<unsigned> regs() const {
     return makeArrayRef(NewRegs).slice(FirstNew);
   }
@@ -192,15 +175,15 @@ public:
   /// Remat - Information needed to rematerialize at a specific location.
   struct Remat {
     VNInfo *ParentVNI;      // parent_'s value at the remat location.
-    MachineInstr *OrigMI;   // Instruction defining OrigVNI. It contains the
-                            // real expr for remat.
+    MachineInstr *OrigMI;   // Instruction defining ParentVNI.
     explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(nullptr) {}
   };
 
   /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
   /// UseIdx. It is assumed that parent_.getVNINfoAt(UseIdx) == ParentVNI.
   /// When cheapAsAMove is set, only cheap remats are allowed.
-  bool canRematerializeAt(Remat &RM, VNInfo *OrigVNI, SlotIndex UseIdx,
+  bool canRematerializeAt(Remat &RM,
+                          SlotIndex UseIdx,
                           bool cheapAsAMove);
 
   /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an
@@ -225,12 +208,6 @@ public:
     return Rematted.count(ParentVNI);
   }
 
-  void markDeadRemat(MachineInstr *inst) {
-    // DeadRemats is an optional field.
-    if (DeadRemats)
-      DeadRemats->insert(inst);
-  }
-
   /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try
   /// to erase it from LIS.
   void eraseVirtReg(unsigned Reg);
@@ -241,11 +218,8 @@ public:
   /// RegsBeingSpilled lists registers currently being spilled by the register
   /// allocator.  These registers should not be split into new intervals
   /// as currently those new intervals are not guaranteed to spill.
-  /// NoSplit indicates this func is used after the iterations of selectOrSplit
-  /// where registers should not be split into new intervals.
-  void eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
-                         ArrayRef<unsigned> RegsBeingSpilled = None,
-                         bool NoSplit = false);
+  void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
+                         ArrayRef<unsigned> RegsBeingSpilled = None);
 
   /// calculateRegClassAndHint - Recompute register class and hint for each new
   /// register.

Modified: llvm/trunk/lib/CodeGen/InlineSpiller.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/InlineSpiller.cpp?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/InlineSpiller.cpp (original)
+++ llvm/trunk/lib/CodeGen/InlineSpiller.cpp Fri Apr  8 10:17:43 2016
@@ -13,7 +13,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "Spiller.h"
-#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/TinyPtrVector.h"
@@ -49,77 +48,13 @@ STATISTIC(NumReloadsRemoved,  "Number of
 STATISTIC(NumFolded,          "Number of folded stack accesses");
 STATISTIC(NumFoldedLoads,     "Number of folded loads");
 STATISTIC(NumRemats,          "Number of rematerialized defs for spilling");
+STATISTIC(NumOmitReloadSpill, "Number of omitted spills of reloads");
+STATISTIC(NumHoists,          "Number of hoisted spills");
 
 static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
                                      cl::desc("Disable inline spill hoisting"));
 
 namespace {
-class HoistSpillHelper {
-  LiveIntervals &LIS;
-  LiveStacks &LSS;
-  AliasAnalysis *AA;
-  MachineDominatorTree &MDT;
-  MachineLoopInfo &Loops;
-  VirtRegMap &VRM;
-  MachineFrameInfo &MFI;
-  MachineRegisterInfo &MRI;
-  const TargetInstrInfo &TII;
-  const TargetRegisterInfo &TRI;
-  const MachineBlockFrequencyInfo &MBFI;
-
-  // Map from StackSlot to its original register.
-  DenseMap<int, unsigned> StackSlotToReg;
-  // Map from pair of (StackSlot and Original VNI) to a set of spills which
-  // have the same stackslot and have equal values defined by Original VNI.
-  // These spills are mergeable and are hoist candiates.
-  typedef MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>
-      MergeableSpillsMap;
-  MergeableSpillsMap MergeableSpills;
-
-  /// This is the map from original register to a set containing all its
-  /// siblings. To hoist a spill to another BB, we need to find out a live
-  /// sibling there and use it as the source of the new spill.
-  DenseMap<unsigned, SmallSetVector<unsigned, 16>> Virt2SiblingsMap;
-
-  bool isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI, MachineBasicBlock &BB,
-                     unsigned &LiveReg);
-
-  void rmRedundantSpills(
-      SmallPtrSet<MachineInstr *, 16> &Spills,
-      SmallVectorImpl<MachineInstr *> &SpillsToRm,
-      DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);
-
-  void getVisitOrders(
-      MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
-      SmallVectorImpl<MachineDomTreeNode *> &Orders,
-      SmallVectorImpl<MachineInstr *> &SpillsToRm,
-      DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
-      DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);
-
-  void runHoistSpills(unsigned OrigReg, VNInfo &OrigVNI,
-                      SmallPtrSet<MachineInstr *, 16> &Spills,
-                      SmallVectorImpl<MachineInstr *> &SpillsToRm,
-                      DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns);
-
-public:
-  HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf,
-                   VirtRegMap &vrm)
-      : LIS(pass.getAnalysis<LiveIntervals>()),
-        LSS(pass.getAnalysis<LiveStacks>()),
-        AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
-        MDT(pass.getAnalysis<MachineDominatorTree>()),
-        Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
-        MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
-        TII(*mf.getSubtarget().getInstrInfo()),
-        TRI(*mf.getSubtarget().getRegisterInfo()),
-        MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {}
-
-  void addToMergeableSpills(MachineInstr *Spill, int StackSlot,
-                            unsigned Original);
-  bool rmFromMergeableSpills(MachineInstr *Spill, int StackSlot);
-  void hoistAllSpills(LiveRangeEdit &Edit);
-};
-
 class InlineSpiller : public Spiller {
   MachineFunction &MF;
   LiveIntervals &LIS;
@@ -150,12 +85,56 @@ class InlineSpiller : public Spiller {
   // Values that failed to remat at some point.
   SmallPtrSet<VNInfo*, 8> UsedValues;
 
+public:
+  // Information about a value that was defined by a copy from a sibling
+  // register.
+  struct SibValueInfo {
+    // True when all reaching defs were reloads: No spill is necessary.
+    bool AllDefsAreReloads;
+
+    // True when value is defined by an original PHI not from splitting.
+    bool DefByOrigPHI;
+
+    // True when the COPY defining this value killed its source.
+    bool KillsSource;
+
+    // The preferred register to spill.
+    unsigned SpillReg;
+
+    // The value of SpillReg that should be spilled.
+    VNInfo *SpillVNI;
+
+    // The block where SpillVNI should be spilled. Currently, this must be the
+    // block containing SpillVNI->def.
+    MachineBasicBlock *SpillMBB;
+
+    // A defining instruction that is not a sibling copy or a reload, or NULL.
+    // This can be used as a template for rematerialization.
+    MachineInstr *DefMI;
+
+    // List of values that depend on this one.  These values are actually the
+    // same, but live range splitting has placed them in different registers,
+    // or SSA update needed to insert PHI-defs to preserve SSA form.  This is
+    // copies of the current value and phi-kills.  Usually only phi-kills cause
+    // more than one dependent value.
+    TinyPtrVector<VNInfo*> Deps;
+
+    SibValueInfo(unsigned Reg, VNInfo *VNI)
+      : AllDefsAreReloads(true), DefByOrigPHI(false), KillsSource(false),
+        SpillReg(Reg), SpillVNI(VNI), SpillMBB(nullptr), DefMI(nullptr) {}
+
+    // Returns true when a def has been found.
+    bool hasDef() const { return DefByOrigPHI || DefMI; }
+  };
+
+private:
+  // Values in RegsToSpill defined by sibling copies.
+  typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap;
+  SibValueMap SibValues;
+
   // Dead defs generated during spilling.
   SmallVector<MachineInstr*, 8> DeadDefs;
 
-  // Object records spills information and does the hoisting.
-  HoistSpillHelper HSpiller;
-
   ~InlineSpiller() override {}
 
 public:
@@ -168,11 +147,9 @@ public:
         MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
         TII(*mf.getSubtarget().getInstrInfo()),
         TRI(*mf.getSubtarget().getRegisterInfo()),
-        MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
-        HSpiller(pass, mf, vrm) {}
+        MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {}
 
   void spill(LiveRangeEdit &) override;
-  void postOptimization() override;
 
 private:
   bool isSnippet(const LiveInterval &SnipLI);
@@ -184,7 +161,11 @@ private:
   }
 
   bool isSibling(unsigned Reg);
-  bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI);
+  MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*);
+  void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = nullptr);
+  void analyzeSiblingValues();
+
+  bool hoistSpill(LiveInterval &SpillLI, MachineInstr &CopyMI);
   void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
 
   void markValueUsed(LiveInterval*, VNInfo*);
@@ -316,45 +297,417 @@ void InlineSpiller::collectRegsToSpill()
   }
 }
 
+
+//===----------------------------------------------------------------------===//
+//                            Sibling Values
+//===----------------------------------------------------------------------===//
+
+// After live range splitting, some values to be spilled may be defined by
+// copies from sibling registers. We trace the sibling copies back to the
+// original value if it still exists. We need it for rematerialization.
+//
+// Even when the value can't be rematerialized, we still want to determine if
+// the value has already been spilled, or we may want to hoist the spill from a
+// loop.
+
 bool InlineSpiller::isSibling(unsigned Reg) {
   return TargetRegisterInfo::isVirtualRegister(Reg) &&
            VRM.getOriginal(Reg) == Original;
 }
 
-/// It is beneficial to spill to earlier place in the same BB in case
-/// as follows:
-/// There is an alternative def earlier in the same MBB.
-/// Hoist the spill as far as possible in SpillMBB. This can ease
-/// register pressure:
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS,
+                               const InlineSpiller::SibValueInfo &SVI) {
+  OS << "spill " << PrintReg(SVI.SpillReg) << ':'
+     << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def;
+  if (SVI.SpillMBB)
+    OS << " in BB#" << SVI.SpillMBB->getNumber();
+  if (SVI.AllDefsAreReloads)
+    OS << " all-reloads";
+  if (SVI.DefByOrigPHI)
+    OS << " orig-phi";
+  if (SVI.KillsSource)
+    OS << " kill";
+  OS << " deps[";
+  for (VNInfo *Dep : SVI.Deps)
+    OS << ' ' << Dep->id << '@' << Dep->def;
+  OS << " ]";
+  if (SVI.DefMI)
+    OS << " def: " << *SVI.DefMI;
+  else
+    OS << '\n';
+  return OS;
+}
+#endif
+
+/// propagateSiblingValue - Propagate the value in SVI to dependents if it is
+/// known.  Otherwise remember the dependency for later.
 ///
-///   x = def
-///   y = use x
-///   s = copy x
+/// @param SVIIter SibValues entry to propagate.
+/// @param VNI Dependent value, or NULL to propagate to all saved dependents.
+void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter,
+                                          VNInfo *VNI) {
+  SibValueMap::value_type *SVI = &*SVIIter;
+
+  // When VNI is non-NULL, add it to SVI's deps, and only propagate to that.
+  TinyPtrVector<VNInfo*> FirstDeps;
+  if (VNI) {
+    FirstDeps.push_back(VNI);
+    SVI->second.Deps.push_back(VNI);
+  }
+
+  // Has the value been completely determined yet?  If not, defer propagation.
+  if (!SVI->second.hasDef())
+    return;
+
+  // Work list of values to propagate.
+  SmallSetVector<SibValueMap::value_type *, 8> WorkList;
+  WorkList.insert(SVI);
+
+  do {
+    SVI = WorkList.pop_back_val();
+    TinyPtrVector<VNInfo*> *Deps = VNI ? &FirstDeps : &SVI->second.Deps;
+    VNI = nullptr;
+
+    SibValueInfo &SV = SVI->second;
+    if (!SV.SpillMBB)
+      SV.SpillMBB = LIS.getMBBFromIndex(SV.SpillVNI->def);
+
+    DEBUG(dbgs() << "  prop to " << Deps->size() << ": "
+                 << SVI->first->id << '@' << SVI->first->def << ":\t" << SV);
+
+    assert(SV.hasDef() && "Propagating undefined value");
+
+    // Should this value be propagated as a preferred spill candidate?  We don't
+    // propagate values of registers that are about to spill.
+    bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg);
+    unsigned SpillDepth = ~0u;
+
+    for (VNInfo *Dep : *Deps) {
+      SibValueMap::iterator DepSVI = SibValues.find(Dep);
+      assert(DepSVI != SibValues.end() && "Dependent value not in SibValues");
+      SibValueInfo &DepSV = DepSVI->second;
+      if (!DepSV.SpillMBB)
+        DepSV.SpillMBB = LIS.getMBBFromIndex(DepSV.SpillVNI->def);
+
+      bool Changed = false;
+
+      // Propagate defining instruction.
+      if (!DepSV.hasDef()) {
+        Changed = true;
+        DepSV.DefMI = SV.DefMI;
+        DepSV.DefByOrigPHI = SV.DefByOrigPHI;
+      }
+
+      // Propagate AllDefsAreReloads.  For PHI values, this computes an AND of
+      // all predecessors.
+      if (!SV.AllDefsAreReloads && DepSV.AllDefsAreReloads) {
+        Changed = true;
+        DepSV.AllDefsAreReloads = false;
+      }
+
+      // Propagate best spill value.
+      if (PropSpill && SV.SpillVNI != DepSV.SpillVNI) {
+        if (SV.SpillMBB == DepSV.SpillMBB) {
+          // DepSV is in the same block.  Hoist when dominated.
+          if (DepSV.KillsSource && SV.SpillVNI->def < DepSV.SpillVNI->def) {
+            // This is an alternative def earlier in the same MBB.
+            // Hoist the spill as far as possible in SpillMBB. This can ease
+            // register pressure:
+            //
+            //   x = def
+            //   y = use x
+            //   s = copy x
+            //
+            // Hoisting the spill of s to immediately after the def removes the
+            // interference between x and y:
+            //
+            //   x = def
+            //   spill x
+            //   y = use x<kill>
+            //
+            // This hoist only helps when the DepSV copy kills its source.
+            Changed = true;
+            DepSV.SpillReg = SV.SpillReg;
+            DepSV.SpillVNI = SV.SpillVNI;
+            DepSV.SpillMBB = SV.SpillMBB;
+          }
+        } else {
+          // DepSV is in a different block.
+          if (SpillDepth == ~0u)
+            SpillDepth = Loops.getLoopDepth(SV.SpillMBB);
+
+          // Also hoist spills to blocks with smaller loop depth, but make sure
+          // that the new value dominates.  Non-phi dependents are always
+          // dominated, phis need checking.
+
+          const BranchProbability MarginProb(4, 5); // 80%
+          // Hoist a spill to an outer loop if there are multiple dependents
+          // (it can be beneficial if more than one dependent is hoisted) or
+          // if DepSV (the hoisting source) is hotter than SV (the hoisting
+          // destination) (we add an 80% margin to bias a little towards
+          // loop depth).
+          bool HoistCondition =
+            (MBFI.getBlockFreq(DepSV.SpillMBB) >=
+             (MBFI.getBlockFreq(SV.SpillMBB) * MarginProb)) ||
+            Deps->size() > 1;
+
+          if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) &&
+              HoistCondition &&
+              (!DepSVI->first->isPHIDef() ||
+               MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) {
+            Changed = true;
+            DepSV.SpillReg = SV.SpillReg;
+            DepSV.SpillVNI = SV.SpillVNI;
+            DepSV.SpillMBB = SV.SpillMBB;
+          }
+        }
+      }
+
+      if (!Changed)
+        continue;
+
+      // Something changed in DepSVI. Propagate to dependents.
+      WorkList.insert(&*DepSVI);
+
+      DEBUG(dbgs() << "  update " << DepSVI->first->id << '@'
+            << DepSVI->first->def << " to:\t" << DepSV);
+    }
+  } while (!WorkList.empty());
+}
+
+/// traceSiblingValue - Trace a value that is about to be spilled back to the
+/// real defining instructions by looking through sibling copies. Always stay
+/// within the range of OrigVNI so the registers are known to carry the same
+/// value.
 ///
-/// Hoisting the spill of s to immediately after the def removes the
-/// interference between x and y:
+/// Determine if the value is defined by all reloads, so spilling isn't
+/// necessary - the value is already in the stack slot.
 ///
-///   x = def
-///   spill x
-///   y = use x<kill>
+/// Return a defining instruction that may be a candidate for rematerialization.
 ///
-/// This hoist only helps when the copy kills its source.
+MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
+                                               VNInfo *OrigVNI) {
+  // Check if a cached value already exists.
+  SibValueMap::iterator SVI;
+  bool Inserted;
+  std::tie(SVI, Inserted) =
+    SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI)));
+  if (!Inserted) {
+    DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':'
+                 << UseVNI->id << '@' << UseVNI->def << ' ' << SVI->second);
+    return SVI->second.DefMI;
+  }
+
+  DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':'
+               << UseVNI->id << '@' << UseVNI->def << '\n');
+
+  // List of (Reg, VNI) that have been inserted into SibValues, but need to be
+  // processed.
+  SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList;
+  WorkList.push_back(std::make_pair(UseReg, UseVNI));
+
+  LiveInterval &OrigLI = LIS.getInterval(Original);
+  do {
+    unsigned Reg;
+    VNInfo *VNI;
+    std::tie(Reg, VNI) = WorkList.pop_back_val();
+    DEBUG(dbgs() << "  " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
+                 << ":\t");
+
+    // First check if this value has already been computed.
+    SVI = SibValues.find(VNI);
+    assert(SVI != SibValues.end() && "Missing SibValues entry");
+
+    // Trace through PHI-defs created by live range splitting.
+    if (VNI->isPHIDef()) {
+      // Stop at original PHIs.  We don't know the value at the
+      // predecessors. Look up the VNInfo for the current definition
+      // in OrigLI, to properly determine whether or not this phi was
+      // added by splitting.
+      if (VNI->def == OrigLI.getVNInfoAt(VNI->def)->def) {
+        DEBUG(dbgs() << "orig phi value\n");
+        SVI->second.DefByOrigPHI = true;
+        SVI->second.AllDefsAreReloads = false;
+        propagateSiblingValue(SVI);
+        continue;
+      }
+
+      // This is a PHI inserted by live range splitting.  We could trace the
+      // live-out value from predecessor blocks, but that search can be very
+      // expensive if there are many predecessors and many more PHIs as
+      // generated by tail-dup when it sees an indirectbr.  Instead, look at
+      // all the non-PHI defs that have the same value as OrigVNI.  They must
+      // jointly dominate VNI->def.  This is not optimal since VNI may actually
+      // be jointly dominated by a smaller subset of defs, so there is a chance
+      // we will miss an AllDefsAreReloads optimization.
+
+      // Separate all values dominated by OrigVNI into PHIs and non-PHIs.
+      SmallVector<VNInfo*, 8> PHIs, NonPHIs;
+      LiveInterval &LI = LIS.getInterval(Reg);
+
+      for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end();
+           VI != VE; ++VI) {
+        VNInfo *VNI2 = *VI;
+        if (VNI2->isUnused())
+          continue;
+        if (!OrigLI.containsOneValue() &&
+            OrigLI.getVNInfoAt(VNI2->def) != OrigVNI)
+          continue;
+        if (VNI2->isPHIDef() && VNI2->def != OrigVNI->def)
+          PHIs.push_back(VNI2);
+        else
+          NonPHIs.push_back(VNI2);
+      }
+      DEBUG(dbgs() << "split phi value, checking " << PHIs.size()
+                   << " phi-defs, and " << NonPHIs.size()
+                   << " non-phi/orig defs\n");
+
+      // Create entries for all the PHIs.  Don't add them to the worklist, we
+      // are processing all of them in one go here.
+      for (VNInfo *PHI : PHIs)
+        SibValues.insert(std::make_pair(PHI, SibValueInfo(Reg, PHI)));
+
+      // Add every PHI as a dependent of all the non-PHIs.
+      for (VNInfo *NonPHI : NonPHIs) {
+        // Known value? Try an insertion.
+        std::tie(SVI, Inserted) =
+          SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI)));
+        // Add all the PHIs as dependents of NonPHI.
+        SVI->second.Deps.insert(SVI->second.Deps.end(), PHIs.begin(),
+                                PHIs.end());
+        // This is the first time we see NonPHI, add it to the worklist.
+        if (Inserted)
+          WorkList.push_back(std::make_pair(Reg, NonPHI));
+        else
+          // Propagate to all inserted PHIs, not just VNI.
+          propagateSiblingValue(SVI);
+      }
+
+      // Next work list item.
+      continue;
+    }
+
+    MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+    assert(MI && "Missing def");
+
+    // Trace through sibling copies.
+    if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
+      if (isSibling(SrcReg)) {
+        LiveInterval &SrcLI = LIS.getInterval(SrcReg);
+        LiveQueryResult SrcQ = SrcLI.Query(VNI->def);
+        assert(SrcQ.valueIn() && "Copy from non-existing value");
+        // Check if this COPY kills its source.
+        SVI->second.KillsSource = SrcQ.isKill();
+        VNInfo *SrcVNI = SrcQ.valueIn();
+        DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':'
+                     << SrcVNI->id << '@' << SrcVNI->def
+                     << " kill=" << unsigned(SVI->second.KillsSource) << '\n');
+        // Known sibling source value? Try an insertion.
+        std::tie(SVI, Inserted) = SibValues.insert(
+            std::make_pair(SrcVNI, SibValueInfo(SrcReg, SrcVNI)));
+        // This is the first time we see Src, add it to the worklist.
+        if (Inserted)
+          WorkList.push_back(std::make_pair(SrcReg, SrcVNI));
+        propagateSiblingValue(SVI, VNI);
+        // Next work list item.
+        continue;
+      }
+    }
+
+    // Track reachable reloads.
+    SVI->second.DefMI = MI;
+    SVI->second.SpillMBB = MI->getParent();
+    int FI;
+    if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) {
+      DEBUG(dbgs() << "reload\n");
+      propagateSiblingValue(SVI);
+      // Next work list item.
+      continue;
+    }
+
+    // Potential remat candidate.
+    DEBUG(dbgs() << "def " << *MI);
+    SVI->second.AllDefsAreReloads = false;
+    propagateSiblingValue(SVI);
+  } while (!WorkList.empty());
+
+  // Look up the value we were looking for.  We already did this lookup at the
+  // top of the function, but SibValues may have been invalidated.
+  SVI = SibValues.find(UseVNI);
+  assert(SVI != SibValues.end() && "Didn't compute requested info");
+  DEBUG(dbgs() << "  traced to:\t" << SVI->second);
+  return SVI->second.DefMI;
+}
+
+/// analyzeSiblingValues - Trace values defined by sibling copies back to
+/// something that isn't a sibling copy.
 ///
-bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
-                                       MachineInstr &CopyMI) {
+/// Keep track of values that may be rematerializable.
+void InlineSpiller::analyzeSiblingValues() {
+  SibValues.clear();
+
+  // No siblings at all?
+  if (Edit->getReg() == Original)
+    return;
+
+  LiveInterval &OrigLI = LIS.getInterval(Original);
+  for (unsigned Reg : RegsToSpill) {
+    LiveInterval &LI = LIS.getInterval(Reg);
+    for (LiveInterval::const_vni_iterator VI = LI.vni_begin(),
+         VE = LI.vni_end(); VI != VE; ++VI) {
+      VNInfo *VNI = *VI;
+      if (VNI->isUnused())
+        continue;
+      MachineInstr *DefMI = nullptr;
+      if (!VNI->isPHIDef()) {
+       DefMI = LIS.getInstructionFromIndex(VNI->def);
+       assert(DefMI && "No defining instruction");
+      }
+      // Check possible sibling copies.
+      if (VNI->isPHIDef() || DefMI->isCopy()) {
+        VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
+        assert(OrigVNI && "Def outside original live range");
+        if (OrigVNI->def != VNI->def)
+          DefMI = traceSiblingValue(Reg, VNI, OrigVNI);
+      }
+      if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) {
+        DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@'
+                     << VNI->def << " may remat from " << *DefMI);
+      }
+    }
+  }
+}
+
+/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert
+/// a spill at a better location.
+bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr &CopyMI) {
   SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
-#ifndef NDEBUG
   VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot());
   assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy");
-#endif
+  SibValueMap::iterator I = SibValues.find(VNI);
+  if (I == SibValues.end())
+    return false;
+
+  const SibValueInfo &SVI = I->second;
+
+  // Let the normal folding code deal with the boring case.
+  if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI)
+    return false;
 
-  unsigned SrcReg = CopyMI.getOperand(1).getReg();
-  LiveInterval &SrcLI = LIS.getInterval(SrcReg);
-  VNInfo *SrcVNI = SrcLI.getVNInfoAt(Idx);
-  LiveQueryResult SrcQ = SrcLI.Query(Idx);
-  MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(SrcVNI->def);
-  if (DefMBB != CopyMI.getParent() || !SrcQ.isKill())
+  // SpillReg may have been deleted by remat and DCE.
+  if (!LIS.hasInterval(SVI.SpillReg)) {
+    DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n');
+    SibValues.erase(I);
     return false;
+  }
+
+  LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg);
+  if (!SibLI.containsValue(SVI.SpillVNI)) {
+    DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n');
+    SibValues.erase(I);
+    return false;
+  }
 
   // Conservatively extend the stack slot range to the range of the original
   // value. We may be able to do better with stack slot coloring by being more
@@ -366,29 +719,35 @@ bool InlineSpiller::hoistSpillInsideBB(L
   DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": "
                << *StackInt << '\n');
 
-  // We are going to spill SrcVNI immediately after its def, so clear out
+  // Already spilled everywhere.
+  if (SVI.AllDefsAreReloads) {
+    DEBUG(dbgs() << "\tno spill needed: " << SVI);
+    ++NumOmitReloadSpill;
+    return true;
+  }
+  // We are going to spill SVI.SpillVNI immediately after its def, so clear out
   // any later spills of the same value.
-  eliminateRedundantSpills(SrcLI, SrcVNI);
+  eliminateRedundantSpills(SibLI, SVI.SpillVNI);
 
-  MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def);
+  MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def);
   MachineBasicBlock::iterator MII;
-  if (SrcVNI->isPHIDef())
+  if (SVI.SpillVNI->isPHIDef())
     MII = MBB->SkipPHIsAndLabels(MBB->begin());
   else {
-    MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def);
+    MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def);
     assert(DefMI && "Defining instruction disappeared");
     MII = DefMI;
     ++MII;
   }
   // Insert spill without kill flag immediately after def.
-  TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot,
-                          MRI.getRegClass(SrcReg), &TRI);
+  TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot,
+                          MRI.getRegClass(SVI.SpillReg), &TRI);
   --MII; // Point to store instruction.
   LIS.InsertMachineInstrInMaps(*MII);
-  DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII);
+  DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII);
 
-  HSpiller.addToMergeableSpills(&(*MII), StackSlot, Original);
   ++NumSpills;
+  ++NumHoists;
   return true;
 }
 
@@ -446,8 +805,7 @@ void InlineSpiller::eliminateRedundantSp
         MI->setDesc(TII.get(TargetOpcode::KILL));
         DeadDefs.push_back(MI);
         ++NumSpillsRemoved;
-        if (HSpiller.rmFromMergeableSpills(MI, StackSlot))
-          --NumSpills;
+        --NumSpills;
       }
     }
   } while (!WorkList.empty());
@@ -518,12 +876,12 @@ bool InlineSpiller::reMaterializeFor(Liv
   if (SnippetCopies.count(&MI))
     return false;
 
-  LiveInterval &OrigLI = LIS.getInterval(Original);
-  VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
+  // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy.
   LiveRangeEdit::Remat RM(ParentVNI);
-  RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
-
-  if (!Edit->canRematerializeAt(RM, OrigVNI, UseIdx, false)) {
+  SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
+  if (SibI != SibValues.end())
+    RM.OrigMI = SibI->second.DefMI;
+  if (!Edit->canRematerializeAt(RM, UseIdx, false)) {
     markValueUsed(&VirtReg, ParentVNI);
     DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
     return false;
@@ -573,6 +931,7 @@ bool InlineSpiller::reMaterializeFor(Liv
 /// reMaterializeAll - Try to rematerialize as many uses as possible,
 /// and trim the live ranges after.
 void InlineSpiller::reMaterializeAll() {
+  // analyzeSiblingValues has already tested all relevant defining instructions.
   if (!Edit->anyRematerializable(AA))
     return;
 
@@ -658,9 +1017,6 @@ bool InlineSpiller::coalesceStackAccess(
   if (InstrReg != Reg || FI != StackSlot)
     return false;
 
-  if (!IsLoad)
-    HSpiller.rmFromMergeableSpills(MI, StackSlot);
-
   DEBUG(dbgs() << "Coalescing stack access: " << *MI);
   LIS.RemoveMachineInstrFromMaps(*MI);
   MI->eraseFromParent();
@@ -785,9 +1141,6 @@ foldMemoryOperand(ArrayRef<std::pair<Mac
     LIS.removePhysRegDefAt(Reg, Idx);
   }
 
-  int FI;
-  if (TII.isStoreToStackSlot(MI, FI) && HSpiller.rmFromMergeableSpills(MI, FI))
-    --NumSpills;
   LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI);
   MI->eraseFromParent();
 
@@ -813,10 +1166,9 @@ foldMemoryOperand(ArrayRef<std::pair<Mac
 
   if (!WasCopy)
     ++NumFolded;
-  else if (Ops.front().second == 0) {
+  else if (Ops.front().second == 0)
     ++NumSpills;
-    HSpiller.addToMergeableSpills(FoldMI, StackSlot, Original);
-  } else
+  else
     ++NumReloads;
   return true;
 }
@@ -851,7 +1203,6 @@ void InlineSpiller::insertSpill(unsigned
   DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS,
                                            "spill"));
   ++NumSpills;
-  HSpiller.addToMergeableSpills(std::next(MI), StackSlot, Original);
 }
 
 /// spillAroundUses - insert spill code around each use of Reg.
@@ -915,7 +1266,8 @@ void InlineSpiller::spillAroundUses(unsi
         continue;
       }
       if (RI.Writes) {
-        if (hoistSpillInsideBB(OldLI, *MI)) {
+        // Hoist the spill of a sib-reg copy.
+        if (hoistSpill(OldLI, *MI)) {
           // This COPY is now dead, the value is already in the stack slot.
           MI->getOperand(0).setIsDead();
           DeadDefs.push_back(MI);
@@ -1028,6 +1380,7 @@ void InlineSpiller::spill(LiveRangeEdit
   assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
 
   collectRegsToSpill();
+  analyzeSiblingValues();
   reMaterializeAll();
 
   // Remat may handle everything.
@@ -1036,400 +1389,3 @@ void InlineSpiller::spill(LiveRangeEdit
 
   Edit->calculateRegClassAndHint(MF, Loops, MBFI);
 }
-
-/// Optimizations after all the reg selections and spills are done.
-///
-void InlineSpiller::postOptimization() {
-  SmallVector<unsigned, 4> NewVRegs;
-  LiveRangeEdit LRE(nullptr, NewVRegs, MF, LIS, &VRM, nullptr);
-  HSpiller.hoistAllSpills(LRE);
-  assert(NewVRegs.size() == 0 &&
-         "No new vregs should be generated in hoistAllSpills");
-}
-
-/// When a spill is inserted, add the spill to MergeableSpills map.
-///
-void HoistSpillHelper::addToMergeableSpills(MachineInstr *Spill, int StackSlot,
-                                            unsigned Original) {
-  StackSlotToReg[StackSlot] = Original;
-  SlotIndex Idx = LIS.getInstructionIndex(*Spill);
-  VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
-  std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
-  MergeableSpills[MIdx].insert(Spill);
-}
-
-/// When a spill is removed, remove the spill from MergeableSpills map.
-/// Return true if the spill is removed successfully.
-///
-bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr *Spill,
-                                             int StackSlot) {
-  int Original = StackSlotToReg[StackSlot];
-  if (!Original)
-    return false;
-  SlotIndex Idx = LIS.getInstructionIndex(*Spill);
-  VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
-  std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
-  return MergeableSpills[MIdx].erase(Spill);
-}
-
-/// Check BB to see if it is a possible target BB to place a hoisted spill,
-/// i.e., there should be a living sibling of OrigReg at the insert point.
-///
-bool HoistSpillHelper::isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI,
-                                     MachineBasicBlock &BB, unsigned &LiveReg) {
-  SlotIndex Idx;
-  MachineBasicBlock::iterator MI = BB.getFirstTerminator();
-  if (MI != BB.end())
-    Idx = LIS.getInstructionIndex(*MI);
-  else
-    Idx = LIS.getMBBEndIdx(&BB).getPrevSlot();
-  SmallSetVector<unsigned, 16> &Siblings = Virt2SiblingsMap[OrigReg];
-  assert((LIS.getInterval(OrigReg)).getVNInfoAt(Idx) == &OrigVNI &&
-         "Unexpected VNI");
-
-  for (auto const SibReg : Siblings) {
-    LiveInterval &LI = LIS.getInterval(SibReg);
-    VNInfo *VNI = LI.getVNInfoAt(Idx);
-    if (VNI) {
-      LiveReg = SibReg;
-      return true;
-    }
-  }
-  return false;
-}
-
-/// Remove redundent spills in the same BB. Save those redundent spills in
-/// SpillsToRm, and save the spill to keep and its BB in SpillBBToSpill map.
-///
-void HoistSpillHelper::rmRedundantSpills(
-    SmallPtrSet<MachineInstr *, 16> &Spills,
-    SmallVectorImpl<MachineInstr *> &SpillsToRm,
-    DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
-  // For each spill saw, check SpillBBToSpill[] and see if its BB already has
-  // another spill inside. If a BB contains more than one spill, only keep the
-  // earlier spill with smaller SlotIndex.
-  for (const auto CurrentSpill : Spills) {
-    MachineBasicBlock *Block = CurrentSpill->getParent();
-    MachineDomTreeNode *Node = MDT.DT->getNode(Block);
-    MachineInstr *PrevSpill = SpillBBToSpill[Node];
-    if (PrevSpill) {
-      SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill);
-      SlotIndex CIdx = LIS.getInstructionIndex(*CurrentSpill);
-      MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill;
-      MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill;
-      SpillsToRm.push_back(SpillToRm);
-      SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep;
-    } else {
-      SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill;
-    }
-  }
-  for (const auto SpillToRm : SpillsToRm)
-    Spills.erase(SpillToRm);
-}
-
-/// Starting from \p Root find a top-down traversal order of the dominator
-/// tree to visit all basic blocks containing the elements of \p Spills.
-/// Redundant spills will be found and put into \p SpillsToRm at the same
-/// time. \p SpillBBToSpill will be populated as part of the process and
-/// maps a basic block to the first store occurring in the basic block.
-/// \post SpillsToRm.union(Spills\@post) == Spills\@pre
-///
-void HoistSpillHelper::getVisitOrders(
-    MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
-    SmallVectorImpl<MachineDomTreeNode *> &Orders,
-    SmallVectorImpl<MachineInstr *> &SpillsToRm,
-    DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
-    DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
-  // The set contains all the possible BB nodes to which we may hoist
-  // original spills.
-  SmallPtrSet<MachineDomTreeNode *, 8> WorkSet;
-  // Save the BB nodes on the path from the first BB node containing
-  // non-redundent spill to the Root node.
-  SmallPtrSet<MachineDomTreeNode *, 8> NodesOnPath;
-  // All the spills to be hoisted must originate from a single def instruction
-  // to the OrigReg. It means the def instruction should dominate all the spills
-  // to be hoisted. We choose the BB where the def instruction is located as
-  // the Root.
-  MachineDomTreeNode *RootIDomNode = MDT[Root]->getIDom();
-  // For every node on the dominator tree with spill, walk up on the dominator
-  // tree towards the Root node until it is reached. If there is other node
-  // containing spill in the middle of the path, the previous spill saw will
-  // be redundent and the node containing it will be removed. All the nodes on
-  // the path starting from the first node with non-redundent spill to the Root
-  // node will be added to the WorkSet, which will contain all the possible
-  // locations where spills may be hoisted to after the loop below is done.
-  for (const auto Spill : Spills) {
-    MachineBasicBlock *Block = Spill->getParent();
-    MachineDomTreeNode *Node = MDT[Block];
-    MachineInstr *SpillToRm = nullptr;
-    while (Node != RootIDomNode) {
-      // If Node dominates Block, and it already contains a spill, the spill in
-      // Block will be redundent.
-      if (Node != MDT[Block] && SpillBBToSpill[Node]) {
-        SpillToRm = SpillBBToSpill[MDT[Block]];
-        break;
-        /// If we see the Node already in WorkSet, the path from the Node to
-        /// the Root node must already be traversed by another spill.
-        /// Then no need to repeat.
-      } else if (WorkSet.count(Node)) {
-        break;
-      } else {
-        NodesOnPath.insert(Node);
-      }
-      Node = Node->getIDom();
-    }
-    if (SpillToRm) {
-      SpillsToRm.push_back(SpillToRm);
-    } else {
-      // Add a BB containing the original spills to SpillsToKeep -- i.e.,
-      // set the initial status before hoisting start. The value of BBs
-      // containing original spills is set to 0, in order to descriminate
-      // with BBs containing hoisted spills which will be inserted to
-      // SpillsToKeep later during hoisting.
-      SpillsToKeep[MDT[Block]] = 0;
-      WorkSet.insert(NodesOnPath.begin(), NodesOnPath.end());
-    }
-    NodesOnPath.clear();
-  }
-
-  // Sort the nodes in WorkSet in top-down order and save the nodes
-  // in Orders. Orders will be used for hoisting in runHoistSpills.
-  unsigned idx = 0;
-  Orders.push_back(MDT.DT->getNode(Root));
-  do {
-    MachineDomTreeNode *Node = Orders[idx++];
-    const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
-    unsigned NumChildren = Children.size();
-    for (unsigned i = 0; i != NumChildren; ++i) {
-      MachineDomTreeNode *Child = Children[i];
-      if (WorkSet.count(Child))
-        Orders.push_back(Child);
-    }
-  } while (idx != Orders.size());
-  assert(Orders.size() == WorkSet.size() &&
-         "Orders have different size with WorkSet");
-
-#ifndef NDEBUG
-  DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n");
-  SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
-  for (; RIt != Orders.rend(); RIt++)
-    DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ",");
-  DEBUG(dbgs() << "\n");
-#endif
-}
-
-/// Try to hoist spills according to BB hotness. The spills to removed will
-/// be saved in \p SpillsToRm. The spills to be inserted will be saved in
-/// \p SpillsToIns.
-///
-void HoistSpillHelper::runHoistSpills(
-    unsigned OrigReg, VNInfo &OrigVNI, SmallPtrSet<MachineInstr *, 16> &Spills,
-    SmallVectorImpl<MachineInstr *> &SpillsToRm,
-    DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns) {
-  // Visit order of dominator tree nodes.
-  SmallVector<MachineDomTreeNode *, 32> Orders;
-  // SpillsToKeep contains all the nodes where spills are to be inserted
-  // during hoisting. If the spill to be inserted is an original spill
-  // (not a hoisted one), the value of the map entry is 0. If the spill
-  // is a hoisted spill, the value of the map entry is the VReg to be used
-  // as the source of the spill.
-  DenseMap<MachineDomTreeNode *, unsigned> SpillsToKeep;
-  // Map from BB to the first spill inside of it.
-  DenseMap<MachineDomTreeNode *, MachineInstr *> SpillBBToSpill;
-
-  rmRedundantSpills(Spills, SpillsToRm, SpillBBToSpill);
-
-  MachineBasicBlock *Root = LIS.getMBBFromIndex(OrigVNI.def);
-  getVisitOrders(Root, Spills, Orders, SpillsToRm, SpillsToKeep,
-                 SpillBBToSpill);
-
-  // SpillsInSubTreeMap keeps the map from a dom tree node to a pair of
-  // nodes set and the cost of all the spills inside those nodes.
-  // The nodes set are the locations where spills are to be inserted
-  // in the subtree of current node.
-  typedef std::pair<SmallPtrSet<MachineDomTreeNode *, 16>, BlockFrequency>
-      NodesCostPair;
-  DenseMap<MachineDomTreeNode *, NodesCostPair> SpillsInSubTreeMap;
-  // Iterate Orders set in reverse order, which will be a bottom-up order
-  // in the dominator tree. Once we visit a dom tree node, we know its
-  // children have already been visited and the spill locations in the
-  // subtrees of all the children have been determined.
-  SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
-  for (; RIt != Orders.rend(); RIt++) {
-    MachineBasicBlock *Block = (*RIt)->getBlock();
-
-    // If Block contains an original spill, simply continue.
-    if (SpillsToKeep.find(*RIt) != SpillsToKeep.end() && !SpillsToKeep[*RIt]) {
-      SpillsInSubTreeMap[*RIt].first.insert(*RIt);
-      // SpillsInSubTreeMap[*RIt].second contains the cost of spill.
-      SpillsInSubTreeMap[*RIt].second = MBFI.getBlockFreq(Block);
-      continue;
-    }
-
-    // Collect spills in subtree of current node (*RIt) to
-    // SpillsInSubTreeMap[*RIt].first.
-    const std::vector<MachineDomTreeNode *> &Children = (*RIt)->getChildren();
-    unsigned NumChildren = Children.size();
-    for (unsigned i = 0; i != NumChildren; ++i) {
-      MachineDomTreeNode *Child = Children[i];
-      if (SpillsInSubTreeMap.find(Child) == SpillsInSubTreeMap.end())
-        continue;
-      // SpillsInSubTreeMap[*RIt].second += SpillsInSubTreeMap[Child].second
-      // should be placed before getting the begin and end iterators of
-      // SpillsInSubTreeMap[Child].first, or else the iterators may be
-      // invalidated when SpillsInSubTreeMap[*RIt] is seen the first time
-      // and the map grows and then the original buckets in the map are moved.
-      SpillsInSubTreeMap[*RIt].second += SpillsInSubTreeMap[Child].second;
-      auto BI = SpillsInSubTreeMap[Child].first.begin();
-      auto EI = SpillsInSubTreeMap[Child].first.end();
-      SpillsInSubTreeMap[*RIt].first.insert(BI, EI);
-      SpillsInSubTreeMap.erase(Child);
-    }
-
-    // No spills in subtree, simply continue.
-    if (SpillsInSubTreeMap[*RIt].first.empty())
-      continue;
-
-    // Check whether Block is a possible candidate to insert spill.
-    unsigned LiveReg = 0;
-    if (!isSpillCandBB(OrigReg, OrigVNI, *Block, LiveReg))
-      continue;
-
-    // If there are multiple spills that could be merged, bias a little
-    // to hoist the spill.
-    BranchProbability MarginProb = (SpillsInSubTreeMap[*RIt].first.size() > 1)
-                                       ? BranchProbability(9, 10)
-                                       : BranchProbability(1, 1);
-    if (SpillsInSubTreeMap[*RIt].second >
-        MBFI.getBlockFreq(Block) * MarginProb) {
-      // Hoist: Move spills to current Block.
-      for (const auto SpillBB : SpillsInSubTreeMap[*RIt].first) {
-        // When SpillBB is a BB contains original spill, insert the spill
-        // to SpillsToRm.
-        if (SpillsToKeep.find(SpillBB) != SpillsToKeep.end() &&
-            !SpillsToKeep[SpillBB]) {
-          MachineInstr *SpillToRm = SpillBBToSpill[SpillBB];
-          SpillsToRm.push_back(SpillToRm);
-        }
-        // SpillBB will not contain spill anymore, remove it from SpillsToKeep.
-        SpillsToKeep.erase(SpillBB);
-      }
-      // Current Block is the BB containing the new hoisted spill. Add it to
-      // SpillsToKeep. LiveReg is the source of the new spill.
-      SpillsToKeep[*RIt] = LiveReg;
-      DEBUG({
-        dbgs() << "spills in BB: ";
-        for (const auto Rspill : SpillsInSubTreeMap[*RIt].first)
-          dbgs() << Rspill->getBlock()->getNumber() << " ";
-        dbgs() << "were promoted to BB" << (*RIt)->getBlock()->getNumber()
-               << "\n";
-      });
-      SpillsInSubTreeMap[*RIt].first.clear();
-      SpillsInSubTreeMap[*RIt].first.insert(*RIt);
-      SpillsInSubTreeMap[*RIt].second = MBFI.getBlockFreq(Block);
-    }
-  }
-  // For spills in SpillsToKeep with LiveReg set (i.e., not original spill),
-  // save them to SpillsToIns.
-  for (const auto Ent : SpillsToKeep) {
-    if (Ent.second)
-      SpillsToIns[Ent.first->getBlock()] = Ent.second;
-  }
-}
-
-/// For spills with equal values, remove redundent spills and hoist the left
-/// to less hot spots.
-///
-/// Spills with equal values will be collected into the same set in
-/// MergeableSpills when spill is inserted. These equal spills are originated
-/// from the same define instruction and are dominated by the instruction.
-/// Before hoisting all the equal spills, redundent spills inside in the same
-/// BB is first marked to be deleted. Then starting from spills left, walk up
-/// on the dominator tree towards the Root node where the define instruction
-/// is located, mark the dominated spills to be deleted along the way and
-/// collect the BB nodes on the path from non-dominated spills to the define
-/// instruction into a WorkSet. The nodes in WorkSet are the candidate places
-/// where we consider to hoist the spills. We iterate the WorkSet in bottom-up
-/// order, and for each node, we will decide whether to hoist spills inside
-/// its subtree to that node. In this way, we can get benefit locally even if
-/// hoisting all the equal spills to one cold place is impossible.
-///
-void HoistSpillHelper::hoistAllSpills(LiveRangeEdit &Edit) {
-  // Save the mapping between stackslot and its original reg.
-  DenseMap<int, unsigned> SlotToOrigReg;
-  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
-    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
-    int Slot = VRM.getStackSlot(Reg);
-    if (Slot != VirtRegMap::NO_STACK_SLOT)
-      SlotToOrigReg[Slot] = VRM.getOriginal(Reg);
-    unsigned Original = VRM.getPreSplitReg(Reg);
-    if (!MRI.def_empty(Reg))
-      Virt2SiblingsMap[Original].insert(Reg);
-  }
-
-  // Each entry in MergeableSpills contains a spill set with equal values.
-  for (auto &Ent : MergeableSpills) {
-    int Slot = Ent.first.first;
-    unsigned OrigReg = SlotToOrigReg[Slot];
-    VNInfo *OrigVNI = Ent.first.second;
-    SmallPtrSet<MachineInstr *, 16> &EqValSpills = Ent.second;
-    if (Ent.second.empty())
-      continue;
-
-    DEBUG({
-      dbgs() << "\nFor Slot" << Slot << " and VN" << OrigVNI->id << ":\n"
-             << "Equal spills in BB: ";
-      for (const auto spill : EqValSpills)
-        dbgs() << spill->getParent()->getNumber() << " ";
-      dbgs() << "\n";
-    });
-
-    // SpillsToRm is the spill set to be removed from EqValSpills.
-    SmallVector<MachineInstr *, 16> SpillsToRm;
-    // SpillsToIns is the spill set to be newly inserted after hoisting.
-    DenseMap<MachineBasicBlock *, unsigned> SpillsToIns;
-
-    runHoistSpills(OrigReg, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns);
-
-    DEBUG({
-      dbgs() << "Finally inserted spills in BB: ";
-      for (const auto Ispill : SpillsToIns)
-        dbgs() << Ispill.first->getNumber() << " ";
-      dbgs() << "\nFinally removed spills in BB: ";
-      for (const auto Rspill : SpillsToRm)
-        dbgs() << Rspill->getParent()->getNumber() << " ";
-      dbgs() << "\n";
-    });
-
-    // Stack live range update.
-    LiveInterval &StackIntvl = LSS.getInterval(Slot);
-    if (!SpillsToIns.empty() || !SpillsToRm.empty()) {
-      LiveInterval &OrigLI = LIS.getInterval(OrigReg);
-      StackIntvl.MergeValueInAsValue(OrigLI, OrigVNI,
-                                     StackIntvl.getValNumInfo(0));
-    }
-
-    // Insert hoisted spills.
-    for (auto const Insert : SpillsToIns) {
-      MachineBasicBlock *BB = Insert.first;
-      unsigned LiveReg = Insert.second;
-      MachineBasicBlock::iterator MI = BB->getFirstTerminator();
-      TII.storeRegToStackSlot(*BB, MI, LiveReg, false, Slot,
-                              MRI.getRegClass(LiveReg), &TRI);
-      LIS.InsertMachineInstrRangeInMaps(std::prev(MI), MI);
-      ++NumSpills;
-    }
-
-    // Remove redundent spills or change them to dead instructions.
-    NumSpills -= SpillsToRm.size();
-    for (auto const RMEnt : SpillsToRm) {
-      RMEnt->setDesc(TII.get(TargetOpcode::KILL));
-      for (unsigned i = RMEnt->getNumOperands(); i; --i) {
-        MachineOperand &MO = RMEnt->getOperand(i - 1);
-        if (MO.isReg() && MO.isImplicit() && MO.isDef() && !MO.isDead())
-          RMEnt->RemoveOperand(i - 1);
-      }
-    }
-    Edit.eliminateDeadDefs(SpillsToRm, None, true);
-  }
-}

Modified: llvm/trunk/lib/CodeGen/LiveRangeEdit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveRangeEdit.cpp?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/LiveRangeEdit.cpp (original)
+++ llvm/trunk/lib/CodeGen/LiveRangeEdit.cpp Fri Apr  8 10:17:43 2016
@@ -63,13 +63,10 @@ void LiveRangeEdit::scanRemattable(Alias
   for (VNInfo *VNI : getParent().valnos) {
     if (VNI->isUnused())
       continue;
-    unsigned Original = VRM->getOriginal(getReg());
-    LiveInterval &OrigLI = LIS.getInterval(Original);
-    VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
-    MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def);
+    MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
     if (!DefMI)
       continue;
-    checkRematerializable(OrigVNI, DefMI, aa);
+    checkRematerializable(VNI, DefMI, aa);
   }
   ScannedRemattable = true;
 }
@@ -116,18 +113,24 @@ bool LiveRangeEdit::allUsesAvailableAt(c
   return true;
 }
 
-bool LiveRangeEdit::canRematerializeAt(Remat &RM, VNInfo *OrigVNI,
-                                       SlotIndex UseIdx, bool cheapAsAMove) {
+bool LiveRangeEdit::canRematerializeAt(Remat &RM,
+                                       SlotIndex UseIdx,
+                                       bool cheapAsAMove) {
   assert(ScannedRemattable && "Call anyRematerializable first");
 
   // Use scanRemattable info.
-  if (!Remattable.count(OrigVNI))
+  if (!Remattable.count(RM.ParentVNI))
     return false;
 
   // No defining instruction provided.
   SlotIndex DefIdx;
-  assert(RM.OrigMI && "No defining instruction for remattable value");
-  DefIdx = LIS.getInstructionIndex(*RM.OrigMI);
+  if (RM.OrigMI)
+    DefIdx = LIS.getInstructionIndex(*RM.OrigMI);
+  else {
+    DefIdx = RM.ParentVNI->def;
+    RM.OrigMI = LIS.getInstructionFromIndex(DefIdx);
+    assert(RM.OrigMI && "No defining instruction for remattable value");
+  }
 
   // If only cheap remats were requested, bail out early.
   if (cheapAsAMove && !TII.isAsCheapAsAMove(RM.OrigMI))
@@ -258,15 +261,6 @@ void LiveRangeEdit::eliminateDeadDef(Mac
   // Collect virtual registers to be erased after MI is gone.
   SmallVector<unsigned, 8> RegsToErase;
   bool ReadsPhysRegs = false;
-  bool isOrigDef = false;
-  unsigned Dest;
-  if (VRM && MI->getOperand(0).isReg()) {
-    Dest = MI->getOperand(0).getReg();
-    unsigned Original = VRM->getOriginal(Dest);
-    LiveInterval &OrigLI = LIS.getInterval(Original);
-    VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
-    isOrigDef = SlotIndex::isSameInstr(OrigVNI->def, Idx);
-  }
 
   // Check for live intervals that may shrink
   for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
@@ -320,24 +314,11 @@ void LiveRangeEdit::eliminateDeadDef(Mac
     }
     DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
   } else {
-    // If the dest of MI is an original reg, don't delete the inst. Replace
-    // the dest with a new reg, keep the inst for remat of other siblings.
-    // The inst is saved in LiveRangeEdit::DeadRemats and will be deleted
-    // after all the allocations of the func are done.
-    if (isOrigDef) {
-      unsigned NewDest = createFrom(Dest);
-      pop_back();
-      markDeadRemat(MI);
-      const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
-      MI->substituteRegister(Dest, NewDest, 0, TRI);
-      MI->getOperand(0).setIsDead(false);
-    } else {
-      if (TheDelegate)
-        TheDelegate->LRE_WillEraseInstruction(MI);
-      LIS.RemoveMachineInstrFromMaps(*MI);
-      MI->eraseFromParent();
-      ++NumDCEDeleted;
-    }
+    if (TheDelegate)
+      TheDelegate->LRE_WillEraseInstruction(MI);
+    LIS.RemoveMachineInstrFromMaps(*MI);
+    MI->eraseFromParent();
+    ++NumDCEDeleted;
   }
 
   // Erase any virtregs that are now empty and unused. There may be <undef>
@@ -351,9 +332,8 @@ void LiveRangeEdit::eliminateDeadDef(Mac
   }
 }
 
-void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
-                                      ArrayRef<unsigned> RegsBeingSpilled,
-                                      bool NoSplit) {
+void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
+                                      ArrayRef<unsigned> RegsBeingSpilled) {
   ToShrinkSet ToShrink;
 
   for (;;) {
@@ -375,9 +355,6 @@ void LiveRangeEdit::eliminateDeadDefs(Sm
     if (!LIS.shrinkToUses(LI, &Dead))
       continue;
 
-    if (NoSplit)
-      continue;
-
     // Don't create new intervals for a register being spilled.
     // The new intervals would have to be spilled anyway so its not worth it.
     // Also they currently aren't spilled so creating them and not spilling

Modified: llvm/trunk/lib/CodeGen/RegAllocBase.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocBase.cpp?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegAllocBase.cpp (original)
+++ llvm/trunk/lib/CodeGen/RegAllocBase.cpp Fri Apr  8 10:17:43 2016
@@ -153,12 +153,3 @@ void RegAllocBase::allocatePhysRegs() {
     }
   }
 }
-
-void RegAllocBase::postOptimization() {
-  spiller().postOptimization();
-  for (auto DeadInst : DeadRemats) {
-    LIS->RemoveMachineInstrFromMaps(*DeadInst);
-    DeadInst->eraseFromParent();
-  }
-  DeadRemats.clear();
-}

Modified: llvm/trunk/lib/CodeGen/RegAllocBase.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocBase.h?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegAllocBase.h (original)
+++ llvm/trunk/lib/CodeGen/RegAllocBase.h Fri Apr  8 10:17:43 2016
@@ -65,12 +65,6 @@ protected:
   LiveRegMatrix *Matrix;
   RegisterClassInfo RegClassInfo;
 
-  /// Inst which is a def of an original reg and whose defs are already all
-  /// dead after remat is saved in DeadRemats. The deletion of such inst is
-  /// postponed till all the allocations are done, so its remat expr is
-  /// always available for the remat of all the siblings of the original reg.
-  SmallPtrSet<MachineInstr *, 32> DeadRemats;
-
   RegAllocBase()
     : TRI(nullptr), MRI(nullptr), VRM(nullptr), LIS(nullptr), Matrix(nullptr) {}
 
@@ -83,10 +77,6 @@ protected:
   // physical register assignments.
   void allocatePhysRegs();
 
-  // Include spiller post optimization and removing dead defs left because of
-  // rematerialization.
-  virtual void postOptimization();
-
   // Get a temporary reference to a Spiller instance.
   virtual Spiller &spiller() = 0;
 

Modified: llvm/trunk/lib/CodeGen/RegAllocBasic.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocBasic.cpp?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegAllocBasic.cpp (original)
+++ llvm/trunk/lib/CodeGen/RegAllocBasic.cpp Fri Apr  8 10:17:43 2016
@@ -199,7 +199,7 @@ bool RABasic::spillInterferences(LiveInt
     Matrix->unassign(Spill);
 
     // Spill the extracted interval.
-    LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
+    LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM);
     spiller().spill(LRE);
   }
   return true;
@@ -258,7 +258,7 @@ unsigned RABasic::selectOrSplit(LiveInte
   DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
   if (!VirtReg.isSpillable())
     return ~0u;
-  LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
+  LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM);
   spiller().spill(LRE);
 
   // The live virtual register requesting allocation was spilled, so tell
@@ -283,7 +283,6 @@ bool RABasic::runOnMachineFunction(Machi
   SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
 
   allocatePhysRegs();
-  postOptimization();
 
   // Diagnostic output before rewriting
   DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");

Modified: llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp (original)
+++ llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp Fri Apr  8 10:17:43 2016
@@ -12,6 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/Passes.h"
 #include "AllocationOrder.h"
 #include "InterferenceCache.h"
 #include "LiveDebugVariables.h"
@@ -32,7 +33,6 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
@@ -44,7 +44,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include <queue>
 
@@ -56,14 +55,14 @@ STATISTIC(NumGlobalSplits, "Number of sp
 STATISTIC(NumLocalSplits,  "Number of split local live ranges");
 STATISTIC(NumEvicted,      "Number of interferences evicted");
 
-static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
-    "split-spill-mode", cl::Hidden,
-    cl::desc("Spill mode for splitting live ranges"),
-    cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
-               clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
-               clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
-               clEnumValEnd),
-    cl::init(SplitEditor::SM_Speed));
+static cl::opt<SplitEditor::ComplementSpillMode>
+SplitSpillMode("split-spill-mode", cl::Hidden,
+  cl::desc("Spill mode for splitting live ranges"),
+  cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
+             clEnumValN(SplitEditor::SM_Size,  "size",  "Optimize for size"),
+             clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
+             clEnumValEnd),
+  cl::init(SplitEditor::SM_Partition));
 
 static cl::opt<unsigned>
 LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
@@ -1466,7 +1465,7 @@ unsigned RAGreedy::doRegionSplit(LiveInt
                                  SmallVectorImpl<unsigned> &NewVRegs) {
   SmallVector<unsigned, 8> UsedCands;
   // Prepare split editor.
-  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
   SE->reset(LREdit, SplitSpillMode);
 
   // Assign all edge bundles to the preferred candidate, or NoCand.
@@ -1514,7 +1513,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInt
   assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
   unsigned Reg = VirtReg.reg;
   bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
-  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
   SE->reset(LREdit, SplitSpillMode);
   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
@@ -1586,7 +1585,7 @@ RAGreedy::tryInstructionSplit(LiveInterv
 
   // Always enable split spill mode, since we're effectively spilling to a
   // register.
-  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
   SE->reset(LREdit, SplitEditor::SM_Size);
 
   ArrayRef<SlotIndex> Uses = SA->getUseSlots();
@@ -1909,7 +1908,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInt
                << '-' << Uses[BestAfter] << ", " << BestDiff
                << ", " << (BestAfter - BestBefore + 1) << " instrs\n");
 
-  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
   SE->reset(LREdit);
 
   SE->openIntv();
@@ -2552,7 +2551,7 @@ unsigned RAGreedy::selectOrSplitImpl(Liv
     NewVRegs.push_back(VirtReg.reg);
   } else {
     NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
-    LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+    LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
     spiller().spill(LRE);
     setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
 
@@ -2610,8 +2609,6 @@ bool RAGreedy::runOnMachineFunction(Mach
 
   allocatePhysRegs();
   tryHintsRecoloring();
-  postOptimization();
-
   releaseMemory();
   return true;
 }

Modified: llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp (original)
+++ llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp Fri Apr  8 10:17:43 2016
@@ -123,12 +123,6 @@ private:
 
   RegSet VRegsToAlloc, EmptyIntervalVRegs;
 
-  /// Inst which is a def of an original reg and whose defs are already all
-  /// dead after remat is saved in DeadRemats. The deletion of such inst is
-  /// postponed till all the allocations are done, so its remat expr is
-  /// always available for the remat of all the siblings of the original reg.
-  SmallPtrSet<MachineInstr *, 32> DeadRemats;
-
   /// \brief Finds the initial set of vreg intervals to allocate.
   void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
 
@@ -152,7 +146,6 @@ private:
   void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
                      VirtRegMap &VRM) const;
 
-  void postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS);
 };
 
 char RegAllocPBQP::ID = 0;
@@ -638,8 +631,7 @@ void RegAllocPBQP::spillVReg(unsigned VR
                              VirtRegMap &VRM, Spiller &VRegSpiller) {
 
   VRegsToAlloc.erase(VReg);
-  LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM,
-                    nullptr, &DeadRemats);
+  LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM);
   VRegSpiller.spill(LRE);
 
   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
@@ -721,16 +713,6 @@ void RegAllocPBQP::finalizeAlloc(Machine
   }
 }
 
-void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
-  VRegSpiller.postOptimization();
-  /// Remove dead defs because of rematerialization.
-  for (auto DeadInst : DeadRemats) {
-    LIS.RemoveMachineInstrFromMaps(*DeadInst);
-    DeadInst->eraseFromParent();
-  }
-  DeadRemats.clear();
-}
-
 static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size,
                                          unsigned NumInstr) {
   // All intervals have a spill weight that is mostly proportional to the number
@@ -816,7 +798,6 @@ bool RegAllocPBQP::runOnMachineFunction(
 
   // Finalise allocation, allocate empty ranges.
   finalizeAlloc(MF, LIS, VRM);
-  postOptimization(*VRegSpiller, LIS);
   VRegsToAlloc.clear();
   EmptyIntervalVRegs.clear();
 

Modified: llvm/trunk/lib/CodeGen/Spiller.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/Spiller.h?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/Spiller.h (original)
+++ llvm/trunk/lib/CodeGen/Spiller.h Fri Apr  8 10:17:43 2016
@@ -16,7 +16,6 @@ namespace llvm {
   class MachineFunction;
   class MachineFunctionPass;
   class VirtRegMap;
-  class LiveIntervals;
 
   /// Spiller interface.
   ///
@@ -29,7 +28,7 @@ namespace llvm {
 
     /// spill - Spill the LRE.getParent() live interval.
     virtual void spill(LiveRangeEdit &LRE) = 0;
-    virtual void postOptimization(){};
+
   };
 
   /// Create and return a spiller that will insert spill code directly instead
@@ -37,6 +36,7 @@ namespace llvm {
   Spiller *createInlineSpiller(MachineFunctionPass &pass,
                                MachineFunction &mf,
                                VirtRegMap &vrm);
+
 }
 
 #endif

Modified: llvm/trunk/lib/CodeGen/SplitKit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SplitKit.cpp?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SplitKit.cpp (original)
+++ llvm/trunk/lib/CodeGen/SplitKit.cpp Fri Apr  8 10:17:43 2016
@@ -16,7 +16,6 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -431,13 +430,8 @@ VNInfo *SplitEditor::defFromParent(unsig
   bool Late = RegIdx != 0;
 
   // Attempt cheap-as-a-copy rematerialization.
-  unsigned Original = VRM.getOriginal(Edit->get(RegIdx));
-  LiveInterval &OrigLI = LIS.getInterval(Original);
-  VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
   LiveRangeEdit::Remat RM(ParentVNI);
-  RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
-
-  if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
+  if (Edit->canRematerializeAt(RM, UseIdx, true)) {
     Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
     ++NumRemats;
   } else {
@@ -722,62 +716,7 @@ SplitEditor::findShallowDominator(Machin
   }
 }
 
-void SplitEditor::computeRedundantBackCopies(
-    DenseSet<unsigned> &NotToHoistSet, SmallVectorImpl<VNInfo *> &BackCopies) {
-  LiveInterval *LI = &LIS.getInterval(Edit->get(0));
-  LiveInterval *Parent = &Edit->getParent();
-  SmallVector<SmallPtrSet<VNInfo *, 8>, 8> EqualVNs(Parent->getNumValNums());
-  SmallPtrSet<VNInfo *, 8> DominatedVNIs;
-
-  // Aggregate VNIs having the same value as ParentVNI.
-  for (VNInfo *VNI : LI->valnos) {
-    if (VNI->isUnused())
-      continue;
-    VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
-    EqualVNs[ParentVNI->id].insert(VNI);
-  }
-
-  // For VNI aggregation of each ParentVNI, collect dominated, i.e.,
-  // redundant VNIs to BackCopies.
-  for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) {
-    VNInfo *ParentVNI = Parent->getValNumInfo(i);
-    if (!NotToHoistSet.count(ParentVNI->id))
-      continue;
-    SmallPtrSetIterator<VNInfo *> It1 = EqualVNs[ParentVNI->id].begin();
-    SmallPtrSetIterator<VNInfo *> It2 = It1;
-    for (; It1 != EqualVNs[ParentVNI->id].end(); ++It1) {
-      It2 = It1;
-      for (++It2; It2 != EqualVNs[ParentVNI->id].end(); ++It2) {
-        if (DominatedVNIs.count(*It1) || DominatedVNIs.count(*It2))
-          continue;
-
-        MachineBasicBlock *MBB1 = LIS.getMBBFromIndex((*It1)->def);
-        MachineBasicBlock *MBB2 = LIS.getMBBFromIndex((*It2)->def);
-        if (MBB1 == MBB2) {
-          DominatedVNIs.insert((*It1)->def < (*It2)->def ? (*It2) : (*It1));
-        } else if (MDT.dominates(MBB1, MBB2)) {
-          DominatedVNIs.insert(*It2);
-        } else if (MDT.dominates(MBB2, MBB1)) {
-          DominatedVNIs.insert(*It1);
-        }
-      }
-    }
-    if (!DominatedVNIs.empty()) {
-      forceRecompute(0, ParentVNI);
-      for (auto VNI : DominatedVNIs) {
-        BackCopies.push_back(VNI);
-      }
-      DominatedVNIs.clear();
-    }
-  }
-}
-
-/// For SM_Size mode, find a common dominator for all the back-copies for
-/// the same ParentVNI and hoist the backcopies to the dominator BB.
-/// For SM_Speed mode, if the common dominator is hot and it is not beneficial
-/// to do the hoisting, simply remove the dominated backcopies for the same
-/// ParentVNI.
-void SplitEditor::hoistCopies() {
+void SplitEditor::hoistCopiesForSize() {
   // Get the complement interval, always RegIdx 0.
   LiveInterval *LI = &LIS.getInterval(Edit->get(0));
   LiveInterval *Parent = &Edit->getParent();
@@ -786,11 +725,6 @@ void SplitEditor::hoistCopies() {
   // indexed by ParentVNI->id.
   typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair;
   SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums());
-  // The total cost of all the back-copies for each ParentVNI.
-  SmallVector<BlockFrequency, 8> Costs(Parent->getNumValNums());
-  // The ParentVNI->id set for which hoisting back-copies are not beneficial
-  // for Speed.
-  DenseSet<unsigned> NotToHoistSet;
 
   // Find the nearest common dominator for parent values with multiple
   // back-copies.  If a single back-copy dominates, put it in DomPair.second.
@@ -806,7 +740,6 @@ void SplitEditor::hoistCopies() {
       continue;
 
     MachineBasicBlock *ValMBB = LIS.getMBBFromIndex(VNI->def);
-
     DomPair &Dom = NearestDom[ParentVNI->id];
 
     // Keep directly defined parent values.  This is either a PHI or an
@@ -841,7 +774,6 @@ void SplitEditor::hoistCopies() {
       else if (Near != Dom.first)
         // None dominate. Hoist to common dominator, need new def.
         Dom = DomPair(Near, SlotIndex());
-      Costs[ParentVNI->id] += MBFI.getBlockFreq(ValMBB);
     }
 
     DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def
@@ -860,11 +792,6 @@ void SplitEditor::hoistCopies() {
     MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def);
     // Get a less loopy dominator than Dom.first.
     Dom.first = findShallowDominator(Dom.first, DefMBB);
-    if (SpillMode == SM_Speed &&
-        MBFI.getBlockFreq(Dom.first) > Costs[ParentVNI->id]) {
-      NotToHoistSet.insert(ParentVNI->id);
-      continue;
-    }
     SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot();
     Dom.second =
       defFromParent(0, ParentVNI, Last, *Dom.first,
@@ -879,18 +806,11 @@ void SplitEditor::hoistCopies() {
       continue;
     VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
     const DomPair &Dom = NearestDom[ParentVNI->id];
-    if (!Dom.first || Dom.second == VNI->def ||
-        NotToHoistSet.count(ParentVNI->id))
+    if (!Dom.first || Dom.second == VNI->def)
       continue;
     BackCopies.push_back(VNI);
     forceRecompute(0, ParentVNI);
   }
-
-  // If it is not beneficial to hoist all the BackCopies, simply remove
-  // redundant BackCopies in speed mode.
-  if (SpillMode == SM_Speed && !NotToHoistSet.empty())
-    computeRedundantBackCopies(NotToHoistSet, BackCopies);
-
   removeBackCopies(BackCopies);
 }
 
@@ -1084,8 +1004,6 @@ void SplitEditor::deleteRematVictims() {
       // Dead defs end at the dead slot.
       if (S.end != S.valno->def.getDeadSlot())
         continue;
-      if (S.valno->isPHIDef())
-        continue;
       MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def);
       assert(MI && "Missing instruction for dead def");
       MI->addRegisterDead(LI->reg, &TRI);
@@ -1130,9 +1048,10 @@ void SplitEditor::finish(SmallVectorImpl
     // Leave all back-copies as is.
     break;
   case SM_Size:
+    hoistCopiesForSize();
+    break;
   case SM_Speed:
-    // hoistCopies will behave differently between size and speed.
-    hoistCopies();
+    llvm_unreachable("Spill mode 'speed' not implemented yet");
   }
 
   // Transfer the simply mapped values, check if any are skipped.

Modified: llvm/trunk/lib/CodeGen/SplitKit.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SplitKit.h?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SplitKit.h (original)
+++ llvm/trunk/lib/CodeGen/SplitKit.h Fri Apr  8 10:17:43 2016
@@ -18,7 +18,6 @@
 #include "LiveRangeCalc.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntervalMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 
@@ -330,14 +329,9 @@ private:
   MachineBasicBlock *findShallowDominator(MachineBasicBlock *MBB,
                                           MachineBasicBlock *DefMBB);
 
-  /// Find out all the backCopies dominated by others.
-  void computeRedundantBackCopies(DenseSet<unsigned> &NotToHoistSet,
-                                  SmallVectorImpl<VNInfo *> &BackCopies);
-
-  /// Hoist back-copies to the complement interval. It tries to hoist all
-  /// the back-copies to one BB if it is beneficial, or else simply remove
-  /// redundent backcopies dominated by others.
-  void hoistCopies();
+  /// hoistCopiesForSize - Hoist back-copies to the complement interval in a
+  /// way that minimizes code size. This implements the SM_Size spill mode.
+  void hoistCopiesForSize();
 
   /// transferValues - Transfer values to the new ranges.
   /// Return true if any ranges were skipped.

Added: llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll?rev=265790&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll Fri Apr  8 10:17:43 2016
@@ -0,0 +1,514 @@
+;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=true -mcpu=cortex-a57 -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=DEFERRED
+;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=false -mcpu=cortex-a57 -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=REGULAR
+
+; Check that we do not end up with useless spill code.
+;
+; Move to the basic block we are interested in.
+;
+; CHECK: // %if.then.120
+;
+; REGULAR: str w21, [sp, #[[OFFSET:[0-9]+]]] // 4-byte Folded Spill
+; Check that w21 wouldn't need to be spilled since it is never reused.
+; REGULAR-NOT: {{[wx]}}21{{,?}}
+;
+; Check that w22 is used to carry a value through the call.
+; DEFERRED-NOT: str {{[wx]}}22,
+; DEFERRED: mov {{[wx]}}22,
+; DEFERRED-NOT: str {{[wx]}}22,
+;
+; CHECK:        bl      fprintf
+;
+; DEFERRED-NOT: ldr {{[wx]}}22,
+; DEFERRED: mov {{[wx][0-9]+}}, {{[wx]}}22
+; DEFERRED-NOT: ldr {{[wx]}}22,
+;
+; REGULAR-NOT: {{[wx]}}21{{,?}}
+; REGULAR: ldr w21, [sp, #[[OFFSET]]] // 4-byte Folded Reload
+;
+; End of the basic block we are interested in.
+; CHECK:        b
+; CHECK: {{[^:]+}}: // %sw.bb.123
+
+%struct.__sFILE = type { i8*, i32, i32, i32, i32, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+%struct.__sbuf = type { i8*, i64 }
+%struct.DState = type { %struct.bz_stream*, i32, i8, i32, i8, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* }
+%struct.bz_stream = type { i8*, i32, i32, i32, i8*, i32, i32, i32, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8* }
+
+@__sF = external global [0 x %struct.__sFILE], align 8
+@.str = private unnamed_addr constant [20 x i8] c"\0A    [%d: stuff+mf \00", align 1
+
+declare i32 @fprintf(%struct.__sFILE* nocapture, i8* nocapture readonly, ...)
+
+declare void @bar(i32)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+
+define i32 @foo(%struct.DState* %s) {
+entry:
+  %state = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 1
+  %tmp = load i32, i32* %state, align 4
+  %cmp = icmp eq i32 %tmp, 10
+  %save_i = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 40
+  br i1 %cmp, label %if.end.thread, label %if.end
+
+if.end.thread:                                    ; preds = %entry
+  %save_j = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41
+  %save_t = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42
+  %save_alphaSize = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43
+  %save_nGroups = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44
+  %save_nSelectors = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45
+  %save_EOB = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46
+  %save_groupNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47
+  %save_groupPos = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48
+  %save_nextSym = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49
+  %save_nblockMAX = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50
+  %save_nblock = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51
+  %save_es = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52
+  %save_N = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53
+  %save_curr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54
+  %save_zt = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55
+  %save_zn = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56
+  %save_zvec = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57
+  %save_zj = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58
+  %tmp1 = bitcast i32* %save_i to i8*
+  call void @llvm.memset.p0i8.i64(i8* %tmp1, i8 0, i64 108, i32 4, i1 false)
+  br label %sw.default
+
+if.end:                                           ; preds = %entry
+  %.pre = load i32, i32* %save_i, align 4
+  %save_j3.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41
+  %.pre406 = load i32, i32* %save_j3.phi.trans.insert, align 4
+  %save_t4.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42
+  %.pre407 = load i32, i32* %save_t4.phi.trans.insert, align 4
+  %save_alphaSize5.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43
+  %.pre408 = load i32, i32* %save_alphaSize5.phi.trans.insert, align 4
+  %save_nGroups6.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44
+  %.pre409 = load i32, i32* %save_nGroups6.phi.trans.insert, align 4
+  %save_nSelectors7.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45
+  %.pre410 = load i32, i32* %save_nSelectors7.phi.trans.insert, align 4
+  %save_EOB8.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46
+  %.pre411 = load i32, i32* %save_EOB8.phi.trans.insert, align 4
+  %save_groupNo9.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47
+  %.pre412 = load i32, i32* %save_groupNo9.phi.trans.insert, align 4
+  %save_groupPos10.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48
+  %.pre413 = load i32, i32* %save_groupPos10.phi.trans.insert, align 4
+  %save_nextSym11.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49
+  %.pre414 = load i32, i32* %save_nextSym11.phi.trans.insert, align 4
+  %save_nblockMAX12.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50
+  %.pre415 = load i32, i32* %save_nblockMAX12.phi.trans.insert, align 4
+  %save_nblock13.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51
+  %.pre416 = load i32, i32* %save_nblock13.phi.trans.insert, align 4
+  %save_es14.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52
+  %.pre417 = load i32, i32* %save_es14.phi.trans.insert, align 4
+  %save_N15.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53
+  %.pre418 = load i32, i32* %save_N15.phi.trans.insert, align 4
+  %save_curr16.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54
+  %.pre419 = load i32, i32* %save_curr16.phi.trans.insert, align 4
+  %save_zt17.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55
+  %.pre420 = load i32, i32* %save_zt17.phi.trans.insert, align 4
+  %save_zn18.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56
+  %.pre421 = load i32, i32* %save_zn18.phi.trans.insert, align 4
+  %save_zvec19.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57
+  %.pre422 = load i32, i32* %save_zvec19.phi.trans.insert, align 4
+  %save_zj20.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58
+  %.pre423 = load i32, i32* %save_zj20.phi.trans.insert, align 4
+  switch i32 %tmp, label %sw.default [
+    i32 13, label %sw.bb
+    i32 14, label %if.end.sw.bb.65_crit_edge
+    i32 25, label %if.end.sw.bb.123_crit_edge
+  ]
+
+if.end.sw.bb.123_crit_edge:                       ; preds = %if.end
+  %.pre433 = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
+  br label %sw.bb.123
+
+if.end.sw.bb.65_crit_edge:                        ; preds = %if.end
+  %bsLive69.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
+  %.pre426 = load i32, i32* %bsLive69.phi.trans.insert, align 4
+  br label %sw.bb.65
+
+sw.bb:                                            ; preds = %if.end
+  %sunkaddr = ptrtoint %struct.DState* %s to i64
+  %sunkaddr485 = add i64 %sunkaddr, 8
+  %sunkaddr486 = inttoptr i64 %sunkaddr485 to i32*
+  store i32 13, i32* %sunkaddr486, align 4
+  %bsLive = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
+  %tmp2 = load i32, i32* %bsLive, align 4
+  %cmp28.400 = icmp sgt i32 %tmp2, 7
+  br i1 %cmp28.400, label %sw.bb.if.then.29_crit_edge, label %if.end.33.lr.ph
+
+sw.bb.if.then.29_crit_edge:                       ; preds = %sw.bb
+  %sunkaddr487 = ptrtoint %struct.DState* %s to i64
+  %sunkaddr488 = add i64 %sunkaddr487, 32
+  %sunkaddr489 = inttoptr i64 %sunkaddr488 to i32*
+  %.pre425 = load i32, i32* %sunkaddr489, align 4
+  br label %if.then.29
+
+if.end.33.lr.ph:                                  ; preds = %sw.bb
+  %tmp3 = bitcast %struct.DState* %s to %struct.bz_stream**
+  %.pre424 = load %struct.bz_stream*, %struct.bz_stream** %tmp3, align 8
+  %avail_in.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre424, i64 0, i32 1
+  %.pre430 = load i32, i32* %avail_in.phi.trans.insert, align 4
+  %tmp4 = add i32 %.pre430, -1
+  br label %if.end.33
+
+if.then.29:                                       ; preds = %while.body.backedge, %sw.bb.if.then.29_crit_edge
+  %tmp5 = phi i32 [ %.pre425, %sw.bb.if.then.29_crit_edge ], [ %or, %while.body.backedge ]
+  %.lcssa393 = phi i32 [ %tmp2, %sw.bb.if.then.29_crit_edge ], [ %add, %while.body.backedge ]
+  %sub = add nsw i32 %.lcssa393, -8
+  %shr = lshr i32 %tmp5, %sub
+  %and = and i32 %shr, 255
+  %sunkaddr491 = ptrtoint %struct.DState* %s to i64
+  %sunkaddr492 = add i64 %sunkaddr491, 36
+  %sunkaddr493 = inttoptr i64 %sunkaddr492 to i32*
+  store i32 %sub, i32* %sunkaddr493, align 4
+  %blockSize100k = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 9
+  store i32 %and, i32* %blockSize100k, align 4
+  %and.off = add nsw i32 %and, -49
+  %tmp6 = icmp ugt i32 %and.off, 8
+  br i1 %tmp6, label %save_state_and_return, label %if.end.62
+
+if.end.33:                                        ; preds = %while.body.backedge, %if.end.33.lr.ph
+  %lsr.iv482 = phi i32 [ %tmp4, %if.end.33.lr.ph ], [ %lsr.iv.next483, %while.body.backedge ]
+  %tmp7 = phi i32 [ %tmp2, %if.end.33.lr.ph ], [ %add, %while.body.backedge ]
+  %cmp35 = icmp eq i32 %lsr.iv482, -1
+  br i1 %cmp35, label %save_state_and_return, label %if.end.37
+
+if.end.37:                                        ; preds = %if.end.33
+  %tmp8 = bitcast %struct.bz_stream* %.pre424 to i8**
+  %sunkaddr494 = ptrtoint %struct.DState* %s to i64
+  %sunkaddr495 = add i64 %sunkaddr494, 32
+  %sunkaddr496 = inttoptr i64 %sunkaddr495 to i32*
+  %tmp9 = load i32, i32* %sunkaddr496, align 4
+  %shl = shl i32 %tmp9, 8
+  %tmp10 = load i8*, i8** %tmp8, align 8
+  %tmp11 = load i8, i8* %tmp10, align 1
+  %conv = zext i8 %tmp11 to i32
+  %or = or i32 %conv, %shl
+  store i32 %or, i32* %sunkaddr496, align 4
+  %add = add nsw i32 %tmp7, 8
+  %sunkaddr497 = ptrtoint %struct.DState* %s to i64
+  %sunkaddr498 = add i64 %sunkaddr497, 36
+  %sunkaddr499 = inttoptr i64 %sunkaddr498 to i32*
+  store i32 %add, i32* %sunkaddr499, align 4
+  %incdec.ptr = getelementptr inbounds i8, i8* %tmp10, i64 1
+  store i8* %incdec.ptr, i8** %tmp8, align 8
+  %sunkaddr500 = ptrtoint %struct.bz_stream* %.pre424 to i64
+  %sunkaddr501 = add i64 %sunkaddr500, 8
+  %sunkaddr502 = inttoptr i64 %sunkaddr501 to i32*
+  store i32 %lsr.iv482, i32* %sunkaddr502, align 4
+  %sunkaddr503 = ptrtoint %struct.bz_stream* %.pre424 to i64
+  %sunkaddr504 = add i64 %sunkaddr503, 12
+  %sunkaddr505 = inttoptr i64 %sunkaddr504 to i32*
+  %tmp12 = load i32, i32* %sunkaddr505, align 4
+  %inc = add i32 %tmp12, 1
+  store i32 %inc, i32* %sunkaddr505, align 4
+  %cmp49 = icmp eq i32 %inc, 0
+  br i1 %cmp49, label %if.then.51, label %while.body.backedge
+
+if.then.51:                                       ; preds = %if.end.37
+  %sunkaddr506 = ptrtoint %struct.bz_stream* %.pre424 to i64
+  %sunkaddr507 = add i64 %sunkaddr506, 16
+  %sunkaddr508 = inttoptr i64 %sunkaddr507 to i32*
+  %tmp13 = load i32, i32* %sunkaddr508, align 4
+  %inc53 = add i32 %tmp13, 1
+  store i32 %inc53, i32* %sunkaddr508, align 4
+  br label %while.body.backedge
+
+while.body.backedge:                              ; preds = %if.then.51, %if.end.37
+  %lsr.iv.next483 = add i32 %lsr.iv482, -1
+  %cmp28 = icmp sgt i32 %add, 7
+  br i1 %cmp28, label %if.then.29, label %if.end.33
+
+if.end.62:                                        ; preds = %if.then.29
+  %sub64 = add nsw i32 %and, -48
+  %sunkaddr509 = ptrtoint %struct.DState* %s to i64
+  %sunkaddr510 = add i64 %sunkaddr509, 40
+  %sunkaddr511 = inttoptr i64 %sunkaddr510 to i32*
+  store i32 %sub64, i32* %sunkaddr511, align 4
+  br label %sw.bb.65
+
+sw.bb.65:                                         ; preds = %if.end.62, %if.end.sw.bb.65_crit_edge
+  %bsLive69.pre-phi = phi i32* [ %bsLive69.phi.trans.insert, %if.end.sw.bb.65_crit_edge ], [ %bsLive, %if.end.62 ]
+  %tmp14 = phi i32 [ %.pre426, %if.end.sw.bb.65_crit_edge ], [ %sub, %if.end.62 ]
+  %sunkaddr512 = ptrtoint %struct.DState* %s to i64
+  %sunkaddr513 = add i64 %sunkaddr512, 8
+  %sunkaddr514 = inttoptr i64 %sunkaddr513 to i32*
+  store i32 14, i32* %sunkaddr514, align 4
+  %cmp70.397 = icmp sgt i32 %tmp14, 7
+  br i1 %cmp70.397, label %if.then.72, label %if.end.82.lr.ph
+
+if.end.82.lr.ph:                                  ; preds = %sw.bb.65
+  %tmp15 = bitcast %struct.DState* %s to %struct.bz_stream**
+  %.pre427 = load %struct.bz_stream*, %struct.bz_stream** %tmp15, align 8
+  %avail_in84.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre427, i64 0, i32 1
+  %.pre431 = load i32, i32* %avail_in84.phi.trans.insert, align 4
+  %tmp16 = add i32 %.pre431, -1
+  br label %if.end.82
+
+if.then.72:                                       ; preds = %while.body.68.backedge, %sw.bb.65
+  %.lcssa390 = phi i32 [ %tmp14, %sw.bb.65 ], [ %add97, %while.body.68.backedge ]
+  %sub76 = add nsw i32 %.lcssa390, -8
+  %sunkaddr516 = ptrtoint %struct.DState* %s to i64
+  %sunkaddr517 = add i64 %sunkaddr516, 36
+  %sunkaddr518 = inttoptr i64 %sunkaddr517 to i32*
+  store i32 %sub76, i32* %sunkaddr518, align 4
+  %currBlockNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 11
+  %tmp17 = load i32, i32* %currBlockNo, align 4
+  %inc117 = add nsw i32 %tmp17, 1
+  store i32 %inc117, i32* %currBlockNo, align 4
+  %verbosity = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 12
+  %tmp18 = load i32, i32* %verbosity, align 4
+  %cmp118 = icmp sgt i32 %tmp18, 1
+  br i1 %cmp118, label %if.then.120, label %sw.bb.123, !prof !0
+
+if.end.82:                                        ; preds = %while.body.68.backedge, %if.end.82.lr.ph
+  %lsr.iv480 = phi i32 [ %tmp16, %if.end.82.lr.ph ], [ %lsr.iv.next481, %while.body.68.backedge ]
+  %tmp19 = phi i32 [ %tmp14, %if.end.82.lr.ph ], [ %add97, %while.body.68.backedge ]
+  %cmp85 = icmp eq i32 %lsr.iv480, -1
+  br i1 %cmp85, label %save_state_and_return, label %if.end.88
+
+if.end.88:                                        ; preds = %if.end.82
+  %tmp20 = bitcast %struct.bz_stream* %.pre427 to i8**
+  %sunkaddr519 = ptrtoint %struct.DState* %s to i64
+  %sunkaddr520 = add i64 %sunkaddr519, 32
+  %sunkaddr521 = inttoptr i64 %sunkaddr520 to i32*
+  %tmp21 = load i32, i32* %sunkaddr521, align 4
+  %shl90 = shl i32 %tmp21, 8
+  %tmp22 = load i8*, i8** %tmp20, align 8
+  %tmp23 = load i8, i8* %tmp22, align 1
+  %conv93 = zext i8 %tmp23 to i32
+  %or94 = or i32 %conv93, %shl90
+  store i32 %or94, i32* %sunkaddr521, align 4
+  %add97 = add nsw i32 %tmp19, 8
+  %sunkaddr522 = ptrtoint %struct.DState* %s to i64
+  %sunkaddr523 = add i64 %sunkaddr522, 36
+  %sunkaddr524 = inttoptr i64 %sunkaddr523 to i32*
+  store i32 %add97, i32* %sunkaddr524, align 4
+  %incdec.ptr100 = getelementptr inbounds i8, i8* %tmp22, i64 1
+  store i8* %incdec.ptr100, i8** %tmp20, align 8
+  %sunkaddr525 = ptrtoint %struct.bz_stream* %.pre427 to i64
+  %sunkaddr526 = add i64 %sunkaddr525, 8
+  %sunkaddr527 = inttoptr i64 %sunkaddr526 to i32*
+  store i32 %lsr.iv480, i32* %sunkaddr527, align 4
+  %sunkaddr528 = ptrtoint %struct.bz_stream* %.pre427 to i64
+  %sunkaddr529 = add i64 %sunkaddr528, 12
+  %sunkaddr530 = inttoptr i64 %sunkaddr529 to i32*
+  %tmp24 = load i32, i32* %sunkaddr530, align 4
+  %inc106 = add i32 %tmp24, 1
+  store i32 %inc106, i32* %sunkaddr530, align 4
+  %cmp109 = icmp eq i32 %inc106, 0
+  br i1 %cmp109, label %if.then.111, label %while.body.68.backedge
+
+if.then.111:                                      ; preds = %if.end.88
+  %sunkaddr531 = ptrtoint %struct.bz_stream* %.pre427 to i64
+  %sunkaddr532 = add i64 %sunkaddr531, 16
+  %sunkaddr533 = inttoptr i64 %sunkaddr532 to i32*
+  %tmp25 = load i32, i32* %sunkaddr533, align 4
+  %inc114 = add i32 %tmp25, 1
+  store i32 %inc114, i32* %sunkaddr533, align 4
+  br label %while.body.68.backedge
+
+while.body.68.backedge:                           ; preds = %if.then.111, %if.end.88
+  %lsr.iv.next481 = add i32 %lsr.iv480, -1
+  %cmp70 = icmp sgt i32 %add97, 7
+  br i1 %cmp70, label %if.then.72, label %if.end.82
+
+if.then.120:                                      ; preds = %if.then.72
+  %call = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* getelementptr inbounds ([0 x %struct.__sFILE], [0 x %struct.__sFILE]* @__sF, i64 0, i64 2), i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str, i64 0, i64 0), i32 %inc117)
+  br label %sw.bb.123
+
+sw.bb.123:                                        ; preds = %if.then.120, %if.then.72, %if.end.sw.bb.123_crit_edge
+  %bsLive127.pre-phi = phi i32* [ %.pre433, %if.end.sw.bb.123_crit_edge ], [ %bsLive69.pre-phi, %if.then.72 ], [ %bsLive69.pre-phi, %if.then.120 ]
+  %sunkaddr534 = ptrtoint %struct.DState* %s to i64
+  %sunkaddr535 = add i64 %sunkaddr534, 8
+  %sunkaddr536 = inttoptr i64 %sunkaddr535 to i32*
+  store i32 25, i32* %sunkaddr536, align 4
+  %tmp26 = load i32, i32* %bsLive127.pre-phi, align 4
+  %cmp128.395 = icmp sgt i32 %tmp26, 7
+  br i1 %cmp128.395, label %sw.bb.123.if.then.130_crit_edge, label %if.end.140.lr.ph
+
+sw.bb.123.if.then.130_crit_edge:                  ; preds = %sw.bb.123
+  %sunkaddr537 = ptrtoint %struct.DState* %s to i64
+  %sunkaddr538 = add i64 %sunkaddr537, 32
+  %sunkaddr539 = inttoptr i64 %sunkaddr538 to i32*
+  %.pre429 = load i32, i32* %sunkaddr539, align 4
+  br label %if.then.130
+
+if.end.140.lr.ph:                                 ; preds = %sw.bb.123
+  %tmp27 = bitcast %struct.DState* %s to %struct.bz_stream**
+  %.pre428 = load %struct.bz_stream*, %struct.bz_stream** %tmp27, align 8
+  %avail_in142.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre428, i64 0, i32 1
+  %.pre432 = load i32, i32* %avail_in142.phi.trans.insert, align 4
+  %tmp28 = add i32 %.pre432, -1
+  br label %if.end.140
+
+if.then.130:                                      ; preds = %while.body.126.backedge, %sw.bb.123.if.then.130_crit_edge
+  %tmp29 = phi i32 [ %.pre429, %sw.bb.123.if.then.130_crit_edge ], [ %or152, %while.body.126.backedge ]
+  %.lcssa = phi i32 [ %tmp26, %sw.bb.123.if.then.130_crit_edge ], [ %add155, %while.body.126.backedge ]
+  %sub134 = add nsw i32 %.lcssa, -8
+  %shr135 = lshr i32 %tmp29, %sub134
+  store i32 %sub134, i32* %bsLive127.pre-phi, align 4
+  %origPtr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 13
+  %tmp30 = load i32, i32* %origPtr, align 4
+  %shl175 = shl i32 %tmp30, 8
+  %conv176 = and i32 %shr135, 255
+  %or177 = or i32 %shl175, %conv176
+  store i32 %or177, i32* %origPtr, align 4
+  %nInUse = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 27
+  %tmp31 = load i32, i32* %nInUse, align 4
+  %add179 = add nsw i32 %tmp31, 2
+  br label %save_state_and_return
+
+if.end.140:                                       ; preds = %while.body.126.backedge, %if.end.140.lr.ph
+  %lsr.iv = phi i32 [ %tmp28, %if.end.140.lr.ph ], [ %lsr.iv.next, %while.body.126.backedge ]
+  %tmp32 = phi i32 [ %tmp26, %if.end.140.lr.ph ], [ %add155, %while.body.126.backedge ]
+  %cmp143 = icmp eq i32 %lsr.iv, -1
+  br i1 %cmp143, label %save_state_and_return, label %if.end.146
+
+if.end.146:                                       ; preds = %if.end.140
+  %tmp33 = bitcast %struct.bz_stream* %.pre428 to i8**
+  %sunkaddr541 = ptrtoint %struct.DState* %s to i64
+  %sunkaddr542 = add i64 %sunkaddr541, 32
+  %sunkaddr543 = inttoptr i64 %sunkaddr542 to i32*
+  %tmp34 = load i32, i32* %sunkaddr543, align 4
+  %shl148 = shl i32 %tmp34, 8
+  %tmp35 = load i8*, i8** %tmp33, align 8
+  %tmp36 = load i8, i8* %tmp35, align 1
+  %conv151 = zext i8 %tmp36 to i32
+  %or152 = or i32 %conv151, %shl148
+  store i32 %or152, i32* %sunkaddr543, align 4
+  %add155 = add nsw i32 %tmp32, 8
+  store i32 %add155, i32* %bsLive127.pre-phi, align 4
+  %incdec.ptr158 = getelementptr inbounds i8, i8* %tmp35, i64 1
+  store i8* %incdec.ptr158, i8** %tmp33, align 8
+  %sunkaddr544 = ptrtoint %struct.bz_stream* %.pre428 to i64
+  %sunkaddr545 = add i64 %sunkaddr544, 8
+  %sunkaddr546 = inttoptr i64 %sunkaddr545 to i32*
+  store i32 %lsr.iv, i32* %sunkaddr546, align 4
+  %sunkaddr547 = ptrtoint %struct.bz_stream* %.pre428 to i64
+  %sunkaddr548 = add i64 %sunkaddr547, 12
+  %sunkaddr549 = inttoptr i64 %sunkaddr548 to i32*
+  %tmp37 = load i32, i32* %sunkaddr549, align 4
+  %inc164 = add i32 %tmp37, 1
+  store i32 %inc164, i32* %sunkaddr549, align 4
+  %cmp167 = icmp eq i32 %inc164, 0
+  br i1 %cmp167, label %if.then.169, label %while.body.126.backedge
+
+if.then.169:                                      ; preds = %if.end.146
+  %sunkaddr550 = ptrtoint %struct.bz_stream* %.pre428 to i64
+  %sunkaddr551 = add i64 %sunkaddr550, 16
+  %sunkaddr552 = inttoptr i64 %sunkaddr551 to i32*
+  %tmp38 = load i32, i32* %sunkaddr552, align 4
+  %inc172 = add i32 %tmp38, 1
+  store i32 %inc172, i32* %sunkaddr552, align 4
+  br label %while.body.126.backedge
+
+while.body.126.backedge:                          ; preds = %if.then.169, %if.end.146
+  %lsr.iv.next = add i32 %lsr.iv, -1
+  %cmp128 = icmp sgt i32 %add155, 7
+  br i1 %cmp128, label %if.then.130, label %if.end.140
+
+sw.default:                                       ; preds = %if.end, %if.end.thread
+  %tmp39 = phi i32 [ 0, %if.end.thread ], [ %.pre, %if.end ]
+  %tmp40 = phi i32 [ 0, %if.end.thread ], [ %.pre406, %if.end ]
+  %tmp41 = phi i32 [ 0, %if.end.thread ], [ %.pre407, %if.end ]
+  %tmp42 = phi i32 [ 0, %if.end.thread ], [ %.pre408, %if.end ]
+  %tmp43 = phi i32 [ 0, %if.end.thread ], [ %.pre409, %if.end ]
+  %tmp44 = phi i32 [ 0, %if.end.thread ], [ %.pre410, %if.end ]
+  %tmp45 = phi i32 [ 0, %if.end.thread ], [ %.pre411, %if.end ]
+  %tmp46 = phi i32 [ 0, %if.end.thread ], [ %.pre412, %if.end ]
+  %tmp47 = phi i32 [ 0, %if.end.thread ], [ %.pre413, %if.end ]
+  %tmp48 = phi i32 [ 0, %if.end.thread ], [ %.pre414, %if.end ]
+  %tmp49 = phi i32 [ 0, %if.end.thread ], [ %.pre415, %if.end ]
+  %tmp50 = phi i32 [ 0, %if.end.thread ], [ %.pre416, %if.end ]
+  %tmp51 = phi i32 [ 0, %if.end.thread ], [ %.pre417, %if.end ]
+  %tmp52 = phi i32 [ 0, %if.end.thread ], [ %.pre418, %if.end ]
+  %tmp53 = phi i32 [ 0, %if.end.thread ], [ %.pre419, %if.end ]
+  %tmp54 = phi i32 [ 0, %if.end.thread ], [ %.pre420, %if.end ]
+  %tmp55 = phi i32 [ 0, %if.end.thread ], [ %.pre421, %if.end ]
+  %tmp56 = phi i32 [ 0, %if.end.thread ], [ %.pre422, %if.end ]
+  %tmp57 = phi i32 [ 0, %if.end.thread ], [ %.pre423, %if.end ]
+  %save_j3.pre-phi469 = phi i32* [ %save_j, %if.end.thread ], [ %save_j3.phi.trans.insert, %if.end ]
+  %save_t4.pre-phi467 = phi i32* [ %save_t, %if.end.thread ], [ %save_t4.phi.trans.insert, %if.end ]
+  %save_alphaSize5.pre-phi465 = phi i32* [ %save_alphaSize, %if.end.thread ], [ %save_alphaSize5.phi.trans.insert, %if.end ]
+  %save_nGroups6.pre-phi463 = phi i32* [ %save_nGroups, %if.end.thread ], [ %save_nGroups6.phi.trans.insert, %if.end ]
+  %save_nSelectors7.pre-phi461 = phi i32* [ %save_nSelectors, %if.end.thread ], [ %save_nSelectors7.phi.trans.insert, %if.end ]
+  %save_EOB8.pre-phi459 = phi i32* [ %save_EOB, %if.end.thread ], [ %save_EOB8.phi.trans.insert, %if.end ]
+  %save_groupNo9.pre-phi457 = phi i32* [ %save_groupNo, %if.end.thread ], [ %save_groupNo9.phi.trans.insert, %if.end ]
+  %save_groupPos10.pre-phi455 = phi i32* [ %save_groupPos, %if.end.thread ], [ %save_groupPos10.phi.trans.insert, %if.end ]
+  %save_nextSym11.pre-phi453 = phi i32* [ %save_nextSym, %if.end.thread ], [ %save_nextSym11.phi.trans.insert, %if.end ]
+  %save_nblockMAX12.pre-phi451 = phi i32* [ %save_nblockMAX, %if.end.thread ], [ %save_nblockMAX12.phi.trans.insert, %if.end ]
+  %save_nblock13.pre-phi449 = phi i32* [ %save_nblock, %if.end.thread ], [ %save_nblock13.phi.trans.insert, %if.end ]
+  %save_es14.pre-phi447 = phi i32* [ %save_es, %if.end.thread ], [ %save_es14.phi.trans.insert, %if.end ]
+  %save_N15.pre-phi445 = phi i32* [ %save_N, %if.end.thread ], [ %save_N15.phi.trans.insert, %if.end ]
+  %save_curr16.pre-phi443 = phi i32* [ %save_curr, %if.end.thread ], [ %save_curr16.phi.trans.insert, %if.end ]
+  %save_zt17.pre-phi441 = phi i32* [ %save_zt, %if.end.thread ], [ %save_zt17.phi.trans.insert, %if.end ]
+  %save_zn18.pre-phi439 = phi i32* [ %save_zn, %if.end.thread ], [ %save_zn18.phi.trans.insert, %if.end ]
+  %save_zvec19.pre-phi437 = phi i32* [ %save_zvec, %if.end.thread ], [ %save_zvec19.phi.trans.insert, %if.end ]
+  %save_zj20.pre-phi435 = phi i32* [ %save_zj, %if.end.thread ], [ %save_zj20.phi.trans.insert, %if.end ]
+  tail call void @bar(i32 4001)
+  br label %save_state_and_return
+
+save_state_and_return:                            ; preds = %sw.default, %if.end.140, %if.then.130, %if.end.82, %if.end.33, %if.then.29
+  %tmp58 = phi i32 [ %tmp39, %sw.default ], [ %.pre, %if.then.29 ], [ %.pre, %if.then.130 ], [ %.pre, %if.end.140 ], [ %.pre, %if.end.82 ], [ %.pre, %if.end.33 ]
+  %tmp59 = phi i32 [ %tmp40, %sw.default ], [ %.pre406, %if.then.29 ], [ %.pre406, %if.then.130 ], [ %.pre406, %if.end.140 ], [ %.pre406, %if.end.82 ], [ %.pre406, %if.end.33 ]
+  %tmp60 = phi i32 [ %tmp41, %sw.default ], [ %.pre407, %if.then.29 ], [ %.pre407, %if.then.130 ], [ %.pre407, %if.end.140 ], [ %.pre407, %if.end.82 ], [ %.pre407, %if.end.33 ]
+  %tmp61 = phi i32 [ %tmp43, %sw.default ], [ %.pre409, %if.then.29 ], [ %.pre409, %if.then.130 ], [ %.pre409, %if.end.140 ], [ %.pre409, %if.end.82 ], [ %.pre409, %if.end.33 ]
+  %tmp62 = phi i32 [ %tmp44, %sw.default ], [ %.pre410, %if.then.29 ], [ %.pre410, %if.then.130 ], [ %.pre410, %if.end.140 ], [ %.pre410, %if.end.82 ], [ %.pre410, %if.end.33 ]
+  %tmp63 = phi i32 [ %tmp45, %sw.default ], [ %.pre411, %if.then.29 ], [ %.pre411, %if.then.130 ], [ %.pre411, %if.end.140 ], [ %.pre411, %if.end.82 ], [ %.pre411, %if.end.33 ]
+  %tmp64 = phi i32 [ %tmp46, %sw.default ], [ %.pre412, %if.then.29 ], [ %.pre412, %if.then.130 ], [ %.pre412, %if.end.140 ], [ %.pre412, %if.end.82 ], [ %.pre412, %if.end.33 ]
+  %tmp65 = phi i32 [ %tmp47, %sw.default ], [ %.pre413, %if.then.29 ], [ %.pre413, %if.then.130 ], [ %.pre413, %if.end.140 ], [ %.pre413, %if.end.82 ], [ %.pre413, %if.end.33 ]
+  %tmp66 = phi i32 [ %tmp48, %sw.default ], [ %.pre414, %if.then.29 ], [ %.pre414, %if.then.130 ], [ %.pre414, %if.end.140 ], [ %.pre414, %if.end.82 ], [ %.pre414, %if.end.33 ]
+  %tmp67 = phi i32 [ %tmp49, %sw.default ], [ %.pre415, %if.then.29 ], [ %.pre415, %if.then.130 ], [ %.pre415, %if.end.140 ], [ %.pre415, %if.end.82 ], [ %.pre415, %if.end.33 ]
+  %tmp68 = phi i32 [ %tmp51, %sw.default ], [ %.pre417, %if.then.29 ], [ %.pre417, %if.then.130 ], [ %.pre417, %if.end.140 ], [ %.pre417, %if.end.82 ], [ %.pre417, %if.end.33 ]
+  %tmp69 = phi i32 [ %tmp52, %sw.default ], [ %.pre418, %if.then.29 ], [ %.pre418, %if.then.130 ], [ %.pre418, %if.end.140 ], [ %.pre418, %if.end.82 ], [ %.pre418, %if.end.33 ]
+  %tmp70 = phi i32 [ %tmp53, %sw.default ], [ %.pre419, %if.then.29 ], [ %.pre419, %if.then.130 ], [ %.pre419, %if.end.140 ], [ %.pre419, %if.end.82 ], [ %.pre419, %if.end.33 ]
+  %tmp71 = phi i32 [ %tmp54, %sw.default ], [ %.pre420, %if.then.29 ], [ %.pre420, %if.then.130 ], [ %.pre420, %if.end.140 ], [ %.pre420, %if.end.82 ], [ %.pre420, %if.end.33 ]
+  %tmp72 = phi i32 [ %tmp55, %sw.default ], [ %.pre421, %if.then.29 ], [ %.pre421, %if.then.130 ], [ %.pre421, %if.end.140 ], [ %.pre421, %if.end.82 ], [ %.pre421, %if.end.33 ]
+  %tmp73 = phi i32 [ %tmp56, %sw.default ], [ %.pre422, %if.then.29 ], [ %.pre422, %if.then.130 ], [ %.pre422, %if.end.140 ], [ %.pre422, %if.end.82 ], [ %.pre422, %if.end.33 ]
+  %tmp74 = phi i32 [ %tmp57, %sw.default ], [ %.pre423, %if.then.29 ], [ %.pre423, %if.then.130 ], [ %.pre423, %if.end.140 ], [ %.pre423, %if.end.82 ], [ %.pre423, %if.end.33 ]
+  %save_j3.pre-phi468 = phi i32* [ %save_j3.pre-phi469, %sw.default ], [ %save_j3.phi.trans.insert, %if.then.29 ], [ %save_j3.phi.trans.insert, %if.then.130 ], [ %save_j3.phi.trans.insert, %if.end.140 ], [ %save_j3.phi.trans.insert, %if.end.82 ], [ %save_j3.phi.trans.insert, %if.end.33 ]
+  %save_t4.pre-phi466 = phi i32* [ %save_t4.pre-phi467, %sw.default ], [ %save_t4.phi.trans.insert, %if.then.29 ], [ %save_t4.phi.trans.insert, %if.then.130 ], [ %save_t4.phi.trans.insert, %if.end.140 ], [ %save_t4.phi.trans.insert, %if.end.82 ], [ %save_t4.phi.trans.insert, %if.end.33 ]
+  %save_alphaSize5.pre-phi464 = phi i32* [ %save_alphaSize5.pre-phi465, %sw.default ], [ %save_alphaSize5.phi.trans.insert, %if.then.29 ], [ %save_alphaSize5.phi.trans.insert, %if.then.130 ], [ %save_alphaSize5.phi.trans.insert, %if.end.140 ], [ %save_alphaSize5.phi.trans.insert, %if.end.82 ], [ %save_alphaSize5.phi.trans.insert, %if.end.33 ]
+  %save_nGroups6.pre-phi462 = phi i32* [ %save_nGroups6.pre-phi463, %sw.default ], [ %save_nGroups6.phi.trans.insert, %if.then.29 ], [ %save_nGroups6.phi.trans.insert, %if.then.130 ], [ %save_nGroups6.phi.trans.insert, %if.end.140 ], [ %save_nGroups6.phi.trans.insert, %if.end.82 ], [ %save_nGroups6.phi.trans.insert, %if.end.33 ]
+  %save_nSelectors7.pre-phi460 = phi i32* [ %save_nSelectors7.pre-phi461, %sw.default ], [ %save_nSelectors7.phi.trans.insert, %if.then.29 ], [ %save_nSelectors7.phi.trans.insert, %if.then.130 ], [ %save_nSelectors7.phi.trans.insert, %if.end.140 ], [ %save_nSelectors7.phi.trans.insert, %if.end.82 ], [ %save_nSelectors7.phi.trans.insert, %if.end.33 ]
+  %save_EOB8.pre-phi458 = phi i32* [ %save_EOB8.pre-phi459, %sw.default ], [ %save_EOB8.phi.trans.insert, %if.then.29 ], [ %save_EOB8.phi.trans.insert, %if.then.130 ], [ %save_EOB8.phi.trans.insert, %if.end.140 ], [ %save_EOB8.phi.trans.insert, %if.end.82 ], [ %save_EOB8.phi.trans.insert, %if.end.33 ]
+  %save_groupNo9.pre-phi456 = phi i32* [ %save_groupNo9.pre-phi457, %sw.default ], [ %save_groupNo9.phi.trans.insert, %if.then.29 ], [ %save_groupNo9.phi.trans.insert, %if.then.130 ], [ %save_groupNo9.phi.trans.insert, %if.end.140 ], [ %save_groupNo9.phi.trans.insert, %if.end.82 ], [ %save_groupNo9.phi.trans.insert, %if.end.33 ]
+  %save_groupPos10.pre-phi454 = phi i32* [ %save_groupPos10.pre-phi455, %sw.default ], [ %save_groupPos10.phi.trans.insert, %if.then.29 ], [ %save_groupPos10.phi.trans.insert, %if.then.130 ], [ %save_groupPos10.phi.trans.insert, %if.end.140 ], [ %save_groupPos10.phi.trans.insert, %if.end.82 ], [ %save_groupPos10.phi.trans.insert, %if.end.33 ]
+  %save_nextSym11.pre-phi452 = phi i32* [ %save_nextSym11.pre-phi453, %sw.default ], [ %save_nextSym11.phi.trans.insert, %if.then.29 ], [ %save_nextSym11.phi.trans.insert, %if.then.130 ], [ %save_nextSym11.phi.trans.insert, %if.end.140 ], [ %save_nextSym11.phi.trans.insert, %if.end.82 ], [ %save_nextSym11.phi.trans.insert, %if.end.33 ]
+  %save_nblockMAX12.pre-phi450 = phi i32* [ %save_nblockMAX12.pre-phi451, %sw.default ], [ %save_nblockMAX12.phi.trans.insert, %if.then.29 ], [ %save_nblockMAX12.phi.trans.insert, %if.then.130 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.140 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.82 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.33 ]
+  %save_nblock13.pre-phi448 = phi i32* [ %save_nblock13.pre-phi449, %sw.default ], [ %save_nblock13.phi.trans.insert, %if.then.29 ], [ %save_nblock13.phi.trans.insert, %if.then.130 ], [ %save_nblock13.phi.trans.insert, %if.end.140 ], [ %save_nblock13.phi.trans.insert, %if.end.82 ], [ %save_nblock13.phi.trans.insert, %if.end.33 ]
+  %save_es14.pre-phi446 = phi i32* [ %save_es14.pre-phi447, %sw.default ], [ %save_es14.phi.trans.insert, %if.then.29 ], [ %save_es14.phi.trans.insert, %if.then.130 ], [ %save_es14.phi.trans.insert, %if.end.140 ], [ %save_es14.phi.trans.insert, %if.end.82 ], [ %save_es14.phi.trans.insert, %if.end.33 ]
+  %save_N15.pre-phi444 = phi i32* [ %save_N15.pre-phi445, %sw.default ], [ %save_N15.phi.trans.insert, %if.then.29 ], [ %save_N15.phi.trans.insert, %if.then.130 ], [ %save_N15.phi.trans.insert, %if.end.140 ], [ %save_N15.phi.trans.insert, %if.end.82 ], [ %save_N15.phi.trans.insert, %if.end.33 ]
+  %save_curr16.pre-phi442 = phi i32* [ %save_curr16.pre-phi443, %sw.default ], [ %save_curr16.phi.trans.insert, %if.then.29 ], [ %save_curr16.phi.trans.insert, %if.then.130 ], [ %save_curr16.phi.trans.insert, %if.end.140 ], [ %save_curr16.phi.trans.insert, %if.end.82 ], [ %save_curr16.phi.trans.insert, %if.end.33 ]
+  %save_zt17.pre-phi440 = phi i32* [ %save_zt17.pre-phi441, %sw.default ], [ %save_zt17.phi.trans.insert, %if.then.29 ], [ %save_zt17.phi.trans.insert, %if.then.130 ], [ %save_zt17.phi.trans.insert, %if.end.140 ], [ %save_zt17.phi.trans.insert, %if.end.82 ], [ %save_zt17.phi.trans.insert, %if.end.33 ]
+  %save_zn18.pre-phi438 = phi i32* [ %save_zn18.pre-phi439, %sw.default ], [ %save_zn18.phi.trans.insert, %if.then.29 ], [ %save_zn18.phi.trans.insert, %if.then.130 ], [ %save_zn18.phi.trans.insert, %if.end.140 ], [ %save_zn18.phi.trans.insert, %if.end.82 ], [ %save_zn18.phi.trans.insert, %if.end.33 ]
+  %save_zvec19.pre-phi436 = phi i32* [ %save_zvec19.pre-phi437, %sw.default ], [ %save_zvec19.phi.trans.insert, %if.then.29 ], [ %save_zvec19.phi.trans.insert, %if.then.130 ], [ %save_zvec19.phi.trans.insert, %if.end.140 ], [ %save_zvec19.phi.trans.insert, %if.end.82 ], [ %save_zvec19.phi.trans.insert, %if.end.33 ]
+  %save_zj20.pre-phi434 = phi i32* [ %save_zj20.pre-phi435, %sw.default ], [ %save_zj20.phi.trans.insert, %if.then.29 ], [ %save_zj20.phi.trans.insert, %if.then.130 ], [ %save_zj20.phi.trans.insert, %if.end.140 ], [ %save_zj20.phi.trans.insert, %if.end.82 ], [ %save_zj20.phi.trans.insert, %if.end.33 ]
+  %nblock.1 = phi i32 [ %tmp50, %sw.default ], [ %.pre416, %if.then.29 ], [ 0, %if.then.130 ], [ %.pre416, %if.end.140 ], [ %.pre416, %if.end.82 ], [ %.pre416, %if.end.33 ]
+  %alphaSize.1 = phi i32 [ %tmp42, %sw.default ], [ %.pre408, %if.then.29 ], [ %add179, %if.then.130 ], [ %.pre408, %if.end.140 ], [ %.pre408, %if.end.82 ], [ %.pre408, %if.end.33 ]
+  %retVal.0 = phi i32 [ 0, %sw.default ], [ -5, %if.then.29 ], [ -4, %if.then.130 ], [ 0, %if.end.140 ], [ 0, %if.end.82 ], [ 0, %if.end.33 ]
+  store i32 %tmp58, i32* %save_i, align 4
+  store i32 %tmp59, i32* %save_j3.pre-phi468, align 4
+  store i32 %tmp60, i32* %save_t4.pre-phi466, align 4
+  store i32 %alphaSize.1, i32* %save_alphaSize5.pre-phi464, align 4
+  store i32 %tmp61, i32* %save_nGroups6.pre-phi462, align 4
+  store i32 %tmp62, i32* %save_nSelectors7.pre-phi460, align 4
+  store i32 %tmp63, i32* %save_EOB8.pre-phi458, align 4
+  store i32 %tmp64, i32* %save_groupNo9.pre-phi456, align 4
+  store i32 %tmp65, i32* %save_groupPos10.pre-phi454, align 4
+  store i32 %tmp66, i32* %save_nextSym11.pre-phi452, align 4
+  store i32 %tmp67, i32* %save_nblockMAX12.pre-phi450, align 4
+  store i32 %nblock.1, i32* %save_nblock13.pre-phi448, align 4
+  store i32 %tmp68, i32* %save_es14.pre-phi446, align 4
+  store i32 %tmp69, i32* %save_N15.pre-phi444, align 4
+  store i32 %tmp70, i32* %save_curr16.pre-phi442, align 4
+  store i32 %tmp71, i32* %save_zt17.pre-phi440, align 4
+  store i32 %tmp72, i32* %save_zn18.pre-phi438, align 4
+  store i32 %tmp73, i32* %save_zvec19.pre-phi436, align 4
+  store i32 %tmp74, i32* %save_zj20.pre-phi434, align 4
+  ret i32 %retVal.0
+}
+
+!0 = !{!"branch_weights", i32 10, i32 1}

Modified: llvm/trunk/test/CodeGen/X86/fp128-compare.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp128-compare.ll?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp128-compare.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp128-compare.ll Fri Apr  8 10:17:43 2016
@@ -86,8 +86,8 @@ entry:
   %cond = select i1 %cmp, fp128 %x, fp128 %y
   ret fp128 %cond
 ; CHECK-LABEL: TestMax:
-; CHECK: movaps %xmm0
 ; CHECK: movaps %xmm1
+; CHECK: movaps %xmm0
 ; CHECK: callq __gttf2
 ; CHECK: movaps {{.*}}, %xmm0
 ; CHECK: testl %eax, %eax

Removed: llvm/trunk/test/CodeGen/X86/hoist-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/hoist-spill.ll?rev=265789&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/hoist-spill.ll (original)
+++ llvm/trunk/test/CodeGen/X86/hoist-spill.ll (removed)
@@ -1,121 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-
-; grep 'Spill' |sed 's%.*\(-[0-9]\+(\%rsp)\).*%\1%g' |sort |uniq -d |awk '{if (/rsp/); exit -1}'
-; Check no spills to the same stack slot after hoisting.
-; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp)
-; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp)
-; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET3:-?[0-9]*]](%rsp)
-; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp)
-; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp)
-; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET3]](%rsp)
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-@a = external global i32*, align 8
-@b = external global i32, align 4
-@d = external global i32*, align 8
-
-; Function Attrs: norecurse noreturn nounwind uwtable
-define void @fn1(i32 %p1) {
-entry:
-  %tmp = load i32*, i32** @d, align 8
-  %tmp1 = load i32*, i32** @a, align 8
-  %tmp2 = sext i32 %p1 to i64
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.inc14, %entry
-  %indvar = phi i32 [ %indvar.next, %for.inc14 ], [ 0, %entry ]
-  %indvars.iv30.in = phi i32 [ %indvars.iv30, %for.inc14 ], [ %p1, %entry ]
-  %c.0 = phi i32 [ %inc15, %for.inc14 ], [ 1, %entry ]
-  %k.0 = phi i32 [ %k.1.lcssa, %for.inc14 ], [ undef, %entry ]
-  %tmp3 = icmp sgt i32 undef, 0
-  %smax52 = select i1 %tmp3, i32 undef, i32 0
-  %tmp4 = zext i32 %smax52 to i64
-  %tmp5 = icmp sgt i64 undef, %tmp4
-  %smax53 = select i1 %tmp5, i64 undef, i64 %tmp4
-  %tmp6 = add nsw i64 %smax53, 1
-  %tmp7 = sub nsw i64 %tmp6, %tmp4
-  %tmp8 = add nsw i64 %tmp7, -8
-  %tmp9 = sub i32 undef, %indvar
-  %tmp10 = icmp sgt i64 %tmp2, 0
-  %smax40 = select i1 %tmp10, i64 %tmp2, i64 0
-  %scevgep41 = getelementptr i32, i32* %tmp1, i64 %smax40
-  %indvars.iv30 = add i32 %indvars.iv30.in, -1
-  %tmp11 = icmp sgt i32 %indvars.iv30, 0
-  %smax = select i1 %tmp11, i32 %indvars.iv30, i32 0
-  %tmp12 = zext i32 %smax to i64
-  %sub = sub nsw i32 %p1, %c.0
-  %cmp = icmp sgt i32 %sub, 0
-  %sub. = select i1 %cmp, i32 %sub, i32 0
-  %cmp326 = icmp sgt i32 %k.0, %p1
-  br i1 %cmp326, label %for.cond4.preheader, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %for.cond
-  br label %for.body
-
-for.cond4.preheader:                              ; preds = %for.body, %for.cond
-  %k.1.lcssa = phi i32 [ %k.0, %for.cond ], [ %add, %for.body ]
-  %cmp528 = icmp sgt i32 %sub., %p1
-  br i1 %cmp528, label %for.inc14, label %for.body6.preheader
-
-for.body6.preheader:                              ; preds = %for.cond4.preheader
-  br i1 undef, label %for.body6, label %min.iters.checked
-
-min.iters.checked:                                ; preds = %for.body6.preheader
-  br i1 undef, label %for.body6, label %vector.memcheck
-
-vector.memcheck:                                  ; preds = %min.iters.checked
-  %bound1 = icmp ule i32* undef, %scevgep41
-  %memcheck.conflict = and i1 undef, %bound1
-  br i1 %memcheck.conflict, label %for.body6, label %vector.body.preheader
-
-vector.body.preheader:                            ; preds = %vector.memcheck
-  %lcmp.mod = icmp eq i64 undef, 0
-  br i1 %lcmp.mod, label %vector.body.preheader.split, label %vector.body.prol
-
-vector.body.prol:                                 ; preds = %vector.body.prol, %vector.body.preheader
-  %prol.iter.cmp = icmp eq i64 undef, 0
-  br i1 %prol.iter.cmp, label %vector.body.preheader.split, label %vector.body.prol
-
-vector.body.preheader.split:                      ; preds = %vector.body.prol, %vector.body.preheader
-  %tmp13 = icmp ult i64 %tmp8, 24
-  br i1 %tmp13, label %middle.block, label %vector.body
-
-vector.body:                                      ; preds = %vector.body, %vector.body.preheader.split
-  %index = phi i64 [ %index.next.3, %vector.body ], [ 0, %vector.body.preheader.split ]
-  %index.next = add i64 %index, 8
-  %offset.idx.1 = add i64 %tmp12, %index.next
-  %tmp14 = getelementptr inbounds i32, i32* %tmp, i64 %offset.idx.1
-  %tmp15 = bitcast i32* %tmp14 to <4 x i32>*
-  %wide.load.1 = load <4 x i32>, <4 x i32>* %tmp15, align 4
-  %tmp16 = getelementptr inbounds i32, i32* %tmp1, i64 %offset.idx.1
-  %tmp17 = bitcast i32* %tmp16 to <4 x i32>*
-  store <4 x i32> %wide.load.1, <4 x i32>* %tmp17, align 4
-  %index.next.3 = add i64 %index, 32
-  br i1 undef, label %middle.block, label %vector.body
-
-middle.block:                                     ; preds = %vector.body, %vector.body.preheader.split
-  br i1 undef, label %for.inc14, label %for.body6
-
-for.body:                                         ; preds = %for.body, %for.body.preheader
-  %k.127 = phi i32 [ %k.0, %for.body.preheader ], [ %add, %for.body ]
-  %add = add nsw i32 %k.127, 1
-  %tmp18 = load i32, i32* undef, align 4
-  store i32 %tmp18, i32* @b, align 4
-  br i1 undef, label %for.body, label %for.cond4.preheader
-
-for.body6:                                        ; preds = %for.body6, %middle.block, %vector.memcheck, %min.iters.checked, %for.body6.preheader
-  %indvars.iv32 = phi i64 [ undef, %for.body6 ], [ %tmp12, %vector.memcheck ], [ %tmp12, %min.iters.checked ], [ %tmp12, %for.body6.preheader ], [ undef, %middle.block ]
-  %arrayidx8 = getelementptr inbounds i32, i32* %tmp, i64 %indvars.iv32
-  %tmp19 = load i32, i32* %arrayidx8, align 4
-  %arrayidx10 = getelementptr inbounds i32, i32* %tmp1, i64 %indvars.iv32
-  store i32 %tmp19, i32* %arrayidx10, align 4
-  %cmp5 = icmp slt i64 %indvars.iv32, undef
-  br i1 %cmp5, label %for.body6, label %for.inc14
-
-for.inc14:                                        ; preds = %for.body6, %middle.block, %for.cond4.preheader
-  %inc15 = add nuw nsw i32 %c.0, 1
-  %indvar.next = add i32 %indvar, 1
-  br label %for.cond
-}

Removed: llvm/trunk/test/CodeGen/X86/new-remat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/new-remat.ll?rev=265789&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/new-remat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/new-remat.ll (removed)
@@ -1,70 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-; Check all spills are rematerialized.
-; CHECK-NOT: Spill
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-@b = common global double 0.000000e+00, align 8
-@a = common global i32 0, align 4
-
-; Function Attrs: nounwind uwtable
-define i32 @uniform_testdata(i32 %p1) {
-entry:
-  %cmp3 = icmp sgt i32 %p1, 0
-  br i1 %cmp3, label %for.body.preheader, label %for.end
-
-for.body.preheader:                               ; preds = %entry
-  %tmp = add i32 %p1, -1
-  %xtraiter = and i32 %p1, 7
-  %lcmp.mod = icmp eq i32 %xtraiter, 0
-  br i1 %lcmp.mod, label %for.body.preheader.split, label %for.body.prol.preheader
-
-for.body.prol.preheader:                          ; preds = %for.body.preheader
-  br label %for.body.prol
-
-for.body.prol:                                    ; preds = %for.body.prol, %for.body.prol.preheader
-  %i.04.prol = phi i32 [ %inc.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
-  %prol.iter = phi i32 [ %prol.iter.sub, %for.body.prol ], [ %xtraiter, %for.body.prol.preheader ]
-  %tmp1 = load double, double* @b, align 8
-  %call.prol = tail call double @pow(double %tmp1, double 2.500000e-01)
-  %inc.prol = add nuw nsw i32 %i.04.prol, 1
-  %prol.iter.sub = add i32 %prol.iter, -1
-  %prol.iter.cmp = icmp eq i32 %prol.iter.sub, 0
-  br i1 %prol.iter.cmp, label %for.body.preheader.split.loopexit, label %for.body.prol
-
-for.body.preheader.split.loopexit:                ; preds = %for.body.prol
-  %inc.prol.lcssa = phi i32 [ %inc.prol, %for.body.prol ]
-  br label %for.body.preheader.split
-
-for.body.preheader.split:                         ; preds = %for.body.preheader.split.loopexit, %for.body.preheader
-  %i.04.unr = phi i32 [ 0, %for.body.preheader ], [ %inc.prol.lcssa, %for.body.preheader.split.loopexit ]
-  %tmp2 = icmp ult i32 %tmp, 7
-  br i1 %tmp2, label %for.end.loopexit, label %for.body.preheader.split.split
-
-for.body.preheader.split.split:                   ; preds = %for.body.preheader.split
-  br label %for.body
-
-for.body:                                         ; preds = %for.body, %for.body.preheader.split.split
-  %i.04 = phi i32 [ %i.04.unr, %for.body.preheader.split.split ], [ %inc.7, %for.body ]
-  %tmp3 = load double, double* @b, align 8
-  %call = tail call double @pow(double %tmp3, double 2.500000e-01)
-  %tmp4 = load double, double* @b, align 8
-  %call.1 = tail call double @pow(double %tmp4, double 2.500000e-01)
-  %inc.7 = add nsw i32 %i.04, 8
-  %exitcond.7 = icmp eq i32 %inc.7, %p1
-  br i1 %exitcond.7, label %for.end.loopexit.unr-lcssa, label %for.body
-
-for.end.loopexit.unr-lcssa:                       ; preds = %for.body
-  br label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.end.loopexit.unr-lcssa, %for.body.preheader.split
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  %tmp5 = load i32, i32* @a, align 4
-  ret i32 %tmp5
-}
-
-; Function Attrs: nounwind
-declare double @pow(double, double)

Modified: llvm/trunk/test/CodeGen/X86/ragreedy-hoist-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ragreedy-hoist-spill.ll?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/ragreedy-hoist-spill.ll (original)
+++ llvm/trunk/test/CodeGen/X86/ragreedy-hoist-spill.ll Fri Apr  8 10:17:43 2016
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-macosx -regalloc=greedy | FileCheck %s
 
 ; This testing case is reduced from 254.gap SyFgets function.
-; We make sure a spill is hoisted to a cold BB inside the hotter outer loop.
+; We make sure a spill is not hoisted to a hotter outer loop.
 
 %struct.TMP.1 = type { %struct.TMP.2*, %struct.TMP.2*, [1024 x i8] }
 %struct.TMP.2 = type { i8*, i32, i32, i16, i16, %struct.TMP.3, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.TMP.3, %struct.TMP.4*, i32, [3 x i8], [1 x i8], %struct.TMP.3, i32, i64 }
@@ -181,10 +181,6 @@ sw.bb474:
   br i1 %cmp476, label %if.end517, label %do.body479.preheader
 
 do.body479.preheader:
-  ; CHECK: do.body479.preheader
-  ; spill is hoisted here. Although loop depth1 is even hotter than loop depth2, do.body479.preheader is cold.
-  ; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp)
-  ; CHECK: land.rhs485
   %cmp4833314 = icmp eq i8 undef, 0
   br i1 %cmp4833314, label %if.end517, label %land.rhs485
 
@@ -204,8 +200,8 @@ land.lhs.true490:
 
 lor.rhs500:
   ; CHECK: lor.rhs500
-  ; Make sure spill is hoisted to a cold preheader in outside loop.
-  ; CHECK-NOT: movq %r{{.*}}, {{[0-9]+}}(%rsp)
+  ; Make sure that we don't hoist the spill to outer loops.
+  ; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp)
   ; CHECK: callq {{.*}}maskrune
   %call3.i.i2792 = call i32 @__maskrune(i32 undef, i64 256)
   br i1 undef, label %land.lhs.true504, label %do.body479.backedge




More information about the llvm-commits mailing list