[llvm] r265790 - Revert r265547 "Recommit r265309 after fixed an invalid memory reference bug happened"
Hans Wennborg via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 8 08:17:44 PDT 2016
Author: hans
Date: Fri Apr 8 10:17:43 2016
New Revision: 265790
URL: http://llvm.org/viewvc/llvm-project?rev=265790&view=rev
Log:
Revert r265547 "Recommit r265309 after fixed an invalid memory reference bug happened"
It caused PR27275: "ARM: Bad machine code: Using an undefined physical register"
Also reverting the following commits that were landed on top:
r265610 "Fix the compare-clang diff error introduced by r265547."
r265639 "Fix the sanitizer bootstrap error in r265547."
r265657 "InlineSpiller.cpp: Escap \@ in r265547. [-Wdocumentation]"
Added:
llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll
Removed:
llvm/trunk/test/CodeGen/X86/hoist-spill.ll
llvm/trunk/test/CodeGen/X86/new-remat.ll
Modified:
llvm/trunk/include/llvm/CodeGen/LiveRangeEdit.h
llvm/trunk/lib/CodeGen/InlineSpiller.cpp
llvm/trunk/lib/CodeGen/LiveRangeEdit.cpp
llvm/trunk/lib/CodeGen/RegAllocBase.cpp
llvm/trunk/lib/CodeGen/RegAllocBase.h
llvm/trunk/lib/CodeGen/RegAllocBasic.cpp
llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp
llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp
llvm/trunk/lib/CodeGen/Spiller.h
llvm/trunk/lib/CodeGen/SplitKit.cpp
llvm/trunk/lib/CodeGen/SplitKit.h
llvm/trunk/test/CodeGen/X86/fp128-compare.ll
llvm/trunk/test/CodeGen/X86/ragreedy-hoist-spill.ll
Modified: llvm/trunk/include/llvm/CodeGen/LiveRangeEdit.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/LiveRangeEdit.h?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/LiveRangeEdit.h (original)
+++ llvm/trunk/include/llvm/CodeGen/LiveRangeEdit.h Fri Apr 8 10:17:43 2016
@@ -72,10 +72,6 @@ private:
/// ScannedRemattable - true when remattable values have been identified.
bool ScannedRemattable;
- /// DeadRemats - The saved instructions which have already been dead after
- /// rematerialization but not deleted yet -- to be done in postOptimization.
- SmallPtrSet<MachineInstr *, 32> *DeadRemats;
-
/// Remattable - Values defined by remattable instructions as identified by
/// tii.isTriviallyReMaterializable().
SmallPtrSet<const VNInfo*,4> Remattable;
@@ -120,16 +116,13 @@ public:
/// @param vrm Map of virtual registers to physical registers for this
/// function. If NULL, no virtual register map updates will
/// be done. This could be the case if called before Regalloc.
- /// @param deadRemats The collection of all the instructions defining an
- /// original reg and are dead after remat.
LiveRangeEdit(LiveInterval *parent, SmallVectorImpl<unsigned> &newRegs,
MachineFunction &MF, LiveIntervals &lis, VirtRegMap *vrm,
- Delegate *delegate = nullptr,
- SmallPtrSet<MachineInstr *, 32> *deadRemats = nullptr)
+ Delegate *delegate = nullptr)
: Parent(parent), NewRegs(newRegs), MRI(MF.getRegInfo()), LIS(lis),
- VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()), TheDelegate(delegate),
- FirstNew(newRegs.size()), ScannedRemattable(false),
- DeadRemats(deadRemats) {
+ VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()),
+ TheDelegate(delegate), FirstNew(newRegs.size()),
+ ScannedRemattable(false) {
MRI.setDelegate(this);
}
@@ -149,16 +142,6 @@ public:
bool empty() const { return size() == 0; }
unsigned get(unsigned idx) const { return NewRegs[idx+FirstNew]; }
- /// pop_back - It allows LiveRangeEdit users to drop new registers.
- /// The context is when an original def instruction of a register is
- /// dead after rematerialization, we still want to keep it for following
- /// rematerializations. We save the def instruction in DeadRemats,
- /// and replace the original dst register with a new dummy register so
- /// the live range of original dst register can be shrinked normally.
- /// We don't want to allocate phys register for the dummy register, so
- /// we want to drop it from the NewRegs set.
- void pop_back() { NewRegs.pop_back(); }
-
ArrayRef<unsigned> regs() const {
return makeArrayRef(NewRegs).slice(FirstNew);
}
@@ -192,15 +175,15 @@ public:
/// Remat - Information needed to rematerialize at a specific location.
struct Remat {
VNInfo *ParentVNI; // parent_'s value at the remat location.
- MachineInstr *OrigMI; // Instruction defining OrigVNI. It contains the
- // real expr for remat.
+ MachineInstr *OrigMI; // Instruction defining ParentVNI.
explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(nullptr) {}
};
/// canRematerializeAt - Determine if ParentVNI can be rematerialized at
/// UseIdx. It is assumed that parent_.getVNINfoAt(UseIdx) == ParentVNI.
/// When cheapAsAMove is set, only cheap remats are allowed.
- bool canRematerializeAt(Remat &RM, VNInfo *OrigVNI, SlotIndex UseIdx,
+ bool canRematerializeAt(Remat &RM,
+ SlotIndex UseIdx,
bool cheapAsAMove);
/// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an
@@ -225,12 +208,6 @@ public:
return Rematted.count(ParentVNI);
}
- void markDeadRemat(MachineInstr *inst) {
- // DeadRemats is an optional field.
- if (DeadRemats)
- DeadRemats->insert(inst);
- }
-
/// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try
/// to erase it from LIS.
void eraseVirtReg(unsigned Reg);
@@ -241,11 +218,8 @@ public:
/// RegsBeingSpilled lists registers currently being spilled by the register
/// allocator. These registers should not be split into new intervals
/// as currently those new intervals are not guaranteed to spill.
- /// NoSplit indicates this func is used after the iterations of selectOrSplit
- /// where registers should not be split into new intervals.
- void eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
- ArrayRef<unsigned> RegsBeingSpilled = None,
- bool NoSplit = false);
+ void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
+ ArrayRef<unsigned> RegsBeingSpilled = None);
/// calculateRegClassAndHint - Recompute register class and hint for each new
/// register.
Modified: llvm/trunk/lib/CodeGen/InlineSpiller.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/InlineSpiller.cpp?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/InlineSpiller.cpp (original)
+++ llvm/trunk/lib/CodeGen/InlineSpiller.cpp Fri Apr 8 10:17:43 2016
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "Spiller.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
@@ -49,77 +48,13 @@ STATISTIC(NumReloadsRemoved, "Number of
STATISTIC(NumFolded, "Number of folded stack accesses");
STATISTIC(NumFoldedLoads, "Number of folded loads");
STATISTIC(NumRemats, "Number of rematerialized defs for spilling");
+STATISTIC(NumOmitReloadSpill, "Number of omitted spills of reloads");
+STATISTIC(NumHoists, "Number of hoisted spills");
static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
cl::desc("Disable inline spill hoisting"));
namespace {
-class HoistSpillHelper {
- LiveIntervals &LIS;
- LiveStacks &LSS;
- AliasAnalysis *AA;
- MachineDominatorTree &MDT;
- MachineLoopInfo &Loops;
- VirtRegMap &VRM;
- MachineFrameInfo &MFI;
- MachineRegisterInfo &MRI;
- const TargetInstrInfo &TII;
- const TargetRegisterInfo &TRI;
- const MachineBlockFrequencyInfo &MBFI;
-
- // Map from StackSlot to its original register.
- DenseMap<int, unsigned> StackSlotToReg;
- // Map from pair of (StackSlot and Original VNI) to a set of spills which
- // have the same stackslot and have equal values defined by Original VNI.
- // These spills are mergeable and are hoist candiates.
- typedef MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>
- MergeableSpillsMap;
- MergeableSpillsMap MergeableSpills;
-
- /// This is the map from original register to a set containing all its
- /// siblings. To hoist a spill to another BB, we need to find out a live
- /// sibling there and use it as the source of the new spill.
- DenseMap<unsigned, SmallSetVector<unsigned, 16>> Virt2SiblingsMap;
-
- bool isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI, MachineBasicBlock &BB,
- unsigned &LiveReg);
-
- void rmRedundantSpills(
- SmallPtrSet<MachineInstr *, 16> &Spills,
- SmallVectorImpl<MachineInstr *> &SpillsToRm,
- DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);
-
- void getVisitOrders(
- MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
- SmallVectorImpl<MachineDomTreeNode *> &Orders,
- SmallVectorImpl<MachineInstr *> &SpillsToRm,
- DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
- DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);
-
- void runHoistSpills(unsigned OrigReg, VNInfo &OrigVNI,
- SmallPtrSet<MachineInstr *, 16> &Spills,
- SmallVectorImpl<MachineInstr *> &SpillsToRm,
- DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns);
-
-public:
- HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf,
- VirtRegMap &vrm)
- : LIS(pass.getAnalysis<LiveIntervals>()),
- LSS(pass.getAnalysis<LiveStacks>()),
- AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
- MDT(pass.getAnalysis<MachineDominatorTree>()),
- Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
- MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
- TII(*mf.getSubtarget().getInstrInfo()),
- TRI(*mf.getSubtarget().getRegisterInfo()),
- MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {}
-
- void addToMergeableSpills(MachineInstr *Spill, int StackSlot,
- unsigned Original);
- bool rmFromMergeableSpills(MachineInstr *Spill, int StackSlot);
- void hoistAllSpills(LiveRangeEdit &Edit);
-};
-
class InlineSpiller : public Spiller {
MachineFunction &MF;
LiveIntervals &LIS;
@@ -150,12 +85,56 @@ class InlineSpiller : public Spiller {
// Values that failed to remat at some point.
SmallPtrSet<VNInfo*, 8> UsedValues;
+public:
+ // Information about a value that was defined by a copy from a sibling
+ // register.
+ struct SibValueInfo {
+ // True when all reaching defs were reloads: No spill is necessary.
+ bool AllDefsAreReloads;
+
+ // True when value is defined by an original PHI not from splitting.
+ bool DefByOrigPHI;
+
+ // True when the COPY defining this value killed its source.
+ bool KillsSource;
+
+ // The preferred register to spill.
+ unsigned SpillReg;
+
+ // The value of SpillReg that should be spilled.
+ VNInfo *SpillVNI;
+
+ // The block where SpillVNI should be spilled. Currently, this must be the
+ // block containing SpillVNI->def.
+ MachineBasicBlock *SpillMBB;
+
+ // A defining instruction that is not a sibling copy or a reload, or NULL.
+ // This can be used as a template for rematerialization.
+ MachineInstr *DefMI;
+
+ // List of values that depend on this one. These values are actually the
+ // same, but live range splitting has placed them in different registers,
+ // or SSA update needed to insert PHI-defs to preserve SSA form. This is
+ // copies of the current value and phi-kills. Usually only phi-kills cause
+ // more than one dependent value.
+ TinyPtrVector<VNInfo*> Deps;
+
+ SibValueInfo(unsigned Reg, VNInfo *VNI)
+ : AllDefsAreReloads(true), DefByOrigPHI(false), KillsSource(false),
+ SpillReg(Reg), SpillVNI(VNI), SpillMBB(nullptr), DefMI(nullptr) {}
+
+ // Returns true when a def has been found.
+ bool hasDef() const { return DefByOrigPHI || DefMI; }
+ };
+
+private:
+ // Values in RegsToSpill defined by sibling copies.
+ typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap;
+ SibValueMap SibValues;
+
// Dead defs generated during spilling.
SmallVector<MachineInstr*, 8> DeadDefs;
- // Object records spills information and does the hoisting.
- HoistSpillHelper HSpiller;
-
~InlineSpiller() override {}
public:
@@ -168,11 +147,9 @@ public:
MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
TII(*mf.getSubtarget().getInstrInfo()),
TRI(*mf.getSubtarget().getRegisterInfo()),
- MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
- HSpiller(pass, mf, vrm) {}
+ MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {}
void spill(LiveRangeEdit &) override;
- void postOptimization() override;
private:
bool isSnippet(const LiveInterval &SnipLI);
@@ -184,7 +161,11 @@ private:
}
bool isSibling(unsigned Reg);
- bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI);
+ MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*);
+ void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = nullptr);
+ void analyzeSiblingValues();
+
+ bool hoistSpill(LiveInterval &SpillLI, MachineInstr &CopyMI);
void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
void markValueUsed(LiveInterval*, VNInfo*);
@@ -316,45 +297,417 @@ void InlineSpiller::collectRegsToSpill()
}
}
+
+//===----------------------------------------------------------------------===//
+// Sibling Values
+//===----------------------------------------------------------------------===//
+
+// After live range splitting, some values to be spilled may be defined by
+// copies from sibling registers. We trace the sibling copies back to the
+// original value if it still exists. We need it for rematerialization.
+//
+// Even when the value can't be rematerialized, we still want to determine if
+// the value has already been spilled, or we may want to hoist the spill from a
+// loop.
+
bool InlineSpiller::isSibling(unsigned Reg) {
return TargetRegisterInfo::isVirtualRegister(Reg) &&
VRM.getOriginal(Reg) == Original;
}
-/// It is beneficial to spill to earlier place in the same BB in case
-/// as follows:
-/// There is an alternative def earlier in the same MBB.
-/// Hoist the spill as far as possible in SpillMBB. This can ease
-/// register pressure:
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS,
+ const InlineSpiller::SibValueInfo &SVI) {
+ OS << "spill " << PrintReg(SVI.SpillReg) << ':'
+ << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def;
+ if (SVI.SpillMBB)
+ OS << " in BB#" << SVI.SpillMBB->getNumber();
+ if (SVI.AllDefsAreReloads)
+ OS << " all-reloads";
+ if (SVI.DefByOrigPHI)
+ OS << " orig-phi";
+ if (SVI.KillsSource)
+ OS << " kill";
+ OS << " deps[";
+ for (VNInfo *Dep : SVI.Deps)
+ OS << ' ' << Dep->id << '@' << Dep->def;
+ OS << " ]";
+ if (SVI.DefMI)
+ OS << " def: " << *SVI.DefMI;
+ else
+ OS << '\n';
+ return OS;
+}
+#endif
+
+/// propagateSiblingValue - Propagate the value in SVI to dependents if it is
+/// known. Otherwise remember the dependency for later.
///
-/// x = def
-/// y = use x
-/// s = copy x
+/// @param SVIIter SibValues entry to propagate.
+/// @param VNI Dependent value, or NULL to propagate to all saved dependents.
+void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter,
+ VNInfo *VNI) {
+ SibValueMap::value_type *SVI = &*SVIIter;
+
+ // When VNI is non-NULL, add it to SVI's deps, and only propagate to that.
+ TinyPtrVector<VNInfo*> FirstDeps;
+ if (VNI) {
+ FirstDeps.push_back(VNI);
+ SVI->second.Deps.push_back(VNI);
+ }
+
+ // Has the value been completely determined yet? If not, defer propagation.
+ if (!SVI->second.hasDef())
+ return;
+
+ // Work list of values to propagate.
+ SmallSetVector<SibValueMap::value_type *, 8> WorkList;
+ WorkList.insert(SVI);
+
+ do {
+ SVI = WorkList.pop_back_val();
+ TinyPtrVector<VNInfo*> *Deps = VNI ? &FirstDeps : &SVI->second.Deps;
+ VNI = nullptr;
+
+ SibValueInfo &SV = SVI->second;
+ if (!SV.SpillMBB)
+ SV.SpillMBB = LIS.getMBBFromIndex(SV.SpillVNI->def);
+
+ DEBUG(dbgs() << " prop to " << Deps->size() << ": "
+ << SVI->first->id << '@' << SVI->first->def << ":\t" << SV);
+
+ assert(SV.hasDef() && "Propagating undefined value");
+
+ // Should this value be propagated as a preferred spill candidate? We don't
+ // propagate values of registers that are about to spill.
+ bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg);
+ unsigned SpillDepth = ~0u;
+
+ for (VNInfo *Dep : *Deps) {
+ SibValueMap::iterator DepSVI = SibValues.find(Dep);
+ assert(DepSVI != SibValues.end() && "Dependent value not in SibValues");
+ SibValueInfo &DepSV = DepSVI->second;
+ if (!DepSV.SpillMBB)
+ DepSV.SpillMBB = LIS.getMBBFromIndex(DepSV.SpillVNI->def);
+
+ bool Changed = false;
+
+ // Propagate defining instruction.
+ if (!DepSV.hasDef()) {
+ Changed = true;
+ DepSV.DefMI = SV.DefMI;
+ DepSV.DefByOrigPHI = SV.DefByOrigPHI;
+ }
+
+ // Propagate AllDefsAreReloads. For PHI values, this computes an AND of
+ // all predecessors.
+ if (!SV.AllDefsAreReloads && DepSV.AllDefsAreReloads) {
+ Changed = true;
+ DepSV.AllDefsAreReloads = false;
+ }
+
+ // Propagate best spill value.
+ if (PropSpill && SV.SpillVNI != DepSV.SpillVNI) {
+ if (SV.SpillMBB == DepSV.SpillMBB) {
+ // DepSV is in the same block. Hoist when dominated.
+ if (DepSV.KillsSource && SV.SpillVNI->def < DepSV.SpillVNI->def) {
+ // This is an alternative def earlier in the same MBB.
+ // Hoist the spill as far as possible in SpillMBB. This can ease
+ // register pressure:
+ //
+ // x = def
+ // y = use x
+ // s = copy x
+ //
+ // Hoisting the spill of s to immediately after the def removes the
+ // interference between x and y:
+ //
+ // x = def
+ // spill x
+ // y = use x<kill>
+ //
+ // This hoist only helps when the DepSV copy kills its source.
+ Changed = true;
+ DepSV.SpillReg = SV.SpillReg;
+ DepSV.SpillVNI = SV.SpillVNI;
+ DepSV.SpillMBB = SV.SpillMBB;
+ }
+ } else {
+ // DepSV is in a different block.
+ if (SpillDepth == ~0u)
+ SpillDepth = Loops.getLoopDepth(SV.SpillMBB);
+
+ // Also hoist spills to blocks with smaller loop depth, but make sure
+ // that the new value dominates. Non-phi dependents are always
+ // dominated, phis need checking.
+
+ const BranchProbability MarginProb(4, 5); // 80%
+ // Hoist a spill to outer loop if there are multiple dependents (it
+ // can be beneficial if more than one dependent is hoisted) or
+ // if DepSV (the hoisting source) is hotter than SV (the hoisting
+ // destination) (we add an 80% margin to bias a little towards
+ // loop depth).
+ bool HoistCondition =
+ (MBFI.getBlockFreq(DepSV.SpillMBB) >=
+ (MBFI.getBlockFreq(SV.SpillMBB) * MarginProb)) ||
+ Deps->size() > 1;
+
+ if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) &&
+ HoistCondition &&
+ (!DepSVI->first->isPHIDef() ||
+ MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) {
+ Changed = true;
+ DepSV.SpillReg = SV.SpillReg;
+ DepSV.SpillVNI = SV.SpillVNI;
+ DepSV.SpillMBB = SV.SpillMBB;
+ }
+ }
+ }
+
+ if (!Changed)
+ continue;
+
+ // Something changed in DepSVI. Propagate to dependents.
+ WorkList.insert(&*DepSVI);
+
+ DEBUG(dbgs() << " update " << DepSVI->first->id << '@'
+ << DepSVI->first->def << " to:\t" << DepSV);
+ }
+ } while (!WorkList.empty());
+}
+
+/// traceSiblingValue - Trace a value that is about to be spilled back to the
+/// real defining instructions by looking through sibling copies. Always stay
+/// within the range of OrigVNI so the registers are known to carry the same
+/// value.
///
-/// Hoisting the spill of s to immediately after the def removes the
-/// interference between x and y:
+/// Determine if the value is defined by all reloads, so spilling isn't
+/// necessary - the value is already in the stack slot.
///
-/// x = def
-/// spill x
-/// y = use x<kill>
+/// Return a defining instruction that may be a candidate for rematerialization.
///
-/// This hoist only helps when the copy kills its source.
+MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
+ VNInfo *OrigVNI) {
+ // Check if a cached value already exists.
+ SibValueMap::iterator SVI;
+ bool Inserted;
+ std::tie(SVI, Inserted) =
+ SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI)));
+ if (!Inserted) {
+ DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':'
+ << UseVNI->id << '@' << UseVNI->def << ' ' << SVI->second);
+ return SVI->second.DefMI;
+ }
+
+ DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':'
+ << UseVNI->id << '@' << UseVNI->def << '\n');
+
+ // List of (Reg, VNI) that have been inserted into SibValues, but need to be
+ // processed.
+ SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(UseReg, UseVNI));
+
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ do {
+ unsigned Reg;
+ VNInfo *VNI;
+ std::tie(Reg, VNI) = WorkList.pop_back_val();
+ DEBUG(dbgs() << " " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
+ << ":\t");
+
+ // First check if this value has already been computed.
+ SVI = SibValues.find(VNI);
+ assert(SVI != SibValues.end() && "Missing SibValues entry");
+
+ // Trace through PHI-defs created by live range splitting.
+ if (VNI->isPHIDef()) {
+ // Stop at original PHIs. We don't know the value at the
+ // predecessors. Look up the VNInfo for the current definition
+ // in OrigLI, to properly determine whether or not this phi was
+ // added by splitting.
+ if (VNI->def == OrigLI.getVNInfoAt(VNI->def)->def) {
+ DEBUG(dbgs() << "orig phi value\n");
+ SVI->second.DefByOrigPHI = true;
+ SVI->second.AllDefsAreReloads = false;
+ propagateSiblingValue(SVI);
+ continue;
+ }
+
+ // This is a PHI inserted by live range splitting. We could trace the
+ // live-out value from predecessor blocks, but that search can be very
+ // expensive if there are many predecessors and many more PHIs as
+ // generated by tail-dup when it sees an indirectbr. Instead, look at
+ // all the non-PHI defs that have the same value as OrigVNI. They must
+ // jointly dominate VNI->def. This is not optimal since VNI may actually
+ // be jointly dominated by a smaller subset of defs, so there is a chance
+ // we will miss an AllDefsAreReloads optimization.
+
+ // Separate all values dominated by OrigVNI into PHIs and non-PHIs.
+ SmallVector<VNInfo*, 8> PHIs, NonPHIs;
+ LiveInterval &LI = LIS.getInterval(Reg);
+
+ for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end();
+ VI != VE; ++VI) {
+ VNInfo *VNI2 = *VI;
+ if (VNI2->isUnused())
+ continue;
+ if (!OrigLI.containsOneValue() &&
+ OrigLI.getVNInfoAt(VNI2->def) != OrigVNI)
+ continue;
+ if (VNI2->isPHIDef() && VNI2->def != OrigVNI->def)
+ PHIs.push_back(VNI2);
+ else
+ NonPHIs.push_back(VNI2);
+ }
+ DEBUG(dbgs() << "split phi value, checking " << PHIs.size()
+ << " phi-defs, and " << NonPHIs.size()
+ << " non-phi/orig defs\n");
+
+ // Create entries for all the PHIs. Don't add them to the worklist, we
+ // are processing all of them in one go here.
+ for (VNInfo *PHI : PHIs)
+ SibValues.insert(std::make_pair(PHI, SibValueInfo(Reg, PHI)));
+
+ // Add every PHI as a dependent of all the non-PHIs.
+ for (VNInfo *NonPHI : NonPHIs) {
+ // Known value? Try an insertion.
+ std::tie(SVI, Inserted) =
+ SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI)));
+ // Add all the PHIs as dependents of NonPHI.
+ SVI->second.Deps.insert(SVI->second.Deps.end(), PHIs.begin(),
+ PHIs.end());
+ // This is the first time we see NonPHI, add it to the worklist.
+ if (Inserted)
+ WorkList.push_back(std::make_pair(Reg, NonPHI));
+ else
+ // Propagate to all inserted PHIs, not just VNI.
+ propagateSiblingValue(SVI);
+ }
+
+ // Next work list item.
+ continue;
+ }
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Missing def");
+
+ // Trace through sibling copies.
+ if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
+ if (isSibling(SrcReg)) {
+ LiveInterval &SrcLI = LIS.getInterval(SrcReg);
+ LiveQueryResult SrcQ = SrcLI.Query(VNI->def);
+ assert(SrcQ.valueIn() && "Copy from non-existing value");
+ // Check if this COPY kills its source.
+ SVI->second.KillsSource = SrcQ.isKill();
+ VNInfo *SrcVNI = SrcQ.valueIn();
+ DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':'
+ << SrcVNI->id << '@' << SrcVNI->def
+ << " kill=" << unsigned(SVI->second.KillsSource) << '\n');
+ // Known sibling source value? Try an insertion.
+ std::tie(SVI, Inserted) = SibValues.insert(
+ std::make_pair(SrcVNI, SibValueInfo(SrcReg, SrcVNI)));
+ // This is the first time we see Src, add it to the worklist.
+ if (Inserted)
+ WorkList.push_back(std::make_pair(SrcReg, SrcVNI));
+ propagateSiblingValue(SVI, VNI);
+ // Next work list item.
+ continue;
+ }
+ }
+
+ // Track reachable reloads.
+ SVI->second.DefMI = MI;
+ SVI->second.SpillMBB = MI->getParent();
+ int FI;
+ if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) {
+ DEBUG(dbgs() << "reload\n");
+ propagateSiblingValue(SVI);
+ // Next work list item.
+ continue;
+ }
+
+ // Potential remat candidate.
+ DEBUG(dbgs() << "def " << *MI);
+ SVI->second.AllDefsAreReloads = false;
+ propagateSiblingValue(SVI);
+ } while (!WorkList.empty());
+
+ // Look up the value we were looking for. We already did this lookup at the
+ // top of the function, but SibValues may have been invalidated.
+ SVI = SibValues.find(UseVNI);
+ assert(SVI != SibValues.end() && "Didn't compute requested info");
+ DEBUG(dbgs() << " traced to:\t" << SVI->second);
+ return SVI->second.DefMI;
+}
+
+/// analyzeSiblingValues - Trace values defined by sibling copies back to
+/// something that isn't a sibling copy.
///
-bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
- MachineInstr &CopyMI) {
+/// Keep track of values that may be rematerializable.
+void InlineSpiller::analyzeSiblingValues() {
+ SibValues.clear();
+
+ // No siblings at all?
+ if (Edit->getReg() == Original)
+ return;
+
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ for (unsigned Reg : RegsToSpill) {
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (LiveInterval::const_vni_iterator VI = LI.vni_begin(),
+ VE = LI.vni_end(); VI != VE; ++VI) {
+ VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
+ MachineInstr *DefMI = nullptr;
+ if (!VNI->isPHIDef()) {
+ DefMI = LIS.getInstructionFromIndex(VNI->def);
+ assert(DefMI && "No defining instruction");
+ }
+ // Check possible sibling copies.
+ if (VNI->isPHIDef() || DefMI->isCopy()) {
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
+ assert(OrigVNI && "Def outside original live range");
+ if (OrigVNI->def != VNI->def)
+ DefMI = traceSiblingValue(Reg, VNI, OrigVNI);
+ }
+ if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) {
+ DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@'
+ << VNI->def << " may remat from " << *DefMI);
+ }
+ }
+ }
+}
+
+/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert
+/// a spill at a better location.
+bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr &CopyMI) {
SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
-#ifndef NDEBUG
VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot());
assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy");
-#endif
+ SibValueMap::iterator I = SibValues.find(VNI);
+ if (I == SibValues.end())
+ return false;
+
+ const SibValueInfo &SVI = I->second;
+
+ // Let the normal folding code deal with the boring case.
+ if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI)
+ return false;
- unsigned SrcReg = CopyMI.getOperand(1).getReg();
- LiveInterval &SrcLI = LIS.getInterval(SrcReg);
- VNInfo *SrcVNI = SrcLI.getVNInfoAt(Idx);
- LiveQueryResult SrcQ = SrcLI.Query(Idx);
- MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(SrcVNI->def);
- if (DefMBB != CopyMI.getParent() || !SrcQ.isKill())
+ // SpillReg may have been deleted by remat and DCE.
+ if (!LIS.hasInterval(SVI.SpillReg)) {
+ DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n');
+ SibValues.erase(I);
return false;
+ }
+
+ LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg);
+ if (!SibLI.containsValue(SVI.SpillVNI)) {
+ DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n');
+ SibValues.erase(I);
+ return false;
+ }
// Conservatively extend the stack slot range to the range of the original
// value. We may be able to do better with stack slot coloring by being more
@@ -366,29 +719,35 @@ bool InlineSpiller::hoistSpillInsideBB(L
DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": "
<< *StackInt << '\n');
- // We are going to spill SrcVNI immediately after its def, so clear out
+ // Already spilled everywhere.
+ if (SVI.AllDefsAreReloads) {
+ DEBUG(dbgs() << "\tno spill needed: " << SVI);
+ ++NumOmitReloadSpill;
+ return true;
+ }
+ // We are going to spill SVI.SpillVNI immediately after its def, so clear out
// any later spills of the same value.
- eliminateRedundantSpills(SrcLI, SrcVNI);
+ eliminateRedundantSpills(SibLI, SVI.SpillVNI);
- MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def);
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def);
MachineBasicBlock::iterator MII;
- if (SrcVNI->isPHIDef())
+ if (SVI.SpillVNI->isPHIDef())
MII = MBB->SkipPHIsAndLabels(MBB->begin());
else {
- MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def);
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def);
assert(DefMI && "Defining instruction disappeared");
MII = DefMI;
++MII;
}
// Insert spill without kill flag immediately after def.
- TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot,
- MRI.getRegClass(SrcReg), &TRI);
+ TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot,
+ MRI.getRegClass(SVI.SpillReg), &TRI);
--MII; // Point to store instruction.
LIS.InsertMachineInstrInMaps(*MII);
- DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII);
+ DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII);
- HSpiller.addToMergeableSpills(&(*MII), StackSlot, Original);
++NumSpills;
+ ++NumHoists;
return true;
}
@@ -446,8 +805,7 @@ void InlineSpiller::eliminateRedundantSp
MI->setDesc(TII.get(TargetOpcode::KILL));
DeadDefs.push_back(MI);
++NumSpillsRemoved;
- if (HSpiller.rmFromMergeableSpills(MI, StackSlot))
- --NumSpills;
+ --NumSpills;
}
}
} while (!WorkList.empty());
@@ -518,12 +876,12 @@ bool InlineSpiller::reMaterializeFor(Liv
if (SnippetCopies.count(&MI))
return false;
- LiveInterval &OrigLI = LIS.getInterval(Original);
- VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
+ // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy.
LiveRangeEdit::Remat RM(ParentVNI);
- RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
-
- if (!Edit->canRematerializeAt(RM, OrigVNI, UseIdx, false)) {
+ SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
+ if (SibI != SibValues.end())
+ RM.OrigMI = SibI->second.DefMI;
+ if (!Edit->canRematerializeAt(RM, UseIdx, false)) {
markValueUsed(&VirtReg, ParentVNI);
DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
return false;
@@ -573,6 +931,7 @@ bool InlineSpiller::reMaterializeFor(Liv
/// reMaterializeAll - Try to rematerialize as many uses as possible,
/// and trim the live ranges after.
void InlineSpiller::reMaterializeAll() {
+ // analyzeSiblingValues has already tested all relevant defining instructions.
if (!Edit->anyRematerializable(AA))
return;
@@ -658,9 +1017,6 @@ bool InlineSpiller::coalesceStackAccess(
if (InstrReg != Reg || FI != StackSlot)
return false;
- if (!IsLoad)
- HSpiller.rmFromMergeableSpills(MI, StackSlot);
-
DEBUG(dbgs() << "Coalescing stack access: " << *MI);
LIS.RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
@@ -785,9 +1141,6 @@ foldMemoryOperand(ArrayRef<std::pair<Mac
LIS.removePhysRegDefAt(Reg, Idx);
}
- int FI;
- if (TII.isStoreToStackSlot(MI, FI) && HSpiller.rmFromMergeableSpills(MI, FI))
- --NumSpills;
LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI);
MI->eraseFromParent();
@@ -813,10 +1166,9 @@ foldMemoryOperand(ArrayRef<std::pair<Mac
if (!WasCopy)
++NumFolded;
- else if (Ops.front().second == 0) {
+ else if (Ops.front().second == 0)
++NumSpills;
- HSpiller.addToMergeableSpills(FoldMI, StackSlot, Original);
- } else
+ else
++NumReloads;
return true;
}
@@ -851,7 +1203,6 @@ void InlineSpiller::insertSpill(unsigned
DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS,
"spill"));
++NumSpills;
- HSpiller.addToMergeableSpills(std::next(MI), StackSlot, Original);
}
/// spillAroundUses - insert spill code around each use of Reg.
@@ -915,7 +1266,8 @@ void InlineSpiller::spillAroundUses(unsi
continue;
}
if (RI.Writes) {
- if (hoistSpillInsideBB(OldLI, *MI)) {
+ // Hoist the spill of a sib-reg copy.
+ if (hoistSpill(OldLI, *MI)) {
// This COPY is now dead, the value is already in the stack slot.
MI->getOperand(0).setIsDead();
DeadDefs.push_back(MI);
@@ -1028,6 +1380,7 @@ void InlineSpiller::spill(LiveRangeEdit
assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
collectRegsToSpill();
+ analyzeSiblingValues();
reMaterializeAll();
// Remat may handle everything.
@@ -1036,400 +1389,3 @@ void InlineSpiller::spill(LiveRangeEdit
Edit->calculateRegClassAndHint(MF, Loops, MBFI);
}
-
-/// Optimizations after all the reg selections and spills are done.
-///
-void InlineSpiller::postOptimization() {
- SmallVector<unsigned, 4> NewVRegs;
- LiveRangeEdit LRE(nullptr, NewVRegs, MF, LIS, &VRM, nullptr);
- HSpiller.hoistAllSpills(LRE);
- assert(NewVRegs.size() == 0 &&
- "No new vregs should be generated in hoistAllSpills");
-}
-
-/// When a spill is inserted, add the spill to MergeableSpills map.
-///
-void HoistSpillHelper::addToMergeableSpills(MachineInstr *Spill, int StackSlot,
- unsigned Original) {
- StackSlotToReg[StackSlot] = Original;
- SlotIndex Idx = LIS.getInstructionIndex(*Spill);
- VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
- std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
- MergeableSpills[MIdx].insert(Spill);
-}
-
-/// When a spill is removed, remove the spill from MergeableSpills map.
-/// Return true if the spill is removed successfully.
-///
-bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr *Spill,
- int StackSlot) {
- int Original = StackSlotToReg[StackSlot];
- if (!Original)
- return false;
- SlotIndex Idx = LIS.getInstructionIndex(*Spill);
- VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
- std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
- return MergeableSpills[MIdx].erase(Spill);
-}
-
-/// Check BB to see if it is a possible target BB to place a hoisted spill,
-/// i.e., there should be a living sibling of OrigReg at the insert point.
-///
-bool HoistSpillHelper::isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI,
- MachineBasicBlock &BB, unsigned &LiveReg) {
- SlotIndex Idx;
- MachineBasicBlock::iterator MI = BB.getFirstTerminator();
- if (MI != BB.end())
- Idx = LIS.getInstructionIndex(*MI);
- else
- Idx = LIS.getMBBEndIdx(&BB).getPrevSlot();
- SmallSetVector<unsigned, 16> &Siblings = Virt2SiblingsMap[OrigReg];
- assert((LIS.getInterval(OrigReg)).getVNInfoAt(Idx) == &OrigVNI &&
- "Unexpected VNI");
-
- for (auto const SibReg : Siblings) {
- LiveInterval &LI = LIS.getInterval(SibReg);
- VNInfo *VNI = LI.getVNInfoAt(Idx);
- if (VNI) {
- LiveReg = SibReg;
- return true;
- }
- }
- return false;
-}
-
-/// Remove redundent spills in the same BB. Save those redundent spills in
-/// SpillsToRm, and save the spill to keep and its BB in SpillBBToSpill map.
-///
-void HoistSpillHelper::rmRedundantSpills(
- SmallPtrSet<MachineInstr *, 16> &Spills,
- SmallVectorImpl<MachineInstr *> &SpillsToRm,
- DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
- // For each spill saw, check SpillBBToSpill[] and see if its BB already has
- // another spill inside. If a BB contains more than one spill, only keep the
- // earlier spill with smaller SlotIndex.
- for (const auto CurrentSpill : Spills) {
- MachineBasicBlock *Block = CurrentSpill->getParent();
- MachineDomTreeNode *Node = MDT.DT->getNode(Block);
- MachineInstr *PrevSpill = SpillBBToSpill[Node];
- if (PrevSpill) {
- SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill);
- SlotIndex CIdx = LIS.getInstructionIndex(*CurrentSpill);
- MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill;
- MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill;
- SpillsToRm.push_back(SpillToRm);
- SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep;
- } else {
- SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill;
- }
- }
- for (const auto SpillToRm : SpillsToRm)
- Spills.erase(SpillToRm);
-}
-
-/// Starting from \p Root find a top-down traversal order of the dominator
-/// tree to visit all basic blocks containing the elements of \p Spills.
-/// Redundant spills will be found and put into \p SpillsToRm at the same
-/// time. \p SpillBBToSpill will be populated as part of the process and
-/// maps a basic block to the first store occurring in the basic block.
-/// \post SpillsToRm.union(Spills\@post) == Spills\@pre
-///
-void HoistSpillHelper::getVisitOrders(
- MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
- SmallVectorImpl<MachineDomTreeNode *> &Orders,
- SmallVectorImpl<MachineInstr *> &SpillsToRm,
- DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
- DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
- // The set contains all the possible BB nodes to which we may hoist
- // original spills.
- SmallPtrSet<MachineDomTreeNode *, 8> WorkSet;
- // Save the BB nodes on the path from the first BB node containing
- // non-redundent spill to the Root node.
- SmallPtrSet<MachineDomTreeNode *, 8> NodesOnPath;
- // All the spills to be hoisted must originate from a single def instruction
- // to the OrigReg. It means the def instruction should dominate all the spills
- // to be hoisted. We choose the BB where the def instruction is located as
- // the Root.
- MachineDomTreeNode *RootIDomNode = MDT[Root]->getIDom();
- // For every node on the dominator tree with spill, walk up on the dominator
- // tree towards the Root node until it is reached. If there is other node
- // containing spill in the middle of the path, the previous spill saw will
- // be redundent and the node containing it will be removed. All the nodes on
- // the path starting from the first node with non-redundent spill to the Root
- // node will be added to the WorkSet, which will contain all the possible
- // locations where spills may be hoisted to after the loop below is done.
- for (const auto Spill : Spills) {
- MachineBasicBlock *Block = Spill->getParent();
- MachineDomTreeNode *Node = MDT[Block];
- MachineInstr *SpillToRm = nullptr;
- while (Node != RootIDomNode) {
- // If Node dominates Block, and it already contains a spill, the spill in
- // Block will be redundent.
- if (Node != MDT[Block] && SpillBBToSpill[Node]) {
- SpillToRm = SpillBBToSpill[MDT[Block]];
- break;
- /// If we see the Node already in WorkSet, the path from the Node to
- /// the Root node must already be traversed by another spill.
- /// Then no need to repeat.
- } else if (WorkSet.count(Node)) {
- break;
- } else {
- NodesOnPath.insert(Node);
- }
- Node = Node->getIDom();
- }
- if (SpillToRm) {
- SpillsToRm.push_back(SpillToRm);
- } else {
- // Add a BB containing the original spills to SpillsToKeep -- i.e.,
- // set the initial status before hoisting start. The value of BBs
- // containing original spills is set to 0, in order to descriminate
- // with BBs containing hoisted spills which will be inserted to
- // SpillsToKeep later during hoisting.
- SpillsToKeep[MDT[Block]] = 0;
- WorkSet.insert(NodesOnPath.begin(), NodesOnPath.end());
- }
- NodesOnPath.clear();
- }
-
- // Sort the nodes in WorkSet in top-down order and save the nodes
- // in Orders. Orders will be used for hoisting in runHoistSpills.
- unsigned idx = 0;
- Orders.push_back(MDT.DT->getNode(Root));
- do {
- MachineDomTreeNode *Node = Orders[idx++];
- const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
- unsigned NumChildren = Children.size();
- for (unsigned i = 0; i != NumChildren; ++i) {
- MachineDomTreeNode *Child = Children[i];
- if (WorkSet.count(Child))
- Orders.push_back(Child);
- }
- } while (idx != Orders.size());
- assert(Orders.size() == WorkSet.size() &&
- "Orders have different size with WorkSet");
-
-#ifndef NDEBUG
- DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n");
- SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
- for (; RIt != Orders.rend(); RIt++)
- DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ",");
- DEBUG(dbgs() << "\n");
-#endif
-}
-
-/// Try to hoist spills according to BB hotness. The spills to removed will
-/// be saved in \p SpillsToRm. The spills to be inserted will be saved in
-/// \p SpillsToIns.
-///
-void HoistSpillHelper::runHoistSpills(
- unsigned OrigReg, VNInfo &OrigVNI, SmallPtrSet<MachineInstr *, 16> &Spills,
- SmallVectorImpl<MachineInstr *> &SpillsToRm,
- DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns) {
- // Visit order of dominator tree nodes.
- SmallVector<MachineDomTreeNode *, 32> Orders;
- // SpillsToKeep contains all the nodes where spills are to be inserted
- // during hoisting. If the spill to be inserted is an original spill
- // (not a hoisted one), the value of the map entry is 0. If the spill
- // is a hoisted spill, the value of the map entry is the VReg to be used
- // as the source of the spill.
- DenseMap<MachineDomTreeNode *, unsigned> SpillsToKeep;
- // Map from BB to the first spill inside of it.
- DenseMap<MachineDomTreeNode *, MachineInstr *> SpillBBToSpill;
-
- rmRedundantSpills(Spills, SpillsToRm, SpillBBToSpill);
-
- MachineBasicBlock *Root = LIS.getMBBFromIndex(OrigVNI.def);
- getVisitOrders(Root, Spills, Orders, SpillsToRm, SpillsToKeep,
- SpillBBToSpill);
-
- // SpillsInSubTreeMap keeps the map from a dom tree node to a pair of
- // nodes set and the cost of all the spills inside those nodes.
- // The nodes set are the locations where spills are to be inserted
- // in the subtree of current node.
- typedef std::pair<SmallPtrSet<MachineDomTreeNode *, 16>, BlockFrequency>
- NodesCostPair;
- DenseMap<MachineDomTreeNode *, NodesCostPair> SpillsInSubTreeMap;
- // Iterate Orders set in reverse order, which will be a bottom-up order
- // in the dominator tree. Once we visit a dom tree node, we know its
- // children have already been visited and the spill locations in the
- // subtrees of all the children have been determined.
- SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
- for (; RIt != Orders.rend(); RIt++) {
- MachineBasicBlock *Block = (*RIt)->getBlock();
-
- // If Block contains an original spill, simply continue.
- if (SpillsToKeep.find(*RIt) != SpillsToKeep.end() && !SpillsToKeep[*RIt]) {
- SpillsInSubTreeMap[*RIt].first.insert(*RIt);
- // SpillsInSubTreeMap[*RIt].second contains the cost of spill.
- SpillsInSubTreeMap[*RIt].second = MBFI.getBlockFreq(Block);
- continue;
- }
-
- // Collect spills in subtree of current node (*RIt) to
- // SpillsInSubTreeMap[*RIt].first.
- const std::vector<MachineDomTreeNode *> &Children = (*RIt)->getChildren();
- unsigned NumChildren = Children.size();
- for (unsigned i = 0; i != NumChildren; ++i) {
- MachineDomTreeNode *Child = Children[i];
- if (SpillsInSubTreeMap.find(Child) == SpillsInSubTreeMap.end())
- continue;
- // SpillsInSubTreeMap[*RIt].second += SpillsInSubTreeMap[Child].second
- // should be placed before getting the begin and end iterators of
- // SpillsInSubTreeMap[Child].first, or else the iterators may be
- // invalidated when SpillsInSubTreeMap[*RIt] is seen the first time
- // and the map grows and then the original buckets in the map are moved.
- SpillsInSubTreeMap[*RIt].second += SpillsInSubTreeMap[Child].second;
- auto BI = SpillsInSubTreeMap[Child].first.begin();
- auto EI = SpillsInSubTreeMap[Child].first.end();
- SpillsInSubTreeMap[*RIt].first.insert(BI, EI);
- SpillsInSubTreeMap.erase(Child);
- }
-
- // No spills in subtree, simply continue.
- if (SpillsInSubTreeMap[*RIt].first.empty())
- continue;
-
- // Check whether Block is a possible candidate to insert spill.
- unsigned LiveReg = 0;
- if (!isSpillCandBB(OrigReg, OrigVNI, *Block, LiveReg))
- continue;
-
- // If there are multiple spills that could be merged, bias a little
- // to hoist the spill.
- BranchProbability MarginProb = (SpillsInSubTreeMap[*RIt].first.size() > 1)
- ? BranchProbability(9, 10)
- : BranchProbability(1, 1);
- if (SpillsInSubTreeMap[*RIt].second >
- MBFI.getBlockFreq(Block) * MarginProb) {
- // Hoist: Move spills to current Block.
- for (const auto SpillBB : SpillsInSubTreeMap[*RIt].first) {
- // When SpillBB is a BB contains original spill, insert the spill
- // to SpillsToRm.
- if (SpillsToKeep.find(SpillBB) != SpillsToKeep.end() &&
- !SpillsToKeep[SpillBB]) {
- MachineInstr *SpillToRm = SpillBBToSpill[SpillBB];
- SpillsToRm.push_back(SpillToRm);
- }
- // SpillBB will not contain spill anymore, remove it from SpillsToKeep.
- SpillsToKeep.erase(SpillBB);
- }
- // Current Block is the BB containing the new hoisted spill. Add it to
- // SpillsToKeep. LiveReg is the source of the new spill.
- SpillsToKeep[*RIt] = LiveReg;
- DEBUG({
- dbgs() << "spills in BB: ";
- for (const auto Rspill : SpillsInSubTreeMap[*RIt].first)
- dbgs() << Rspill->getBlock()->getNumber() << " ";
- dbgs() << "were promoted to BB" << (*RIt)->getBlock()->getNumber()
- << "\n";
- });
- SpillsInSubTreeMap[*RIt].first.clear();
- SpillsInSubTreeMap[*RIt].first.insert(*RIt);
- SpillsInSubTreeMap[*RIt].second = MBFI.getBlockFreq(Block);
- }
- }
- // For spills in SpillsToKeep with LiveReg set (i.e., not original spill),
- // save them to SpillsToIns.
- for (const auto Ent : SpillsToKeep) {
- if (Ent.second)
- SpillsToIns[Ent.first->getBlock()] = Ent.second;
- }
-}
-
-/// For spills with equal values, remove redundent spills and hoist the left
-/// to less hot spots.
-///
-/// Spills with equal values will be collected into the same set in
-/// MergeableSpills when spill is inserted. These equal spills are originated
-/// from the same define instruction and are dominated by the instruction.
-/// Before hoisting all the equal spills, redundent spills inside in the same
-/// BB is first marked to be deleted. Then starting from spills left, walk up
-/// on the dominator tree towards the Root node where the define instruction
-/// is located, mark the dominated spills to be deleted along the way and
-/// collect the BB nodes on the path from non-dominated spills to the define
-/// instruction into a WorkSet. The nodes in WorkSet are the candidate places
-/// where we consider to hoist the spills. We iterate the WorkSet in bottom-up
-/// order, and for each node, we will decide whether to hoist spills inside
-/// its subtree to that node. In this way, we can get benefit locally even if
-/// hoisting all the equal spills to one cold place is impossible.
-///
-void HoistSpillHelper::hoistAllSpills(LiveRangeEdit &Edit) {
- // Save the mapping between stackslot and its original reg.
- DenseMap<int, unsigned> SlotToOrigReg;
- for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- int Slot = VRM.getStackSlot(Reg);
- if (Slot != VirtRegMap::NO_STACK_SLOT)
- SlotToOrigReg[Slot] = VRM.getOriginal(Reg);
- unsigned Original = VRM.getPreSplitReg(Reg);
- if (!MRI.def_empty(Reg))
- Virt2SiblingsMap[Original].insert(Reg);
- }
-
- // Each entry in MergeableSpills contains a spill set with equal values.
- for (auto &Ent : MergeableSpills) {
- int Slot = Ent.first.first;
- unsigned OrigReg = SlotToOrigReg[Slot];
- VNInfo *OrigVNI = Ent.first.second;
- SmallPtrSet<MachineInstr *, 16> &EqValSpills = Ent.second;
- if (Ent.second.empty())
- continue;
-
- DEBUG({
- dbgs() << "\nFor Slot" << Slot << " and VN" << OrigVNI->id << ":\n"
- << "Equal spills in BB: ";
- for (const auto spill : EqValSpills)
- dbgs() << spill->getParent()->getNumber() << " ";
- dbgs() << "\n";
- });
-
- // SpillsToRm is the spill set to be removed from EqValSpills.
- SmallVector<MachineInstr *, 16> SpillsToRm;
- // SpillsToIns is the spill set to be newly inserted after hoisting.
- DenseMap<MachineBasicBlock *, unsigned> SpillsToIns;
-
- runHoistSpills(OrigReg, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns);
-
- DEBUG({
- dbgs() << "Finally inserted spills in BB: ";
- for (const auto Ispill : SpillsToIns)
- dbgs() << Ispill.first->getNumber() << " ";
- dbgs() << "\nFinally removed spills in BB: ";
- for (const auto Rspill : SpillsToRm)
- dbgs() << Rspill->getParent()->getNumber() << " ";
- dbgs() << "\n";
- });
-
- // Stack live range update.
- LiveInterval &StackIntvl = LSS.getInterval(Slot);
- if (!SpillsToIns.empty() || !SpillsToRm.empty()) {
- LiveInterval &OrigLI = LIS.getInterval(OrigReg);
- StackIntvl.MergeValueInAsValue(OrigLI, OrigVNI,
- StackIntvl.getValNumInfo(0));
- }
-
- // Insert hoisted spills.
- for (auto const Insert : SpillsToIns) {
- MachineBasicBlock *BB = Insert.first;
- unsigned LiveReg = Insert.second;
- MachineBasicBlock::iterator MI = BB->getFirstTerminator();
- TII.storeRegToStackSlot(*BB, MI, LiveReg, false, Slot,
- MRI.getRegClass(LiveReg), &TRI);
- LIS.InsertMachineInstrRangeInMaps(std::prev(MI), MI);
- ++NumSpills;
- }
-
- // Remove redundent spills or change them to dead instructions.
- NumSpills -= SpillsToRm.size();
- for (auto const RMEnt : SpillsToRm) {
- RMEnt->setDesc(TII.get(TargetOpcode::KILL));
- for (unsigned i = RMEnt->getNumOperands(); i; --i) {
- MachineOperand &MO = RMEnt->getOperand(i - 1);
- if (MO.isReg() && MO.isImplicit() && MO.isDef() && !MO.isDead())
- RMEnt->RemoveOperand(i - 1);
- }
- }
- Edit.eliminateDeadDefs(SpillsToRm, None, true);
- }
-}
Modified: llvm/trunk/lib/CodeGen/LiveRangeEdit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveRangeEdit.cpp?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/LiveRangeEdit.cpp (original)
+++ llvm/trunk/lib/CodeGen/LiveRangeEdit.cpp Fri Apr 8 10:17:43 2016
@@ -63,13 +63,10 @@ void LiveRangeEdit::scanRemattable(Alias
for (VNInfo *VNI : getParent().valnos) {
if (VNI->isUnused())
continue;
- unsigned Original = VRM->getOriginal(getReg());
- LiveInterval &OrigLI = LIS.getInterval(Original);
- VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
- MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def);
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
if (!DefMI)
continue;
- checkRematerializable(OrigVNI, DefMI, aa);
+ checkRematerializable(VNI, DefMI, aa);
}
ScannedRemattable = true;
}
@@ -116,18 +113,24 @@ bool LiveRangeEdit::allUsesAvailableAt(c
return true;
}
-bool LiveRangeEdit::canRematerializeAt(Remat &RM, VNInfo *OrigVNI,
- SlotIndex UseIdx, bool cheapAsAMove) {
+bool LiveRangeEdit::canRematerializeAt(Remat &RM,
+ SlotIndex UseIdx,
+ bool cheapAsAMove) {
assert(ScannedRemattable && "Call anyRematerializable first");
// Use scanRemattable info.
- if (!Remattable.count(OrigVNI))
+ if (!Remattable.count(RM.ParentVNI))
return false;
// No defining instruction provided.
SlotIndex DefIdx;
- assert(RM.OrigMI && "No defining instruction for remattable value");
- DefIdx = LIS.getInstructionIndex(*RM.OrigMI);
+ if (RM.OrigMI)
+ DefIdx = LIS.getInstructionIndex(*RM.OrigMI);
+ else {
+ DefIdx = RM.ParentVNI->def;
+ RM.OrigMI = LIS.getInstructionFromIndex(DefIdx);
+ assert(RM.OrigMI && "No defining instruction for remattable value");
+ }
// If only cheap remats were requested, bail out early.
if (cheapAsAMove && !TII.isAsCheapAsAMove(RM.OrigMI))
@@ -258,15 +261,6 @@ void LiveRangeEdit::eliminateDeadDef(Mac
// Collect virtual registers to be erased after MI is gone.
SmallVector<unsigned, 8> RegsToErase;
bool ReadsPhysRegs = false;
- bool isOrigDef = false;
- unsigned Dest;
- if (VRM && MI->getOperand(0).isReg()) {
- Dest = MI->getOperand(0).getReg();
- unsigned Original = VRM->getOriginal(Dest);
- LiveInterval &OrigLI = LIS.getInterval(Original);
- VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
- isOrigDef = SlotIndex::isSameInstr(OrigVNI->def, Idx);
- }
// Check for live intervals that may shrink
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
@@ -320,24 +314,11 @@ void LiveRangeEdit::eliminateDeadDef(Mac
}
DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
} else {
- // If the dest of MI is an original reg, don't delete the inst. Replace
- // the dest with a new reg, keep the inst for remat of other siblings.
- // The inst is saved in LiveRangeEdit::DeadRemats and will be deleted
- // after all the allocations of the func are done.
- if (isOrigDef) {
- unsigned NewDest = createFrom(Dest);
- pop_back();
- markDeadRemat(MI);
- const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
- MI->substituteRegister(Dest, NewDest, 0, TRI);
- MI->getOperand(0).setIsDead(false);
- } else {
- if (TheDelegate)
- TheDelegate->LRE_WillEraseInstruction(MI);
- LIS.RemoveMachineInstrFromMaps(*MI);
- MI->eraseFromParent();
- ++NumDCEDeleted;
- }
+ if (TheDelegate)
+ TheDelegate->LRE_WillEraseInstruction(MI);
+ LIS.RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ ++NumDCEDeleted;
}
// Erase any virtregs that are now empty and unused. There may be <undef>
@@ -351,9 +332,8 @@ void LiveRangeEdit::eliminateDeadDef(Mac
}
}
-void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
- ArrayRef<unsigned> RegsBeingSpilled,
- bool NoSplit) {
+void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
+ ArrayRef<unsigned> RegsBeingSpilled) {
ToShrinkSet ToShrink;
for (;;) {
@@ -375,9 +355,6 @@ void LiveRangeEdit::eliminateDeadDefs(Sm
if (!LIS.shrinkToUses(LI, &Dead))
continue;
- if (NoSplit)
- continue;
-
// Don't create new intervals for a register being spilled.
// The new intervals would have to be spilled anyway so its not worth it.
// Also they currently aren't spilled so creating them and not spilling
Modified: llvm/trunk/lib/CodeGen/RegAllocBase.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocBase.cpp?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegAllocBase.cpp (original)
+++ llvm/trunk/lib/CodeGen/RegAllocBase.cpp Fri Apr 8 10:17:43 2016
@@ -153,12 +153,3 @@ void RegAllocBase::allocatePhysRegs() {
}
}
}
-
-void RegAllocBase::postOptimization() {
- spiller().postOptimization();
- for (auto DeadInst : DeadRemats) {
- LIS->RemoveMachineInstrFromMaps(*DeadInst);
- DeadInst->eraseFromParent();
- }
- DeadRemats.clear();
-}
Modified: llvm/trunk/lib/CodeGen/RegAllocBase.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocBase.h?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegAllocBase.h (original)
+++ llvm/trunk/lib/CodeGen/RegAllocBase.h Fri Apr 8 10:17:43 2016
@@ -65,12 +65,6 @@ protected:
LiveRegMatrix *Matrix;
RegisterClassInfo RegClassInfo;
- /// Inst which is a def of an original reg and whose defs are already all
- /// dead after remat is saved in DeadRemats. The deletion of such inst is
- /// postponed till all the allocations are done, so its remat expr is
- /// always available for the remat of all the siblings of the original reg.
- SmallPtrSet<MachineInstr *, 32> DeadRemats;
-
RegAllocBase()
: TRI(nullptr), MRI(nullptr), VRM(nullptr), LIS(nullptr), Matrix(nullptr) {}
@@ -83,10 +77,6 @@ protected:
// physical register assignments.
void allocatePhysRegs();
- // Include spiller post optimization and removing dead defs left because of
- // rematerialization.
- virtual void postOptimization();
-
// Get a temporary reference to a Spiller instance.
virtual Spiller &spiller() = 0;
Modified: llvm/trunk/lib/CodeGen/RegAllocBasic.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocBasic.cpp?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegAllocBasic.cpp (original)
+++ llvm/trunk/lib/CodeGen/RegAllocBasic.cpp Fri Apr 8 10:17:43 2016
@@ -199,7 +199,7 @@ bool RABasic::spillInterferences(LiveInt
Matrix->unassign(Spill);
// Spill the extracted interval.
- LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
+ LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM);
spiller().spill(LRE);
}
return true;
@@ -258,7 +258,7 @@ unsigned RABasic::selectOrSplit(LiveInte
DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
if (!VirtReg.isSpillable())
return ~0u;
- LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
+ LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM);
spiller().spill(LRE);
// The live virtual register requesting allocation was spilled, so tell
@@ -283,7 +283,6 @@ bool RABasic::runOnMachineFunction(Machi
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
allocatePhysRegs();
- postOptimization();
// Diagnostic output before rewriting
DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
Modified: llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp (original)
+++ llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp Fri Apr 8 10:17:43 2016
@@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/Passes.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
@@ -32,7 +33,6 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
@@ -44,7 +44,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <queue>
@@ -56,14 +55,14 @@ STATISTIC(NumGlobalSplits, "Number of sp
STATISTIC(NumLocalSplits, "Number of split local live ranges");
STATISTIC(NumEvicted, "Number of interferences evicted");
-static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
- "split-spill-mode", cl::Hidden,
- cl::desc("Spill mode for splitting live ranges"),
- cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
- clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
- clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
- clEnumValEnd),
- cl::init(SplitEditor::SM_Speed));
+static cl::opt<SplitEditor::ComplementSpillMode>
+SplitSpillMode("split-spill-mode", cl::Hidden,
+ cl::desc("Spill mode for splitting live ranges"),
+ cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
+ clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
+ clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
+ clEnumValEnd),
+ cl::init(SplitEditor::SM_Partition));
static cl::opt<unsigned>
LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
@@ -1466,7 +1465,7 @@ unsigned RAGreedy::doRegionSplit(LiveInt
SmallVectorImpl<unsigned> &NewVRegs) {
SmallVector<unsigned, 8> UsedCands;
// Prepare split editor.
- LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit, SplitSpillMode);
// Assign all edge bundles to the preferred candidate, or NoCand.
@@ -1514,7 +1513,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInt
assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
unsigned Reg = VirtReg.reg;
bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
- LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit, SplitSpillMode);
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned i = 0; i != UseBlocks.size(); ++i) {
@@ -1586,7 +1585,7 @@ RAGreedy::tryInstructionSplit(LiveInterv
// Always enable split spill mode, since we're effectively spilling to a
// register.
- LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit, SplitEditor::SM_Size);
ArrayRef<SlotIndex> Uses = SA->getUseSlots();
@@ -1909,7 +1908,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInt
<< '-' << Uses[BestAfter] << ", " << BestDiff
<< ", " << (BestAfter - BestBefore + 1) << " instrs\n");
- LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit);
SE->openIntv();
@@ -2552,7 +2551,7 @@ unsigned RAGreedy::selectOrSplitImpl(Liv
NewVRegs.push_back(VirtReg.reg);
} else {
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
- LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
spiller().spill(LRE);
setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
@@ -2610,8 +2609,6 @@ bool RAGreedy::runOnMachineFunction(Mach
allocatePhysRegs();
tryHintsRecoloring();
- postOptimization();
-
releaseMemory();
return true;
}
Modified: llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp (original)
+++ llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp Fri Apr 8 10:17:43 2016
@@ -123,12 +123,6 @@ private:
RegSet VRegsToAlloc, EmptyIntervalVRegs;
- /// Inst which is a def of an original reg and whose defs are already all
- /// dead after remat is saved in DeadRemats. The deletion of such inst is
- /// postponed till all the allocations are done, so its remat expr is
- /// always available for the remat of all the siblings of the original reg.
- SmallPtrSet<MachineInstr *, 32> DeadRemats;
-
/// \brief Finds the initial set of vreg intervals to allocate.
void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
@@ -152,7 +146,6 @@ private:
void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
VirtRegMap &VRM) const;
- void postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS);
};
char RegAllocPBQP::ID = 0;
@@ -638,8 +631,7 @@ void RegAllocPBQP::spillVReg(unsigned VR
VirtRegMap &VRM, Spiller &VRegSpiller) {
VRegsToAlloc.erase(VReg);
- LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM,
- nullptr, &DeadRemats);
+ LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM);
VRegSpiller.spill(LRE);
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
@@ -721,16 +713,6 @@ void RegAllocPBQP::finalizeAlloc(Machine
}
}
-void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
- VRegSpiller.postOptimization();
- /// Remove dead defs because of rematerialization.
- for (auto DeadInst : DeadRemats) {
- LIS.RemoveMachineInstrFromMaps(*DeadInst);
- DeadInst->eraseFromParent();
- }
- DeadRemats.clear();
-}
-
static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size,
unsigned NumInstr) {
// All intervals have a spill weight that is mostly proportional to the number
@@ -816,7 +798,6 @@ bool RegAllocPBQP::runOnMachineFunction(
// Finalise allocation, allocate empty ranges.
finalizeAlloc(MF, LIS, VRM);
- postOptimization(*VRegSpiller, LIS);
VRegsToAlloc.clear();
EmptyIntervalVRegs.clear();
Modified: llvm/trunk/lib/CodeGen/Spiller.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/Spiller.h?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/Spiller.h (original)
+++ llvm/trunk/lib/CodeGen/Spiller.h Fri Apr 8 10:17:43 2016
@@ -16,7 +16,6 @@ namespace llvm {
class MachineFunction;
class MachineFunctionPass;
class VirtRegMap;
- class LiveIntervals;
/// Spiller interface.
///
@@ -29,7 +28,7 @@ namespace llvm {
/// spill - Spill the LRE.getParent() live interval.
virtual void spill(LiveRangeEdit &LRE) = 0;
- virtual void postOptimization(){};
+
};
/// Create and return a spiller that will insert spill code directly instead
@@ -37,6 +36,7 @@ namespace llvm {
Spiller *createInlineSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm);
+
}
#endif
Modified: llvm/trunk/lib/CodeGen/SplitKit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SplitKit.cpp?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SplitKit.cpp (original)
+++ llvm/trunk/lib/CodeGen/SplitKit.cpp Fri Apr 8 10:17:43 2016
@@ -16,7 +16,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -431,13 +430,8 @@ VNInfo *SplitEditor::defFromParent(unsig
bool Late = RegIdx != 0;
// Attempt cheap-as-a-copy rematerialization.
- unsigned Original = VRM.getOriginal(Edit->get(RegIdx));
- LiveInterval &OrigLI = LIS.getInterval(Original);
- VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
LiveRangeEdit::Remat RM(ParentVNI);
- RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
-
- if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
+ if (Edit->canRematerializeAt(RM, UseIdx, true)) {
Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
++NumRemats;
} else {
@@ -722,62 +716,7 @@ SplitEditor::findShallowDominator(Machin
}
}
-void SplitEditor::computeRedundantBackCopies(
- DenseSet<unsigned> &NotToHoistSet, SmallVectorImpl<VNInfo *> &BackCopies) {
- LiveInterval *LI = &LIS.getInterval(Edit->get(0));
- LiveInterval *Parent = &Edit->getParent();
- SmallVector<SmallPtrSet<VNInfo *, 8>, 8> EqualVNs(Parent->getNumValNums());
- SmallPtrSet<VNInfo *, 8> DominatedVNIs;
-
- // Aggregate VNIs having the same value as ParentVNI.
- for (VNInfo *VNI : LI->valnos) {
- if (VNI->isUnused())
- continue;
- VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
- EqualVNs[ParentVNI->id].insert(VNI);
- }
-
- // For VNI aggregation of each ParentVNI, collect dominated, i.e.,
- // redundant VNIs to BackCopies.
- for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) {
- VNInfo *ParentVNI = Parent->getValNumInfo(i);
- if (!NotToHoistSet.count(ParentVNI->id))
- continue;
- SmallPtrSetIterator<VNInfo *> It1 = EqualVNs[ParentVNI->id].begin();
- SmallPtrSetIterator<VNInfo *> It2 = It1;
- for (; It1 != EqualVNs[ParentVNI->id].end(); ++It1) {
- It2 = It1;
- for (++It2; It2 != EqualVNs[ParentVNI->id].end(); ++It2) {
- if (DominatedVNIs.count(*It1) || DominatedVNIs.count(*It2))
- continue;
-
- MachineBasicBlock *MBB1 = LIS.getMBBFromIndex((*It1)->def);
- MachineBasicBlock *MBB2 = LIS.getMBBFromIndex((*It2)->def);
- if (MBB1 == MBB2) {
- DominatedVNIs.insert((*It1)->def < (*It2)->def ? (*It2) : (*It1));
- } else if (MDT.dominates(MBB1, MBB2)) {
- DominatedVNIs.insert(*It2);
- } else if (MDT.dominates(MBB2, MBB1)) {
- DominatedVNIs.insert(*It1);
- }
- }
- }
- if (!DominatedVNIs.empty()) {
- forceRecompute(0, ParentVNI);
- for (auto VNI : DominatedVNIs) {
- BackCopies.push_back(VNI);
- }
- DominatedVNIs.clear();
- }
- }
-}
-
-/// For SM_Size mode, find a common dominator for all the back-copies for
-/// the same ParentVNI and hoist the backcopies to the dominator BB.
-/// For SM_Speed mode, if the common dominator is hot and it is not beneficial
-/// to do the hoisting, simply remove the dominated backcopies for the same
-/// ParentVNI.
-void SplitEditor::hoistCopies() {
+void SplitEditor::hoistCopiesForSize() {
// Get the complement interval, always RegIdx 0.
LiveInterval *LI = &LIS.getInterval(Edit->get(0));
LiveInterval *Parent = &Edit->getParent();
@@ -786,11 +725,6 @@ void SplitEditor::hoistCopies() {
// indexed by ParentVNI->id.
typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair;
SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums());
- // The total cost of all the back-copies for each ParentVNI.
- SmallVector<BlockFrequency, 8> Costs(Parent->getNumValNums());
- // The ParentVNI->id set for which hoisting back-copies are not beneficial
- // for Speed.
- DenseSet<unsigned> NotToHoistSet;
// Find the nearest common dominator for parent values with multiple
// back-copies. If a single back-copy dominates, put it in DomPair.second.
@@ -806,7 +740,6 @@ void SplitEditor::hoistCopies() {
continue;
MachineBasicBlock *ValMBB = LIS.getMBBFromIndex(VNI->def);
-
DomPair &Dom = NearestDom[ParentVNI->id];
// Keep directly defined parent values. This is either a PHI or an
@@ -841,7 +774,6 @@ void SplitEditor::hoistCopies() {
else if (Near != Dom.first)
// None dominate. Hoist to common dominator, need new def.
Dom = DomPair(Near, SlotIndex());
- Costs[ParentVNI->id] += MBFI.getBlockFreq(ValMBB);
}
DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def
@@ -860,11 +792,6 @@ void SplitEditor::hoistCopies() {
MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def);
// Get a less loopy dominator than Dom.first.
Dom.first = findShallowDominator(Dom.first, DefMBB);
- if (SpillMode == SM_Speed &&
- MBFI.getBlockFreq(Dom.first) > Costs[ParentVNI->id]) {
- NotToHoistSet.insert(ParentVNI->id);
- continue;
- }
SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot();
Dom.second =
defFromParent(0, ParentVNI, Last, *Dom.first,
@@ -879,18 +806,11 @@ void SplitEditor::hoistCopies() {
continue;
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
const DomPair &Dom = NearestDom[ParentVNI->id];
- if (!Dom.first || Dom.second == VNI->def ||
- NotToHoistSet.count(ParentVNI->id))
+ if (!Dom.first || Dom.second == VNI->def)
continue;
BackCopies.push_back(VNI);
forceRecompute(0, ParentVNI);
}
-
- // If it is not beneficial to hoist all the BackCopies, simply remove
- // redundant BackCopies in speed mode.
- if (SpillMode == SM_Speed && !NotToHoistSet.empty())
- computeRedundantBackCopies(NotToHoistSet, BackCopies);
-
removeBackCopies(BackCopies);
}
@@ -1084,8 +1004,6 @@ void SplitEditor::deleteRematVictims() {
// Dead defs end at the dead slot.
if (S.end != S.valno->def.getDeadSlot())
continue;
- if (S.valno->isPHIDef())
- continue;
MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def);
assert(MI && "Missing instruction for dead def");
MI->addRegisterDead(LI->reg, &TRI);
@@ -1130,9 +1048,10 @@ void SplitEditor::finish(SmallVectorImpl
// Leave all back-copies as is.
break;
case SM_Size:
+ hoistCopiesForSize();
+ break;
case SM_Speed:
- // hoistCopies will behave differently between size and speed.
- hoistCopies();
+ llvm_unreachable("Spill mode 'speed' not implemented yet");
}
// Transfer the simply mapped values, check if any are skipped.
Modified: llvm/trunk/lib/CodeGen/SplitKit.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SplitKit.h?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SplitKit.h (original)
+++ llvm/trunk/lib/CodeGen/SplitKit.h Fri Apr 8 10:17:43 2016
@@ -18,7 +18,6 @@
#include "LiveRangeCalc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -330,14 +329,9 @@ private:
MachineBasicBlock *findShallowDominator(MachineBasicBlock *MBB,
MachineBasicBlock *DefMBB);
- /// Find out all the backCopies dominated by others.
- void computeRedundantBackCopies(DenseSet<unsigned> &NotToHoistSet,
- SmallVectorImpl<VNInfo *> &BackCopies);
-
- /// Hoist back-copies to the complement interval. It tries to hoist all
- /// the back-copies to one BB if it is beneficial, or else simply remove
- /// redundent backcopies dominated by others.
- void hoistCopies();
+ /// hoistCopiesForSize - Hoist back-copies to the complement interval in a
+ /// way that minimizes code size. This implements the SM_Size spill mode.
+ void hoistCopiesForSize();
/// transferValues - Transfer values to the new ranges.
/// Return true if any ranges were skipped.
Added: llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll?rev=265790&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll Fri Apr 8 10:17:43 2016
@@ -0,0 +1,514 @@
+;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=true -mcpu=cortex-a57 -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=DEFERRED
+;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=false -mcpu=cortex-a57 -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=REGULAR
+
+; Check that we do not end up with useless spill code.
+;
+; Move to the basic block we are interested in.
+;
+; CHECK: // %if.then.120
+;
+; REGULAR: str w21, [sp, #[[OFFSET:[0-9]+]]] // 4-byte Folded Spill
+; Check that w21 wouldn't need to be spilled since it is never reused.
+; REGULAR-NOT: {{[wx]}}21{{,?}}
+;
+; Check that w22 is used to carry a value through the call.
+; DEFERRED-NOT: str {{[wx]}}22,
+; DEFERRED: mov {{[wx]}}22,
+; DEFERRED-NOT: str {{[wx]}}22,
+;
+; CHECK: bl fprintf
+;
+; DEFERRED-NOT: ldr {{[wx]}}22,
+; DEFERRED: mov {{[wx][0-9]+}}, {{[wx]}}22
+; DEFERRED-NOT: ldr {{[wx]}}22,
+;
+; REGULAR-NOT: {{[wx]}}21{{,?}}
+; REGULAR: ldr w21, [sp, #[[OFFSET]]] // 4-byte Folded Reload
+;
+; End of the basic block we are interested in.
+; CHECK: b
+; CHECK: {{[^:]+}}: // %sw.bb.123
+
+%struct.__sFILE = type { i8*, i32, i32, i32, i32, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+%struct.__sbuf = type { i8*, i64 }
+%struct.DState = type { %struct.bz_stream*, i32, i8, i32, i8, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* }
+%struct.bz_stream = type { i8*, i32, i32, i32, i8*, i32, i32, i32, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8* }
+
+@__sF = external global [0 x %struct.__sFILE], align 8
+@.str = private unnamed_addr constant [20 x i8] c"\0A    [%d: stuff+mf \00", align 1
+
+declare i32 @fprintf(%struct.__sFILE* nocapture, i8* nocapture readonly, ...)
+
+declare void @bar(i32)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+
+define i32 @foo(%struct.DState* %s) {
+entry:
+ %state = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 1
+ %tmp = load i32, i32* %state, align 4
+ %cmp = icmp eq i32 %tmp, 10
+ %save_i = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 40
+ br i1 %cmp, label %if.end.thread, label %if.end
+
+if.end.thread: ; preds = %entry
+ %save_j = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41
+ %save_t = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42
+ %save_alphaSize = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43
+ %save_nGroups = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44
+ %save_nSelectors = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45
+ %save_EOB = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46
+ %save_groupNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47
+ %save_groupPos = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48
+ %save_nextSym = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49
+ %save_nblockMAX = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50
+ %save_nblock = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51
+ %save_es = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52
+ %save_N = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53
+ %save_curr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54
+ %save_zt = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55
+ %save_zn = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56
+ %save_zvec = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57
+ %save_zj = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58
+ %tmp1 = bitcast i32* %save_i to i8*
+ call void @llvm.memset.p0i8.i64(i8* %tmp1, i8 0, i64 108, i32 4, i1 false)
+ br label %sw.default
+
+if.end: ; preds = %entry
+ %.pre = load i32, i32* %save_i, align 4
+ %save_j3.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41
+ %.pre406 = load i32, i32* %save_j3.phi.trans.insert, align 4
+ %save_t4.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42
+ %.pre407 = load i32, i32* %save_t4.phi.trans.insert, align 4
+ %save_alphaSize5.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43
+ %.pre408 = load i32, i32* %save_alphaSize5.phi.trans.insert, align 4
+ %save_nGroups6.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44
+ %.pre409 = load i32, i32* %save_nGroups6.phi.trans.insert, align 4
+ %save_nSelectors7.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45
+ %.pre410 = load i32, i32* %save_nSelectors7.phi.trans.insert, align 4
+ %save_EOB8.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46
+ %.pre411 = load i32, i32* %save_EOB8.phi.trans.insert, align 4
+ %save_groupNo9.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47
+ %.pre412 = load i32, i32* %save_groupNo9.phi.trans.insert, align 4
+ %save_groupPos10.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48
+ %.pre413 = load i32, i32* %save_groupPos10.phi.trans.insert, align 4
+ %save_nextSym11.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49
+ %.pre414 = load i32, i32* %save_nextSym11.phi.trans.insert, align 4
+ %save_nblockMAX12.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50
+ %.pre415 = load i32, i32* %save_nblockMAX12.phi.trans.insert, align 4
+ %save_nblock13.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51
+ %.pre416 = load i32, i32* %save_nblock13.phi.trans.insert, align 4
+ %save_es14.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52
+ %.pre417 = load i32, i32* %save_es14.phi.trans.insert, align 4
+ %save_N15.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53
+ %.pre418 = load i32, i32* %save_N15.phi.trans.insert, align 4
+ %save_curr16.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54
+ %.pre419 = load i32, i32* %save_curr16.phi.trans.insert, align 4
+ %save_zt17.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55
+ %.pre420 = load i32, i32* %save_zt17.phi.trans.insert, align 4
+ %save_zn18.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56
+ %.pre421 = load i32, i32* %save_zn18.phi.trans.insert, align 4
+ %save_zvec19.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57
+ %.pre422 = load i32, i32* %save_zvec19.phi.trans.insert, align 4
+ %save_zj20.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58
+ %.pre423 = load i32, i32* %save_zj20.phi.trans.insert, align 4
+ switch i32 %tmp, label %sw.default [
+ i32 13, label %sw.bb
+ i32 14, label %if.end.sw.bb.65_crit_edge
+ i32 25, label %if.end.sw.bb.123_crit_edge
+ ]
+
+if.end.sw.bb.123_crit_edge: ; preds = %if.end
+ %.pre433 = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
+ br label %sw.bb.123
+
+if.end.sw.bb.65_crit_edge: ; preds = %if.end
+ %bsLive69.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
+ %.pre426 = load i32, i32* %bsLive69.phi.trans.insert, align 4
+ br label %sw.bb.65
+
+sw.bb: ; preds = %if.end
+ %sunkaddr = ptrtoint %struct.DState* %s to i64
+ %sunkaddr485 = add i64 %sunkaddr, 8
+ %sunkaddr486 = inttoptr i64 %sunkaddr485 to i32*
+ store i32 13, i32* %sunkaddr486, align 4
+ %bsLive = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
+ %tmp2 = load i32, i32* %bsLive, align 4
+ %cmp28.400 = icmp sgt i32 %tmp2, 7
+ br i1 %cmp28.400, label %sw.bb.if.then.29_crit_edge, label %if.end.33.lr.ph
+
+sw.bb.if.then.29_crit_edge: ; preds = %sw.bb
+ %sunkaddr487 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr488 = add i64 %sunkaddr487, 32
+ %sunkaddr489 = inttoptr i64 %sunkaddr488 to i32*
+ %.pre425 = load i32, i32* %sunkaddr489, align 4
+ br label %if.then.29
+
+if.end.33.lr.ph: ; preds = %sw.bb
+ %tmp3 = bitcast %struct.DState* %s to %struct.bz_stream**
+ %.pre424 = load %struct.bz_stream*, %struct.bz_stream** %tmp3, align 8
+ %avail_in.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre424, i64 0, i32 1
+ %.pre430 = load i32, i32* %avail_in.phi.trans.insert, align 4
+ %tmp4 = add i32 %.pre430, -1
+ br label %if.end.33
+
+if.then.29: ; preds = %while.body.backedge, %sw.bb.if.then.29_crit_edge
+ %tmp5 = phi i32 [ %.pre425, %sw.bb.if.then.29_crit_edge ], [ %or, %while.body.backedge ]
+ %.lcssa393 = phi i32 [ %tmp2, %sw.bb.if.then.29_crit_edge ], [ %add, %while.body.backedge ]
+ %sub = add nsw i32 %.lcssa393, -8
+ %shr = lshr i32 %tmp5, %sub
+ %and = and i32 %shr, 255
+ %sunkaddr491 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr492 = add i64 %sunkaddr491, 36
+ %sunkaddr493 = inttoptr i64 %sunkaddr492 to i32*
+ store i32 %sub, i32* %sunkaddr493, align 4
+ %blockSize100k = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 9
+ store i32 %and, i32* %blockSize100k, align 4
+ %and.off = add nsw i32 %and, -49
+ %tmp6 = icmp ugt i32 %and.off, 8
+ br i1 %tmp6, label %save_state_and_return, label %if.end.62
+
+if.end.33: ; preds = %while.body.backedge, %if.end.33.lr.ph
+ %lsr.iv482 = phi i32 [ %tmp4, %if.end.33.lr.ph ], [ %lsr.iv.next483, %while.body.backedge ]
+ %tmp7 = phi i32 [ %tmp2, %if.end.33.lr.ph ], [ %add, %while.body.backedge ]
+ %cmp35 = icmp eq i32 %lsr.iv482, -1
+ br i1 %cmp35, label %save_state_and_return, label %if.end.37
+
+if.end.37: ; preds = %if.end.33
+ %tmp8 = bitcast %struct.bz_stream* %.pre424 to i8**
+ %sunkaddr494 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr495 = add i64 %sunkaddr494, 32
+ %sunkaddr496 = inttoptr i64 %sunkaddr495 to i32*
+ %tmp9 = load i32, i32* %sunkaddr496, align 4
+ %shl = shl i32 %tmp9, 8
+ %tmp10 = load i8*, i8** %tmp8, align 8
+ %tmp11 = load i8, i8* %tmp10, align 1
+ %conv = zext i8 %tmp11 to i32
+ %or = or i32 %conv, %shl
+ store i32 %or, i32* %sunkaddr496, align 4
+ %add = add nsw i32 %tmp7, 8
+ %sunkaddr497 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr498 = add i64 %sunkaddr497, 36
+ %sunkaddr499 = inttoptr i64 %sunkaddr498 to i32*
+ store i32 %add, i32* %sunkaddr499, align 4
+ %incdec.ptr = getelementptr inbounds i8, i8* %tmp10, i64 1
+ store i8* %incdec.ptr, i8** %tmp8, align 8
+ %sunkaddr500 = ptrtoint %struct.bz_stream* %.pre424 to i64
+ %sunkaddr501 = add i64 %sunkaddr500, 8
+ %sunkaddr502 = inttoptr i64 %sunkaddr501 to i32*
+ store i32 %lsr.iv482, i32* %sunkaddr502, align 4
+ %sunkaddr503 = ptrtoint %struct.bz_stream* %.pre424 to i64
+ %sunkaddr504 = add i64 %sunkaddr503, 12
+ %sunkaddr505 = inttoptr i64 %sunkaddr504 to i32*
+ %tmp12 = load i32, i32* %sunkaddr505, align 4
+ %inc = add i32 %tmp12, 1
+ store i32 %inc, i32* %sunkaddr505, align 4
+ %cmp49 = icmp eq i32 %inc, 0
+ br i1 %cmp49, label %if.then.51, label %while.body.backedge
+
+if.then.51: ; preds = %if.end.37
+ %sunkaddr506 = ptrtoint %struct.bz_stream* %.pre424 to i64
+ %sunkaddr507 = add i64 %sunkaddr506, 16
+ %sunkaddr508 = inttoptr i64 %sunkaddr507 to i32*
+ %tmp13 = load i32, i32* %sunkaddr508, align 4
+ %inc53 = add i32 %tmp13, 1
+ store i32 %inc53, i32* %sunkaddr508, align 4
+ br label %while.body.backedge
+
+while.body.backedge: ; preds = %if.then.51, %if.end.37
+ %lsr.iv.next483 = add i32 %lsr.iv482, -1
+ %cmp28 = icmp sgt i32 %add, 7
+ br i1 %cmp28, label %if.then.29, label %if.end.33
+
+if.end.62: ; preds = %if.then.29
+ %sub64 = add nsw i32 %and, -48
+ %sunkaddr509 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr510 = add i64 %sunkaddr509, 40
+ %sunkaddr511 = inttoptr i64 %sunkaddr510 to i32*
+ store i32 %sub64, i32* %sunkaddr511, align 4
+ br label %sw.bb.65
+
+sw.bb.65: ; preds = %if.end.62, %if.end.sw.bb.65_crit_edge
+ %bsLive69.pre-phi = phi i32* [ %bsLive69.phi.trans.insert, %if.end.sw.bb.65_crit_edge ], [ %bsLive, %if.end.62 ]
+ %tmp14 = phi i32 [ %.pre426, %if.end.sw.bb.65_crit_edge ], [ %sub, %if.end.62 ]
+ %sunkaddr512 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr513 = add i64 %sunkaddr512, 8
+ %sunkaddr514 = inttoptr i64 %sunkaddr513 to i32*
+ store i32 14, i32* %sunkaddr514, align 4
+ %cmp70.397 = icmp sgt i32 %tmp14, 7
+ br i1 %cmp70.397, label %if.then.72, label %if.end.82.lr.ph
+
+if.end.82.lr.ph: ; preds = %sw.bb.65
+ %tmp15 = bitcast %struct.DState* %s to %struct.bz_stream**
+ %.pre427 = load %struct.bz_stream*, %struct.bz_stream** %tmp15, align 8
+ %avail_in84.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre427, i64 0, i32 1
+ %.pre431 = load i32, i32* %avail_in84.phi.trans.insert, align 4
+ %tmp16 = add i32 %.pre431, -1
+ br label %if.end.82
+
+if.then.72: ; preds = %while.body.68.backedge, %sw.bb.65
+ %.lcssa390 = phi i32 [ %tmp14, %sw.bb.65 ], [ %add97, %while.body.68.backedge ]
+ %sub76 = add nsw i32 %.lcssa390, -8
+ %sunkaddr516 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr517 = add i64 %sunkaddr516, 36
+ %sunkaddr518 = inttoptr i64 %sunkaddr517 to i32*
+ store i32 %sub76, i32* %sunkaddr518, align 4
+ %currBlockNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 11
+ %tmp17 = load i32, i32* %currBlockNo, align 4
+ %inc117 = add nsw i32 %tmp17, 1
+ store i32 %inc117, i32* %currBlockNo, align 4
+ %verbosity = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 12
+ %tmp18 = load i32, i32* %verbosity, align 4
+ %cmp118 = icmp sgt i32 %tmp18, 1
+ br i1 %cmp118, label %if.then.120, label %sw.bb.123, !prof !0
+
+if.end.82: ; preds = %while.body.68.backedge, %if.end.82.lr.ph
+ %lsr.iv480 = phi i32 [ %tmp16, %if.end.82.lr.ph ], [ %lsr.iv.next481, %while.body.68.backedge ]
+ %tmp19 = phi i32 [ %tmp14, %if.end.82.lr.ph ], [ %add97, %while.body.68.backedge ]
+ %cmp85 = icmp eq i32 %lsr.iv480, -1
+ br i1 %cmp85, label %save_state_and_return, label %if.end.88
+
+if.end.88: ; preds = %if.end.82
+ %tmp20 = bitcast %struct.bz_stream* %.pre427 to i8**
+ %sunkaddr519 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr520 = add i64 %sunkaddr519, 32
+ %sunkaddr521 = inttoptr i64 %sunkaddr520 to i32*
+ %tmp21 = load i32, i32* %sunkaddr521, align 4
+ %shl90 = shl i32 %tmp21, 8
+ %tmp22 = load i8*, i8** %tmp20, align 8
+ %tmp23 = load i8, i8* %tmp22, align 1
+ %conv93 = zext i8 %tmp23 to i32
+ %or94 = or i32 %conv93, %shl90
+ store i32 %or94, i32* %sunkaddr521, align 4
+ %add97 = add nsw i32 %tmp19, 8
+ %sunkaddr522 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr523 = add i64 %sunkaddr522, 36
+ %sunkaddr524 = inttoptr i64 %sunkaddr523 to i32*
+ store i32 %add97, i32* %sunkaddr524, align 4
+ %incdec.ptr100 = getelementptr inbounds i8, i8* %tmp22, i64 1
+ store i8* %incdec.ptr100, i8** %tmp20, align 8
+ %sunkaddr525 = ptrtoint %struct.bz_stream* %.pre427 to i64
+ %sunkaddr526 = add i64 %sunkaddr525, 8
+ %sunkaddr527 = inttoptr i64 %sunkaddr526 to i32*
+ store i32 %lsr.iv480, i32* %sunkaddr527, align 4
+ %sunkaddr528 = ptrtoint %struct.bz_stream* %.pre427 to i64
+ %sunkaddr529 = add i64 %sunkaddr528, 12
+ %sunkaddr530 = inttoptr i64 %sunkaddr529 to i32*
+ %tmp24 = load i32, i32* %sunkaddr530, align 4
+ %inc106 = add i32 %tmp24, 1
+ store i32 %inc106, i32* %sunkaddr530, align 4
+ %cmp109 = icmp eq i32 %inc106, 0
+ br i1 %cmp109, label %if.then.111, label %while.body.68.backedge
+
+if.then.111: ; preds = %if.end.88
+ %sunkaddr531 = ptrtoint %struct.bz_stream* %.pre427 to i64
+ %sunkaddr532 = add i64 %sunkaddr531, 16
+ %sunkaddr533 = inttoptr i64 %sunkaddr532 to i32*
+ %tmp25 = load i32, i32* %sunkaddr533, align 4
+ %inc114 = add i32 %tmp25, 1
+ store i32 %inc114, i32* %sunkaddr533, align 4
+ br label %while.body.68.backedge
+
+while.body.68.backedge: ; preds = %if.then.111, %if.end.88
+ %lsr.iv.next481 = add i32 %lsr.iv480, -1
+ %cmp70 = icmp sgt i32 %add97, 7
+ br i1 %cmp70, label %if.then.72, label %if.end.82
+
+if.then.120: ; preds = %if.then.72
+ %call = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* getelementptr inbounds ([0 x %struct.__sFILE], [0 x %struct.__sFILE]* @__sF, i64 0, i64 2), i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str, i64 0, i64 0), i32 %inc117)
+ br label %sw.bb.123
+
+sw.bb.123: ; preds = %if.then.120, %if.then.72, %if.end.sw.bb.123_crit_edge
+ %bsLive127.pre-phi = phi i32* [ %.pre433, %if.end.sw.bb.123_crit_edge ], [ %bsLive69.pre-phi, %if.then.72 ], [ %bsLive69.pre-phi, %if.then.120 ]
+ %sunkaddr534 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr535 = add i64 %sunkaddr534, 8
+ %sunkaddr536 = inttoptr i64 %sunkaddr535 to i32*
+ store i32 25, i32* %sunkaddr536, align 4
+ %tmp26 = load i32, i32* %bsLive127.pre-phi, align 4
+ %cmp128.395 = icmp sgt i32 %tmp26, 7
+ br i1 %cmp128.395, label %sw.bb.123.if.then.130_crit_edge, label %if.end.140.lr.ph
+
+sw.bb.123.if.then.130_crit_edge: ; preds = %sw.bb.123
+ %sunkaddr537 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr538 = add i64 %sunkaddr537, 32
+ %sunkaddr539 = inttoptr i64 %sunkaddr538 to i32*
+ %.pre429 = load i32, i32* %sunkaddr539, align 4
+ br label %if.then.130
+
+if.end.140.lr.ph: ; preds = %sw.bb.123
+ %tmp27 = bitcast %struct.DState* %s to %struct.bz_stream**
+ %.pre428 = load %struct.bz_stream*, %struct.bz_stream** %tmp27, align 8
+ %avail_in142.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre428, i64 0, i32 1
+ %.pre432 = load i32, i32* %avail_in142.phi.trans.insert, align 4
+ %tmp28 = add i32 %.pre432, -1
+ br label %if.end.140
+
+if.then.130: ; preds = %while.body.126.backedge, %sw.bb.123.if.then.130_crit_edge
+ %tmp29 = phi i32 [ %.pre429, %sw.bb.123.if.then.130_crit_edge ], [ %or152, %while.body.126.backedge ]
+ %.lcssa = phi i32 [ %tmp26, %sw.bb.123.if.then.130_crit_edge ], [ %add155, %while.body.126.backedge ]
+ %sub134 = add nsw i32 %.lcssa, -8
+ %shr135 = lshr i32 %tmp29, %sub134
+ store i32 %sub134, i32* %bsLive127.pre-phi, align 4
+ %origPtr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 13
+ %tmp30 = load i32, i32* %origPtr, align 4
+ %shl175 = shl i32 %tmp30, 8
+ %conv176 = and i32 %shr135, 255
+ %or177 = or i32 %shl175, %conv176
+ store i32 %or177, i32* %origPtr, align 4
+ %nInUse = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 27
+ %tmp31 = load i32, i32* %nInUse, align 4
+ %add179 = add nsw i32 %tmp31, 2
+ br label %save_state_and_return
+
+if.end.140: ; preds = %while.body.126.backedge, %if.end.140.lr.ph
+ %lsr.iv = phi i32 [ %tmp28, %if.end.140.lr.ph ], [ %lsr.iv.next, %while.body.126.backedge ]
+ %tmp32 = phi i32 [ %tmp26, %if.end.140.lr.ph ], [ %add155, %while.body.126.backedge ]
+ %cmp143 = icmp eq i32 %lsr.iv, -1
+ br i1 %cmp143, label %save_state_and_return, label %if.end.146
+
+if.end.146: ; preds = %if.end.140
+ %tmp33 = bitcast %struct.bz_stream* %.pre428 to i8**
+ %sunkaddr541 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr542 = add i64 %sunkaddr541, 32
+ %sunkaddr543 = inttoptr i64 %sunkaddr542 to i32*
+ %tmp34 = load i32, i32* %sunkaddr543, align 4
+ %shl148 = shl i32 %tmp34, 8
+ %tmp35 = load i8*, i8** %tmp33, align 8
+ %tmp36 = load i8, i8* %tmp35, align 1
+ %conv151 = zext i8 %tmp36 to i32
+ %or152 = or i32 %conv151, %shl148
+ store i32 %or152, i32* %sunkaddr543, align 4
+ %add155 = add nsw i32 %tmp32, 8
+ store i32 %add155, i32* %bsLive127.pre-phi, align 4
+ %incdec.ptr158 = getelementptr inbounds i8, i8* %tmp35, i64 1
+ store i8* %incdec.ptr158, i8** %tmp33, align 8
+ %sunkaddr544 = ptrtoint %struct.bz_stream* %.pre428 to i64
+ %sunkaddr545 = add i64 %sunkaddr544, 8
+ %sunkaddr546 = inttoptr i64 %sunkaddr545 to i32*
+ store i32 %lsr.iv, i32* %sunkaddr546, align 4
+ %sunkaddr547 = ptrtoint %struct.bz_stream* %.pre428 to i64
+ %sunkaddr548 = add i64 %sunkaddr547, 12
+ %sunkaddr549 = inttoptr i64 %sunkaddr548 to i32*
+ %tmp37 = load i32, i32* %sunkaddr549, align 4
+ %inc164 = add i32 %tmp37, 1
+ store i32 %inc164, i32* %sunkaddr549, align 4
+ %cmp167 = icmp eq i32 %inc164, 0
+ br i1 %cmp167, label %if.then.169, label %while.body.126.backedge
+
+if.then.169: ; preds = %if.end.146
+ %sunkaddr550 = ptrtoint %struct.bz_stream* %.pre428 to i64
+ %sunkaddr551 = add i64 %sunkaddr550, 16
+ %sunkaddr552 = inttoptr i64 %sunkaddr551 to i32*
+ %tmp38 = load i32, i32* %sunkaddr552, align 4
+ %inc172 = add i32 %tmp38, 1
+ store i32 %inc172, i32* %sunkaddr552, align 4
+ br label %while.body.126.backedge
+
+while.body.126.backedge: ; preds = %if.then.169, %if.end.146
+ %lsr.iv.next = add i32 %lsr.iv, -1
+ %cmp128 = icmp sgt i32 %add155, 7
+ br i1 %cmp128, label %if.then.130, label %if.end.140
+
+sw.default: ; preds = %if.end, %if.end.thread
+ %tmp39 = phi i32 [ 0, %if.end.thread ], [ %.pre, %if.end ]
+ %tmp40 = phi i32 [ 0, %if.end.thread ], [ %.pre406, %if.end ]
+ %tmp41 = phi i32 [ 0, %if.end.thread ], [ %.pre407, %if.end ]
+ %tmp42 = phi i32 [ 0, %if.end.thread ], [ %.pre408, %if.end ]
+ %tmp43 = phi i32 [ 0, %if.end.thread ], [ %.pre409, %if.end ]
+ %tmp44 = phi i32 [ 0, %if.end.thread ], [ %.pre410, %if.end ]
+ %tmp45 = phi i32 [ 0, %if.end.thread ], [ %.pre411, %if.end ]
+ %tmp46 = phi i32 [ 0, %if.end.thread ], [ %.pre412, %if.end ]
+ %tmp47 = phi i32 [ 0, %if.end.thread ], [ %.pre413, %if.end ]
+ %tmp48 = phi i32 [ 0, %if.end.thread ], [ %.pre414, %if.end ]
+ %tmp49 = phi i32 [ 0, %if.end.thread ], [ %.pre415, %if.end ]
+ %tmp50 = phi i32 [ 0, %if.end.thread ], [ %.pre416, %if.end ]
+ %tmp51 = phi i32 [ 0, %if.end.thread ], [ %.pre417, %if.end ]
+ %tmp52 = phi i32 [ 0, %if.end.thread ], [ %.pre418, %if.end ]
+ %tmp53 = phi i32 [ 0, %if.end.thread ], [ %.pre419, %if.end ]
+ %tmp54 = phi i32 [ 0, %if.end.thread ], [ %.pre420, %if.end ]
+ %tmp55 = phi i32 [ 0, %if.end.thread ], [ %.pre421, %if.end ]
+ %tmp56 = phi i32 [ 0, %if.end.thread ], [ %.pre422, %if.end ]
+ %tmp57 = phi i32 [ 0, %if.end.thread ], [ %.pre423, %if.end ]
+ %save_j3.pre-phi469 = phi i32* [ %save_j, %if.end.thread ], [ %save_j3.phi.trans.insert, %if.end ]
+ %save_t4.pre-phi467 = phi i32* [ %save_t, %if.end.thread ], [ %save_t4.phi.trans.insert, %if.end ]
+ %save_alphaSize5.pre-phi465 = phi i32* [ %save_alphaSize, %if.end.thread ], [ %save_alphaSize5.phi.trans.insert, %if.end ]
+ %save_nGroups6.pre-phi463 = phi i32* [ %save_nGroups, %if.end.thread ], [ %save_nGroups6.phi.trans.insert, %if.end ]
+ %save_nSelectors7.pre-phi461 = phi i32* [ %save_nSelectors, %if.end.thread ], [ %save_nSelectors7.phi.trans.insert, %if.end ]
+ %save_EOB8.pre-phi459 = phi i32* [ %save_EOB, %if.end.thread ], [ %save_EOB8.phi.trans.insert, %if.end ]
+ %save_groupNo9.pre-phi457 = phi i32* [ %save_groupNo, %if.end.thread ], [ %save_groupNo9.phi.trans.insert, %if.end ]
+ %save_groupPos10.pre-phi455 = phi i32* [ %save_groupPos, %if.end.thread ], [ %save_groupPos10.phi.trans.insert, %if.end ]
+ %save_nextSym11.pre-phi453 = phi i32* [ %save_nextSym, %if.end.thread ], [ %save_nextSym11.phi.trans.insert, %if.end ]
+ %save_nblockMAX12.pre-phi451 = phi i32* [ %save_nblockMAX, %if.end.thread ], [ %save_nblockMAX12.phi.trans.insert, %if.end ]
+ %save_nblock13.pre-phi449 = phi i32* [ %save_nblock, %if.end.thread ], [ %save_nblock13.phi.trans.insert, %if.end ]
+ %save_es14.pre-phi447 = phi i32* [ %save_es, %if.end.thread ], [ %save_es14.phi.trans.insert, %if.end ]
+ %save_N15.pre-phi445 = phi i32* [ %save_N, %if.end.thread ], [ %save_N15.phi.trans.insert, %if.end ]
+ %save_curr16.pre-phi443 = phi i32* [ %save_curr, %if.end.thread ], [ %save_curr16.phi.trans.insert, %if.end ]
+ %save_zt17.pre-phi441 = phi i32* [ %save_zt, %if.end.thread ], [ %save_zt17.phi.trans.insert, %if.end ]
+ %save_zn18.pre-phi439 = phi i32* [ %save_zn, %if.end.thread ], [ %save_zn18.phi.trans.insert, %if.end ]
+ %save_zvec19.pre-phi437 = phi i32* [ %save_zvec, %if.end.thread ], [ %save_zvec19.phi.trans.insert, %if.end ]
+ %save_zj20.pre-phi435 = phi i32* [ %save_zj, %if.end.thread ], [ %save_zj20.phi.trans.insert, %if.end ]
+ tail call void @bar(i32 4001)
+ br label %save_state_and_return
+
+save_state_and_return: ; preds = %sw.default, %if.end.140, %if.then.130, %if.end.82, %if.end.33, %if.then.29
+ %tmp58 = phi i32 [ %tmp39, %sw.default ], [ %.pre, %if.then.29 ], [ %.pre, %if.then.130 ], [ %.pre, %if.end.140 ], [ %.pre, %if.end.82 ], [ %.pre, %if.end.33 ]
+ %tmp59 = phi i32 [ %tmp40, %sw.default ], [ %.pre406, %if.then.29 ], [ %.pre406, %if.then.130 ], [ %.pre406, %if.end.140 ], [ %.pre406, %if.end.82 ], [ %.pre406, %if.end.33 ]
+ %tmp60 = phi i32 [ %tmp41, %sw.default ], [ %.pre407, %if.then.29 ], [ %.pre407, %if.then.130 ], [ %.pre407, %if.end.140 ], [ %.pre407, %if.end.82 ], [ %.pre407, %if.end.33 ]
+ %tmp61 = phi i32 [ %tmp43, %sw.default ], [ %.pre409, %if.then.29 ], [ %.pre409, %if.then.130 ], [ %.pre409, %if.end.140 ], [ %.pre409, %if.end.82 ], [ %.pre409, %if.end.33 ]
+ %tmp62 = phi i32 [ %tmp44, %sw.default ], [ %.pre410, %if.then.29 ], [ %.pre410, %if.then.130 ], [ %.pre410, %if.end.140 ], [ %.pre410, %if.end.82 ], [ %.pre410, %if.end.33 ]
+ %tmp63 = phi i32 [ %tmp45, %sw.default ], [ %.pre411, %if.then.29 ], [ %.pre411, %if.then.130 ], [ %.pre411, %if.end.140 ], [ %.pre411, %if.end.82 ], [ %.pre411, %if.end.33 ]
+ %tmp64 = phi i32 [ %tmp46, %sw.default ], [ %.pre412, %if.then.29 ], [ %.pre412, %if.then.130 ], [ %.pre412, %if.end.140 ], [ %.pre412, %if.end.82 ], [ %.pre412, %if.end.33 ]
+ %tmp65 = phi i32 [ %tmp47, %sw.default ], [ %.pre413, %if.then.29 ], [ %.pre413, %if.then.130 ], [ %.pre413, %if.end.140 ], [ %.pre413, %if.end.82 ], [ %.pre413, %if.end.33 ]
+ %tmp66 = phi i32 [ %tmp48, %sw.default ], [ %.pre414, %if.then.29 ], [ %.pre414, %if.then.130 ], [ %.pre414, %if.end.140 ], [ %.pre414, %if.end.82 ], [ %.pre414, %if.end.33 ]
+ %tmp67 = phi i32 [ %tmp49, %sw.default ], [ %.pre415, %if.then.29 ], [ %.pre415, %if.then.130 ], [ %.pre415, %if.end.140 ], [ %.pre415, %if.end.82 ], [ %.pre415, %if.end.33 ]
+ %tmp68 = phi i32 [ %tmp51, %sw.default ], [ %.pre417, %if.then.29 ], [ %.pre417, %if.then.130 ], [ %.pre417, %if.end.140 ], [ %.pre417, %if.end.82 ], [ %.pre417, %if.end.33 ]
+ %tmp69 = phi i32 [ %tmp52, %sw.default ], [ %.pre418, %if.then.29 ], [ %.pre418, %if.then.130 ], [ %.pre418, %if.end.140 ], [ %.pre418, %if.end.82 ], [ %.pre418, %if.end.33 ]
+ %tmp70 = phi i32 [ %tmp53, %sw.default ], [ %.pre419, %if.then.29 ], [ %.pre419, %if.then.130 ], [ %.pre419, %if.end.140 ], [ %.pre419, %if.end.82 ], [ %.pre419, %if.end.33 ]
+ %tmp71 = phi i32 [ %tmp54, %sw.default ], [ %.pre420, %if.then.29 ], [ %.pre420, %if.then.130 ], [ %.pre420, %if.end.140 ], [ %.pre420, %if.end.82 ], [ %.pre420, %if.end.33 ]
+ %tmp72 = phi i32 [ %tmp55, %sw.default ], [ %.pre421, %if.then.29 ], [ %.pre421, %if.then.130 ], [ %.pre421, %if.end.140 ], [ %.pre421, %if.end.82 ], [ %.pre421, %if.end.33 ]
+ %tmp73 = phi i32 [ %tmp56, %sw.default ], [ %.pre422, %if.then.29 ], [ %.pre422, %if.then.130 ], [ %.pre422, %if.end.140 ], [ %.pre422, %if.end.82 ], [ %.pre422, %if.end.33 ]
+ %tmp74 = phi i32 [ %tmp57, %sw.default ], [ %.pre423, %if.then.29 ], [ %.pre423, %if.then.130 ], [ %.pre423, %if.end.140 ], [ %.pre423, %if.end.82 ], [ %.pre423, %if.end.33 ]
+ %save_j3.pre-phi468 = phi i32* [ %save_j3.pre-phi469, %sw.default ], [ %save_j3.phi.trans.insert, %if.then.29 ], [ %save_j3.phi.trans.insert, %if.then.130 ], [ %save_j3.phi.trans.insert, %if.end.140 ], [ %save_j3.phi.trans.insert, %if.end.82 ], [ %save_j3.phi.trans.insert, %if.end.33 ]
+ %save_t4.pre-phi466 = phi i32* [ %save_t4.pre-phi467, %sw.default ], [ %save_t4.phi.trans.insert, %if.then.29 ], [ %save_t4.phi.trans.insert, %if.then.130 ], [ %save_t4.phi.trans.insert, %if.end.140 ], [ %save_t4.phi.trans.insert, %if.end.82 ], [ %save_t4.phi.trans.insert, %if.end.33 ]
+ %save_alphaSize5.pre-phi464 = phi i32* [ %save_alphaSize5.pre-phi465, %sw.default ], [ %save_alphaSize5.phi.trans.insert, %if.then.29 ], [ %save_alphaSize5.phi.trans.insert, %if.then.130 ], [ %save_alphaSize5.phi.trans.insert, %if.end.140 ], [ %save_alphaSize5.phi.trans.insert, %if.end.82 ], [ %save_alphaSize5.phi.trans.insert, %if.end.33 ]
+ %save_nGroups6.pre-phi462 = phi i32* [ %save_nGroups6.pre-phi463, %sw.default ], [ %save_nGroups6.phi.trans.insert, %if.then.29 ], [ %save_nGroups6.phi.trans.insert, %if.then.130 ], [ %save_nGroups6.phi.trans.insert, %if.end.140 ], [ %save_nGroups6.phi.trans.insert, %if.end.82 ], [ %save_nGroups6.phi.trans.insert, %if.end.33 ]
+ %save_nSelectors7.pre-phi460 = phi i32* [ %save_nSelectors7.pre-phi461, %sw.default ], [ %save_nSelectors7.phi.trans.insert, %if.then.29 ], [ %save_nSelectors7.phi.trans.insert, %if.then.130 ], [ %save_nSelectors7.phi.trans.insert, %if.end.140 ], [ %save_nSelectors7.phi.trans.insert, %if.end.82 ], [ %save_nSelectors7.phi.trans.insert, %if.end.33 ]
+ %save_EOB8.pre-phi458 = phi i32* [ %save_EOB8.pre-phi459, %sw.default ], [ %save_EOB8.phi.trans.insert, %if.then.29 ], [ %save_EOB8.phi.trans.insert, %if.then.130 ], [ %save_EOB8.phi.trans.insert, %if.end.140 ], [ %save_EOB8.phi.trans.insert, %if.end.82 ], [ %save_EOB8.phi.trans.insert, %if.end.33 ]
+ %save_groupNo9.pre-phi456 = phi i32* [ %save_groupNo9.pre-phi457, %sw.default ], [ %save_groupNo9.phi.trans.insert, %if.then.29 ], [ %save_groupNo9.phi.trans.insert, %if.then.130 ], [ %save_groupNo9.phi.trans.insert, %if.end.140 ], [ %save_groupNo9.phi.trans.insert, %if.end.82 ], [ %save_groupNo9.phi.trans.insert, %if.end.33 ]
+ %save_groupPos10.pre-phi454 = phi i32* [ %save_groupPos10.pre-phi455, %sw.default ], [ %save_groupPos10.phi.trans.insert, %if.then.29 ], [ %save_groupPos10.phi.trans.insert, %if.then.130 ], [ %save_groupPos10.phi.trans.insert, %if.end.140 ], [ %save_groupPos10.phi.trans.insert, %if.end.82 ], [ %save_groupPos10.phi.trans.insert, %if.end.33 ]
+ %save_nextSym11.pre-phi452 = phi i32* [ %save_nextSym11.pre-phi453, %sw.default ], [ %save_nextSym11.phi.trans.insert, %if.then.29 ], [ %save_nextSym11.phi.trans.insert, %if.then.130 ], [ %save_nextSym11.phi.trans.insert, %if.end.140 ], [ %save_nextSym11.phi.trans.insert, %if.end.82 ], [ %save_nextSym11.phi.trans.insert, %if.end.33 ]
+ %save_nblockMAX12.pre-phi450 = phi i32* [ %save_nblockMAX12.pre-phi451, %sw.default ], [ %save_nblockMAX12.phi.trans.insert, %if.then.29 ], [ %save_nblockMAX12.phi.trans.insert, %if.then.130 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.140 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.82 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.33 ]
+ %save_nblock13.pre-phi448 = phi i32* [ %save_nblock13.pre-phi449, %sw.default ], [ %save_nblock13.phi.trans.insert, %if.then.29 ], [ %save_nblock13.phi.trans.insert, %if.then.130 ], [ %save_nblock13.phi.trans.insert, %if.end.140 ], [ %save_nblock13.phi.trans.insert, %if.end.82 ], [ %save_nblock13.phi.trans.insert, %if.end.33 ]
+ %save_es14.pre-phi446 = phi i32* [ %save_es14.pre-phi447, %sw.default ], [ %save_es14.phi.trans.insert, %if.then.29 ], [ %save_es14.phi.trans.insert, %if.then.130 ], [ %save_es14.phi.trans.insert, %if.end.140 ], [ %save_es14.phi.trans.insert, %if.end.82 ], [ %save_es14.phi.trans.insert, %if.end.33 ]
+ %save_N15.pre-phi444 = phi i32* [ %save_N15.pre-phi445, %sw.default ], [ %save_N15.phi.trans.insert, %if.then.29 ], [ %save_N15.phi.trans.insert, %if.then.130 ], [ %save_N15.phi.trans.insert, %if.end.140 ], [ %save_N15.phi.trans.insert, %if.end.82 ], [ %save_N15.phi.trans.insert, %if.end.33 ]
+ %save_curr16.pre-phi442 = phi i32* [ %save_curr16.pre-phi443, %sw.default ], [ %save_curr16.phi.trans.insert, %if.then.29 ], [ %save_curr16.phi.trans.insert, %if.then.130 ], [ %save_curr16.phi.trans.insert, %if.end.140 ], [ %save_curr16.phi.trans.insert, %if.end.82 ], [ %save_curr16.phi.trans.insert, %if.end.33 ]
+ %save_zt17.pre-phi440 = phi i32* [ %save_zt17.pre-phi441, %sw.default ], [ %save_zt17.phi.trans.insert, %if.then.29 ], [ %save_zt17.phi.trans.insert, %if.then.130 ], [ %save_zt17.phi.trans.insert, %if.end.140 ], [ %save_zt17.phi.trans.insert, %if.end.82 ], [ %save_zt17.phi.trans.insert, %if.end.33 ]
+ %save_zn18.pre-phi438 = phi i32* [ %save_zn18.pre-phi439, %sw.default ], [ %save_zn18.phi.trans.insert, %if.then.29 ], [ %save_zn18.phi.trans.insert, %if.then.130 ], [ %save_zn18.phi.trans.insert, %if.end.140 ], [ %save_zn18.phi.trans.insert, %if.end.82 ], [ %save_zn18.phi.trans.insert, %if.end.33 ]
+ %save_zvec19.pre-phi436 = phi i32* [ %save_zvec19.pre-phi437, %sw.default ], [ %save_zvec19.phi.trans.insert, %if.then.29 ], [ %save_zvec19.phi.trans.insert, %if.then.130 ], [ %save_zvec19.phi.trans.insert, %if.end.140 ], [ %save_zvec19.phi.trans.insert, %if.end.82 ], [ %save_zvec19.phi.trans.insert, %if.end.33 ]
+ %save_zj20.pre-phi434 = phi i32* [ %save_zj20.pre-phi435, %sw.default ], [ %save_zj20.phi.trans.insert, %if.then.29 ], [ %save_zj20.phi.trans.insert, %if.then.130 ], [ %save_zj20.phi.trans.insert, %if.end.140 ], [ %save_zj20.phi.trans.insert, %if.end.82 ], [ %save_zj20.phi.trans.insert, %if.end.33 ]
+ %nblock.1 = phi i32 [ %tmp50, %sw.default ], [ %.pre416, %if.then.29 ], [ 0, %if.then.130 ], [ %.pre416, %if.end.140 ], [ %.pre416, %if.end.82 ], [ %.pre416, %if.end.33 ]
+ %alphaSize.1 = phi i32 [ %tmp42, %sw.default ], [ %.pre408, %if.then.29 ], [ %add179, %if.then.130 ], [ %.pre408, %if.end.140 ], [ %.pre408, %if.end.82 ], [ %.pre408, %if.end.33 ]
+ %retVal.0 = phi i32 [ 0, %sw.default ], [ -5, %if.then.29 ], [ -4, %if.then.130 ], [ 0, %if.end.140 ], [ 0, %if.end.82 ], [ 0, %if.end.33 ]
+ store i32 %tmp58, i32* %save_i, align 4
+ store i32 %tmp59, i32* %save_j3.pre-phi468, align 4
+ store i32 %tmp60, i32* %save_t4.pre-phi466, align 4
+ store i32 %alphaSize.1, i32* %save_alphaSize5.pre-phi464, align 4
+ store i32 %tmp61, i32* %save_nGroups6.pre-phi462, align 4
+ store i32 %tmp62, i32* %save_nSelectors7.pre-phi460, align 4
+ store i32 %tmp63, i32* %save_EOB8.pre-phi458, align 4
+ store i32 %tmp64, i32* %save_groupNo9.pre-phi456, align 4
+ store i32 %tmp65, i32* %save_groupPos10.pre-phi454, align 4
+ store i32 %tmp66, i32* %save_nextSym11.pre-phi452, align 4
+ store i32 %tmp67, i32* %save_nblockMAX12.pre-phi450, align 4
+ store i32 %nblock.1, i32* %save_nblock13.pre-phi448, align 4
+ store i32 %tmp68, i32* %save_es14.pre-phi446, align 4
+ store i32 %tmp69, i32* %save_N15.pre-phi444, align 4
+ store i32 %tmp70, i32* %save_curr16.pre-phi442, align 4
+ store i32 %tmp71, i32* %save_zt17.pre-phi440, align 4
+ store i32 %tmp72, i32* %save_zn18.pre-phi438, align 4
+ store i32 %tmp73, i32* %save_zvec19.pre-phi436, align 4
+ store i32 %tmp74, i32* %save_zj20.pre-phi434, align 4
+ ret i32 %retVal.0
+}
+
+!0 = !{!"branch_weights", i32 10, i32 1}
Modified: llvm/trunk/test/CodeGen/X86/fp128-compare.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp128-compare.ll?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp128-compare.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp128-compare.ll Fri Apr 8 10:17:43 2016
@@ -86,8 +86,8 @@ entry:
%cond = select i1 %cmp, fp128 %x, fp128 %y
ret fp128 %cond
; CHECK-LABEL: TestMax:
-; CHECK: movaps %xmm0
; CHECK: movaps %xmm1
+; CHECK: movaps %xmm0
; CHECK: callq __gttf2
; CHECK: movaps {{.*}}, %xmm0
; CHECK: testl %eax, %eax
Removed: llvm/trunk/test/CodeGen/X86/hoist-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/hoist-spill.ll?rev=265789&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/hoist-spill.ll (original)
+++ llvm/trunk/test/CodeGen/X86/hoist-spill.ll (removed)
@@ -1,121 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-
-; grep 'Spill' |sed 's%.*\(-[0-9]\+(\%rsp)\).*%\1%g' |sort |uniq -d |awk '{if (/rsp/); exit -1}'
-; Check no spills to the same stack slot after hoisting.
-; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp)
-; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp)
-; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET3:-?[0-9]*]](%rsp)
-; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp)
-; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp)
-; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET3]](%rsp)
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-@a = external global i32*, align 8
-@b = external global i32, align 4
-@d = external global i32*, align 8
-
-; Function Attrs: norecurse noreturn nounwind uwtable
-define void @fn1(i32 %p1) {
-entry:
- %tmp = load i32*, i32** @d, align 8
- %tmp1 = load i32*, i32** @a, align 8
- %tmp2 = sext i32 %p1 to i64
- br label %for.cond
-
-for.cond: ; preds = %for.inc14, %entry
- %indvar = phi i32 [ %indvar.next, %for.inc14 ], [ 0, %entry ]
- %indvars.iv30.in = phi i32 [ %indvars.iv30, %for.inc14 ], [ %p1, %entry ]
- %c.0 = phi i32 [ %inc15, %for.inc14 ], [ 1, %entry ]
- %k.0 = phi i32 [ %k.1.lcssa, %for.inc14 ], [ undef, %entry ]
- %tmp3 = icmp sgt i32 undef, 0
- %smax52 = select i1 %tmp3, i32 undef, i32 0
- %tmp4 = zext i32 %smax52 to i64
- %tmp5 = icmp sgt i64 undef, %tmp4
- %smax53 = select i1 %tmp5, i64 undef, i64 %tmp4
- %tmp6 = add nsw i64 %smax53, 1
- %tmp7 = sub nsw i64 %tmp6, %tmp4
- %tmp8 = add nsw i64 %tmp7, -8
- %tmp9 = sub i32 undef, %indvar
- %tmp10 = icmp sgt i64 %tmp2, 0
- %smax40 = select i1 %tmp10, i64 %tmp2, i64 0
- %scevgep41 = getelementptr i32, i32* %tmp1, i64 %smax40
- %indvars.iv30 = add i32 %indvars.iv30.in, -1
- %tmp11 = icmp sgt i32 %indvars.iv30, 0
- %smax = select i1 %tmp11, i32 %indvars.iv30, i32 0
- %tmp12 = zext i32 %smax to i64
- %sub = sub nsw i32 %p1, %c.0
- %cmp = icmp sgt i32 %sub, 0
- %sub. = select i1 %cmp, i32 %sub, i32 0
- %cmp326 = icmp sgt i32 %k.0, %p1
- br i1 %cmp326, label %for.cond4.preheader, label %for.body.preheader
-
-for.body.preheader: ; preds = %for.cond
- br label %for.body
-
-for.cond4.preheader: ; preds = %for.body, %for.cond
- %k.1.lcssa = phi i32 [ %k.0, %for.cond ], [ %add, %for.body ]
- %cmp528 = icmp sgt i32 %sub., %p1
- br i1 %cmp528, label %for.inc14, label %for.body6.preheader
-
-for.body6.preheader: ; preds = %for.cond4.preheader
- br i1 undef, label %for.body6, label %min.iters.checked
-
-min.iters.checked: ; preds = %for.body6.preheader
- br i1 undef, label %for.body6, label %vector.memcheck
-
-vector.memcheck: ; preds = %min.iters.checked
- %bound1 = icmp ule i32* undef, %scevgep41
- %memcheck.conflict = and i1 undef, %bound1
- br i1 %memcheck.conflict, label %for.body6, label %vector.body.preheader
-
-vector.body.preheader: ; preds = %vector.memcheck
- %lcmp.mod = icmp eq i64 undef, 0
- br i1 %lcmp.mod, label %vector.body.preheader.split, label %vector.body.prol
-
-vector.body.prol: ; preds = %vector.body.prol, %vector.body.preheader
- %prol.iter.cmp = icmp eq i64 undef, 0
- br i1 %prol.iter.cmp, label %vector.body.preheader.split, label %vector.body.prol
-
-vector.body.preheader.split: ; preds = %vector.body.prol, %vector.body.preheader
- %tmp13 = icmp ult i64 %tmp8, 24
- br i1 %tmp13, label %middle.block, label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.body.preheader.split
- %index = phi i64 [ %index.next.3, %vector.body ], [ 0, %vector.body.preheader.split ]
- %index.next = add i64 %index, 8
- %offset.idx.1 = add i64 %tmp12, %index.next
- %tmp14 = getelementptr inbounds i32, i32* %tmp, i64 %offset.idx.1
- %tmp15 = bitcast i32* %tmp14 to <4 x i32>*
- %wide.load.1 = load <4 x i32>, <4 x i32>* %tmp15, align 4
- %tmp16 = getelementptr inbounds i32, i32* %tmp1, i64 %offset.idx.1
- %tmp17 = bitcast i32* %tmp16 to <4 x i32>*
- store <4 x i32> %wide.load.1, <4 x i32>* %tmp17, align 4
- %index.next.3 = add i64 %index, 32
- br i1 undef, label %middle.block, label %vector.body
-
-middle.block: ; preds = %vector.body, %vector.body.preheader.split
- br i1 undef, label %for.inc14, label %for.body6
-
-for.body: ; preds = %for.body, %for.body.preheader
- %k.127 = phi i32 [ %k.0, %for.body.preheader ], [ %add, %for.body ]
- %add = add nsw i32 %k.127, 1
- %tmp18 = load i32, i32* undef, align 4
- store i32 %tmp18, i32* @b, align 4
- br i1 undef, label %for.body, label %for.cond4.preheader
-
-for.body6: ; preds = %for.body6, %middle.block, %vector.memcheck, %min.iters.checked, %for.body6.preheader
- %indvars.iv32 = phi i64 [ undef, %for.body6 ], [ %tmp12, %vector.memcheck ], [ %tmp12, %min.iters.checked ], [ %tmp12, %for.body6.preheader ], [ undef, %middle.block ]
- %arrayidx8 = getelementptr inbounds i32, i32* %tmp, i64 %indvars.iv32
- %tmp19 = load i32, i32* %arrayidx8, align 4
- %arrayidx10 = getelementptr inbounds i32, i32* %tmp1, i64 %indvars.iv32
- store i32 %tmp19, i32* %arrayidx10, align 4
- %cmp5 = icmp slt i64 %indvars.iv32, undef
- br i1 %cmp5, label %for.body6, label %for.inc14
-
-for.inc14: ; preds = %for.body6, %middle.block, %for.cond4.preheader
- %inc15 = add nuw nsw i32 %c.0, 1
- %indvar.next = add i32 %indvar, 1
- br label %for.cond
-}
Removed: llvm/trunk/test/CodeGen/X86/new-remat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/new-remat.ll?rev=265789&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/new-remat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/new-remat.ll (removed)
@@ -1,70 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-; Check all spills are rematerialized.
-; CHECK-NOT: Spill
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-@b = common global double 0.000000e+00, align 8
-@a = common global i32 0, align 4
-
-; Function Attrs: nounwind uwtable
-define i32 @uniform_testdata(i32 %p1) {
-entry:
- %cmp3 = icmp sgt i32 %p1, 0
- br i1 %cmp3, label %for.body.preheader, label %for.end
-
-for.body.preheader: ; preds = %entry
- %tmp = add i32 %p1, -1
- %xtraiter = and i32 %p1, 7
- %lcmp.mod = icmp eq i32 %xtraiter, 0
- br i1 %lcmp.mod, label %for.body.preheader.split, label %for.body.prol.preheader
-
-for.body.prol.preheader: ; preds = %for.body.preheader
- br label %for.body.prol
-
-for.body.prol: ; preds = %for.body.prol, %for.body.prol.preheader
- %i.04.prol = phi i32 [ %inc.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
- %prol.iter = phi i32 [ %prol.iter.sub, %for.body.prol ], [ %xtraiter, %for.body.prol.preheader ]
- %tmp1 = load double, double* @b, align 8
- %call.prol = tail call double @pow(double %tmp1, double 2.500000e-01)
- %inc.prol = add nuw nsw i32 %i.04.prol, 1
- %prol.iter.sub = add i32 %prol.iter, -1
- %prol.iter.cmp = icmp eq i32 %prol.iter.sub, 0
- br i1 %prol.iter.cmp, label %for.body.preheader.split.loopexit, label %for.body.prol
-
-for.body.preheader.split.loopexit: ; preds = %for.body.prol
- %inc.prol.lcssa = phi i32 [ %inc.prol, %for.body.prol ]
- br label %for.body.preheader.split
-
-for.body.preheader.split: ; preds = %for.body.preheader.split.loopexit, %for.body.preheader
- %i.04.unr = phi i32 [ 0, %for.body.preheader ], [ %inc.prol.lcssa, %for.body.preheader.split.loopexit ]
- %tmp2 = icmp ult i32 %tmp, 7
- br i1 %tmp2, label %for.end.loopexit, label %for.body.preheader.split.split
-
-for.body.preheader.split.split: ; preds = %for.body.preheader.split
- br label %for.body
-
-for.body: ; preds = %for.body, %for.body.preheader.split.split
- %i.04 = phi i32 [ %i.04.unr, %for.body.preheader.split.split ], [ %inc.7, %for.body ]
- %tmp3 = load double, double* @b, align 8
- %call = tail call double @pow(double %tmp3, double 2.500000e-01)
- %tmp4 = load double, double* @b, align 8
- %call.1 = tail call double @pow(double %tmp4, double 2.500000e-01)
- %inc.7 = add nsw i32 %i.04, 8
- %exitcond.7 = icmp eq i32 %inc.7, %p1
- br i1 %exitcond.7, label %for.end.loopexit.unr-lcssa, label %for.body
-
-for.end.loopexit.unr-lcssa: ; preds = %for.body
- br label %for.end.loopexit
-
-for.end.loopexit: ; preds = %for.end.loopexit.unr-lcssa, %for.body.preheader.split
- br label %for.end
-
-for.end: ; preds = %for.end.loopexit, %entry
- %tmp5 = load i32, i32* @a, align 4
- ret i32 %tmp5
-}
-
-; Function Attrs: nounwind
-declare double @pow(double, double)
Modified: llvm/trunk/test/CodeGen/X86/ragreedy-hoist-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ragreedy-hoist-spill.ll?rev=265790&r1=265789&r2=265790&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/ragreedy-hoist-spill.ll (original)
+++ llvm/trunk/test/CodeGen/X86/ragreedy-hoist-spill.ll Fri Apr 8 10:17:43 2016
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-apple-macosx -regalloc=greedy | FileCheck %s
; This testing case is reduced from 254.gap SyFgets function.
-; We make sure a spill is hoisted to a cold BB inside the hotter outer loop.
+; We make sure a spill is not hoisted to a hotter outer loop.
%struct.TMP.1 = type { %struct.TMP.2*, %struct.TMP.2*, [1024 x i8] }
%struct.TMP.2 = type { i8*, i32, i32, i16, i16, %struct.TMP.3, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.TMP.3, %struct.TMP.4*, i32, [3 x i8], [1 x i8], %struct.TMP.3, i32, i64 }
@@ -181,10 +181,6 @@ sw.bb474:
br i1 %cmp476, label %if.end517, label %do.body479.preheader
do.body479.preheader:
- ; CHECK: do.body479.preheader
- ; spill is hoisted here. Although loop depth1 is even hotter than loop depth2, do.body479.preheader is cold.
- ; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp)
- ; CHECK: land.rhs485
%cmp4833314 = icmp eq i8 undef, 0
br i1 %cmp4833314, label %if.end517, label %land.rhs485
@@ -204,8 +200,8 @@ land.lhs.true490:
lor.rhs500:
; CHECK: lor.rhs500
- ; Make sure spill is hoisted to a cold preheader in outside loop.
- ; CHECK-NOT: movq %r{{.*}}, {{[0-9]+}}(%rsp)
+ ; Make sure that we don't hoist the spill to outer loops.
+ ; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp)
; CHECK: callq {{.*}}maskrune
%call3.i.i2792 = call i32 @__maskrune(i32 undef, i64 256)
br i1 undef, label %land.lhs.true504, label %do.body479.backedge
More information about the llvm-commits
mailing list