[llvm] r265309 - Replace analyzeSiblingValues with new algorithm to fix its compile
Teresa Johnson via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 4 10:41:58 PDT 2016
Hi Wei,
I'm getting an error when building with clang:
/usr/local/google/home/tejohnson/llvm/llvm_15/lib/CodeGen/InlineSpiller.cpp:174:8:
error: 'postOptimization' overrides a member function but is not marked
'override' [-Werror,-Winconsistent-missing-override]
void postOptimization();
^
/usr/local/google/home/tejohnson/llvm/llvm_15/lib/CodeGen/Spiller.h:32:18:
note: overridden virtual function is here
virtual void postOptimization() {};
^
1 error generated.
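Looks like the new declaration just needs to be marked with override to
match the virtual method in Spiller.h, i.e.:

  void postOptimization() override;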
Teresa
On Mon, Apr 4, 2016 at 9:42 AM, Wei Mi via llvm-commits <
llvm-commits at lists.llvm.org> wrote:
> Author: wmi
> Date: Mon Apr 4 11:42:40 2016
> New Revision: 265309
>
> URL: http://llvm.org/viewvc/llvm-project?rev=265309&view=rev
> Log:
> Replace analyzeSiblingValues with new algorithm to fix its compile
> time issue. The patch solves PR17409 and its duplicates.
>
> analyzeSiblingValues is an N x N complexity algorithm, where N is
> the number of siblings generated by reg splitting. Although it
> causes significant compile time issues when N is large, it is also
> important for performance since it removes redundant spills and
> enables rematerialization.
>
> To solve the compile time issue, the patch removes analyzeSiblingValues
> and replaces it with lower cost alternatives containing two parts. The
> first part creates a new spill hoisting method in postOptimization of
> register allocation. It does spill hoisting once, after all the spills
> are generated, instead of inside every instance of selectOrSplit. The
> second part queries the defining expression of the original register for
> rematerialization and keeps it available during register allocation
> even if it is already dead. It deletes those dead instructions only in
> postOptimization. With these two parts, the patch can remove
> analyzeSiblingValues without sacrificing performance.
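>
> For illustration, a register allocator wires the two parts together
> roughly as below (a sketch distilled from the RegAllocBasic and
> RegAllocBase changes in this patch; DeadRemats is the set owned by
> RegAllocBase):
>
>   // Inside selectOrSplit: hand the dead-remat set to each spill, so
>   // dead defining instructions of original regs are kept, not deleted.
>   LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, nullptr,
>                     &DeadRemats);
>   spiller().spill(LRE);
>
>   // In runOnMachineFunction: after all allocations are done, hoist the
>   // collected spills and erase the saved dead instructions.
>   allocatePhysRegs();
>   postOptimization();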
>
> Differential Revision: http://reviews.llvm.org/D15302
>
>
> Added:
> llvm/trunk/test/CodeGen/X86/hoist-spill.ll
> llvm/trunk/test/CodeGen/X86/new-remat.ll
> Removed:
> llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll
> Modified:
> llvm/trunk/include/llvm/CodeGen/LiveRangeEdit.h
> llvm/trunk/lib/CodeGen/InlineSpiller.cpp
> llvm/trunk/lib/CodeGen/LiveRangeEdit.cpp
> llvm/trunk/lib/CodeGen/RegAllocBase.cpp
> llvm/trunk/lib/CodeGen/RegAllocBase.h
> llvm/trunk/lib/CodeGen/RegAllocBasic.cpp
> llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp
> llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp
> llvm/trunk/lib/CodeGen/Spiller.h
> llvm/trunk/lib/CodeGen/SplitKit.cpp
> llvm/trunk/lib/CodeGen/SplitKit.h
> llvm/trunk/test/CodeGen/X86/fp128-compare.ll
> llvm/trunk/test/CodeGen/X86/ragreedy-hoist-spill.ll
>
> Modified: llvm/trunk/include/llvm/CodeGen/LiveRangeEdit.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/LiveRangeEdit.h?rev=265309&r1=265308&r2=265309&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/CodeGen/LiveRangeEdit.h (original)
> +++ llvm/trunk/include/llvm/CodeGen/LiveRangeEdit.h Mon Apr 4 11:42:40 2016
> @@ -72,6 +72,10 @@ private:
> /// ScannedRemattable - true when remattable values have been identified.
> bool ScannedRemattable;
>
> + /// DeadRemats - The saved instructions which have already been dead after
> + /// rematerialization but not deleted yet -- to be done in postOptimization.
> + SmallPtrSet<MachineInstr *, 32> *DeadRemats;
> +
> /// Remattable - Values defined by remattable instructions as identified by
> /// tii.isTriviallyReMaterializable().
> SmallPtrSet<const VNInfo*,4> Remattable;
> @@ -116,13 +120,16 @@ public:
> /// @param vrm Map of virtual registers to physical registers for this
> /// function. If NULL, no virtual register map updates will
> /// be done. This could be the case if called before Regalloc.
> + /// @param deadRemats The collection of all the instructions that define an
> + /// original reg and are dead after remat.
> LiveRangeEdit(LiveInterval *parent, SmallVectorImpl<unsigned> &newRegs,
> MachineFunction &MF, LiveIntervals &lis, VirtRegMap *vrm,
> - Delegate *delegate = nullptr)
> + Delegate *delegate = nullptr,
> + SmallPtrSet<MachineInstr *, 32> *deadRemats = nullptr)
> : Parent(parent), NewRegs(newRegs), MRI(MF.getRegInfo()), LIS(lis),
> - VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()),
> - TheDelegate(delegate), FirstNew(newRegs.size()),
> - ScannedRemattable(false) {
> + VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()), TheDelegate(delegate),
> + FirstNew(newRegs.size()), ScannedRemattable(false),
> + DeadRemats(deadRemats) {
> MRI.setDelegate(this);
> }
>
> @@ -142,6 +149,16 @@ public:
> bool empty() const { return size() == 0; }
> unsigned get(unsigned idx) const { return NewRegs[idx+FirstNew]; }
>
> + /// pop_back - It allows LiveRangeEdit users to drop new registers.
> + /// The context: when an original def instruction of a register is
> + /// dead after rematerialization, we still want to keep it for later
> + /// rematerializations. We save the def instruction in DeadRemats,
> + /// and replace the original dst register with a new dummy register so
> + /// the live range of the original dst register can be shrunk normally.
> + /// We don't want to allocate a phys register for the dummy register, so
> + /// we want to drop it from the NewRegs set.
> + void pop_back() { NewRegs.pop_back(); }
> +
> ArrayRef<unsigned> regs() const {
> return makeArrayRef(NewRegs).slice(FirstNew);
> }
> @@ -175,15 +192,15 @@ public:
> /// Remat - Information needed to rematerialize at a specific location.
> struct Remat {
> VNInfo *ParentVNI; // parent_'s value at the remat location.
> - MachineInstr *OrigMI; // Instruction defining ParentVNI.
> + MachineInstr *OrigMI; // Instruction defining OrigVNI. It contains the
> + // real expr for remat.
> explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(nullptr) {}
> };
>
> /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
> /// UseIdx. It is assumed that parent_.getVNInfoAt(UseIdx) == ParentVNI.
> /// When cheapAsAMove is set, only cheap remats are allowed.
> - bool canRematerializeAt(Remat &RM,
> - SlotIndex UseIdx,
> + bool canRematerializeAt(Remat &RM, VNInfo *OrigVNI, SlotIndex UseIdx,
> bool cheapAsAMove);
>
> /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an
> @@ -208,6 +225,12 @@ public:
> return Rematted.count(ParentVNI);
> }
>
> + void markDeadRemat(MachineInstr *inst) {
> + // DeadRemats is an optional field.
> + if (DeadRemats)
> + DeadRemats->insert(inst);
> + }
> +
> /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try
> /// to erase it from LIS.
> void eraseVirtReg(unsigned Reg);
> @@ -218,8 +241,11 @@ public:
> /// RegsBeingSpilled lists registers currently being spilled by the register
> /// allocator. These registers should not be split into new intervals
> /// as currently those new intervals are not guaranteed to spill.
> - void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
> - ArrayRef<unsigned> RegsBeingSpilled = None);
> + /// NoSplit indicates this func is used after the iterations of selectOrSplit
> + /// where registers should not be split into new intervals.
> + void eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
> + ArrayRef<unsigned> RegsBeingSpilled = None,
> + bool NoSplit = false);
>
> /// calculateRegClassAndHint - Recompute register class and hint for each new
> /// register.
>
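> With the new canRematerializeAt signature, the caller now looks up the
> original value number and seeds RM.OrigMI itself; the InlineSpiller
> change below uses it like this:
>
>   LiveInterval &OrigLI = LIS.getInterval(Original);
>   VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
>   LiveRangeEdit::Remat RM(ParentVNI);
>   RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
>   if (!Edit->canRematerializeAt(RM, OrigVNI, UseIdx, false)) {
>     // Cannot remat at UseIdx; mark the value used and fall back.
>   }
>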
> Modified: llvm/trunk/lib/CodeGen/InlineSpiller.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/InlineSpiller.cpp?rev=265309&r1=265308&r2=265309&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/InlineSpiller.cpp (original)
> +++ llvm/trunk/lib/CodeGen/InlineSpiller.cpp Mon Apr 4 11:42:40 2016
> @@ -48,13 +48,78 @@ STATISTIC(NumReloadsRemoved, "Number of
> STATISTIC(NumFolded, "Number of folded stack accesses");
> STATISTIC(NumFoldedLoads, "Number of folded loads");
> STATISTIC(NumRemats, "Number of rematerialized defs for spilling");
> -STATISTIC(NumOmitReloadSpill, "Number of omitted spills of reloads");
> -STATISTIC(NumHoists, "Number of hoisted spills");
>
> static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
> cl::desc("Disable inline spill hoisting"));
>
> namespace {
> +class HoistSpillHelper {
> + MachineFunction &MF;
> + LiveIntervals &LIS;
> + LiveStacks &LSS;
> + AliasAnalysis *AA;
> + MachineDominatorTree &MDT;
> + MachineLoopInfo &Loops;
> + VirtRegMap &VRM;
> + MachineFrameInfo &MFI;
> + MachineRegisterInfo &MRI;
> + const TargetInstrInfo &TII;
> + const TargetRegisterInfo &TRI;
> + const MachineBlockFrequencyInfo &MBFI;
> +
> + // Map from StackSlot to its original register.
> + DenseMap<int, unsigned> StackSlotToReg;
> + // Map from pair of (StackSlot and Original VNI) to a set of spills which
> + // have the same stackslot and have equal values defined by Original VNI.
> + // These spills are mergeable and are hoist candidates.
> + typedef DenseMap<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>
> + MergeableSpillsMap;
> + MergeableSpillsMap MergeableSpills;
> +
> + /// This is the map from original register to a set containing all its
> + /// siblings. To hoist a spill to another BB, we need to find out a live
> + /// sibling there and use it as the source of the new spill.
> + DenseMap<unsigned, SmallSetVector<unsigned, 16>> Virt2SiblingsMap;
> +
> + bool isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI, MachineBasicBlock &BB,
> + unsigned &LiveReg);
> +
> + void rmRedundantSpills(
> + SmallPtrSet<MachineInstr *, 16> &Spills,
> + SmallVectorImpl<MachineInstr *> &SpillsToRm,
> + DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);
> +
> + void getVisitOrders(
> + MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
> + SmallVectorImpl<MachineDomTreeNode *> &Orders,
> + SmallVectorImpl<MachineInstr *> &SpillsToRm,
> + DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
> + DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);
> +
> + void runHoistSpills(unsigned OrigReg, VNInfo &OrigVNI,
> + SmallPtrSet<MachineInstr *, 16> &Spills,
> + SmallVectorImpl<MachineInstr *> &SpillsToRm,
> + DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns);
> +
> +public:
> + HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf,
> + VirtRegMap &vrm)
> + : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()),
> + LSS(pass.getAnalysis<LiveStacks>()),
> + AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
> + MDT(pass.getAnalysis<MachineDominatorTree>()),
> + Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
> + MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
> + TII(*mf.getSubtarget().getInstrInfo()),
> + TRI(*mf.getSubtarget().getRegisterInfo()),
> + MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {}
> +
> + void addToMergeableSpills(MachineInstr *Spill, int StackSlot,
> + unsigned Original);
> + bool rmFromMergeableSpills(MachineInstr *Spill, int StackSlot);
> + void hoistAllSpills(LiveRangeEdit &Edit);
> +};
> +
> class InlineSpiller : public Spiller {
> MachineFunction &MF;
> LiveIntervals &LIS;
> @@ -85,56 +150,12 @@ class InlineSpiller : public Spiller {
> // Values that failed to remat at some point.
> SmallPtrSet<VNInfo*, 8> UsedValues;
>
> -public:
> - // Information about a value that was defined by a copy from a sibling
> - // register.
> - struct SibValueInfo {
> - // True when all reaching defs were reloads: No spill is necessary.
> - bool AllDefsAreReloads;
> -
> - // True when value is defined by an original PHI not from splitting.
> - bool DefByOrigPHI;
> -
> - // True when the COPY defining this value killed its source.
> - bool KillsSource;
> -
> - // The preferred register to spill.
> - unsigned SpillReg;
> -
> - // The value of SpillReg that should be spilled.
> - VNInfo *SpillVNI;
> -
> - // The block where SpillVNI should be spilled. Currently, this must be the
> - // block containing SpillVNI->def.
> - MachineBasicBlock *SpillMBB;
> -
> - // A defining instruction that is not a sibling copy or a reload, or NULL.
> - // This can be used as a template for rematerialization.
> - MachineInstr *DefMI;
> -
> - // List of values that depend on this one. These values are actually the
> - // same, but live range splitting has placed them in different registers,
> - // or SSA update needed to insert PHI-defs to preserve SSA form. This is
> - // copies of the current value and phi-kills. Usually only phi-kills cause
> - // more than one dependent value.
> - TinyPtrVector<VNInfo*> Deps;
> -
> - SibValueInfo(unsigned Reg, VNInfo *VNI)
> - : AllDefsAreReloads(true), DefByOrigPHI(false), KillsSource(false),
> - SpillReg(Reg), SpillVNI(VNI), SpillMBB(nullptr), DefMI(nullptr) {}
> -
> - // Returns true when a def has been found.
> - bool hasDef() const { return DefByOrigPHI || DefMI; }
> - };
> -
> -private:
> - // Values in RegsToSpill defined by sibling copies.
> - typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap;
> - SibValueMap SibValues;
> -
> // Dead defs generated during spilling.
> SmallVector<MachineInstr*, 8> DeadDefs;
>
> + // Object that records spill information and does the hoisting.
> + HoistSpillHelper HSpiller;
> +
> ~InlineSpiller() override {}
>
> public:
> @@ -147,9 +168,11 @@ public:
> MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
> TII(*mf.getSubtarget().getInstrInfo()),
> TRI(*mf.getSubtarget().getRegisterInfo()),
> - MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {}
> + MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
> + HSpiller(pass, mf, vrm) {}
>
> void spill(LiveRangeEdit &) override;
> + void postOptimization();
>
> private:
> bool isSnippet(const LiveInterval &SnipLI);
> @@ -161,11 +184,7 @@ private:
> }
>
> bool isSibling(unsigned Reg);
> - MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*);
> - void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = nullptr);
> - void analyzeSiblingValues();
> -
> - bool hoistSpill(LiveInterval &SpillLI, MachineInstr &CopyMI);
> + bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI);
> void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
>
> void markValueUsed(LiveInterval*, VNInfo*);
> @@ -297,417 +316,43 @@ void InlineSpiller::collectRegsToSpill()
> }
> }
>
> -
>
> -//===----------------------------------------------------------------------===//
> -// Sibling Values
>
> -//===----------------------------------------------------------------------===//
> -
> -// After live range splitting, some values to be spilled may be defined by
> -// copies from sibling registers. We trace the sibling copies back to the
> -// original value if it still exists. We need it for rematerialization.
> -//
> -// Even when the value can't be rematerialized, we still want to determine if
> -// the value has already been spilled, or we may want to hoist the spill from a
> -// loop.
> -
> bool InlineSpiller::isSibling(unsigned Reg) {
> return TargetRegisterInfo::isVirtualRegister(Reg) &&
> VRM.getOriginal(Reg) == Original;
> }
>
> -#ifndef NDEBUG
> -static raw_ostream &operator<<(raw_ostream &OS,
> - const InlineSpiller::SibValueInfo &SVI) {
> - OS << "spill " << PrintReg(SVI.SpillReg) << ':'
> - << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def;
> - if (SVI.SpillMBB)
> - OS << " in BB#" << SVI.SpillMBB->getNumber();
> - if (SVI.AllDefsAreReloads)
> - OS << " all-reloads";
> - if (SVI.DefByOrigPHI)
> - OS << " orig-phi";
> - if (SVI.KillsSource)
> - OS << " kill";
> - OS << " deps[";
> - for (VNInfo *Dep : SVI.Deps)
> - OS << ' ' << Dep->id << '@' << Dep->def;
> - OS << " ]";
> - if (SVI.DefMI)
> - OS << " def: " << *SVI.DefMI;
> - else
> - OS << '\n';
> - return OS;
> -}
> -#endif
> -
> -/// propagateSiblingValue - Propagate the value in SVI to dependents if it is
> -/// known. Otherwise remember the dependency for later.
> +/// It is beneficial to spill to an earlier place in the same BB in a case
> +/// like the following:
> +/// There is an alternative def earlier in the same MBB.
> +/// Hoist the spill as far as possible in SpillMBB. This can ease
> +/// register pressure:
> ///
> -/// @param SVIIter SibValues entry to propagate.
> -/// @param VNI Dependent value, or NULL to propagate to all saved dependents.
> -void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter,
> - VNInfo *VNI) {
> - SibValueMap::value_type *SVI = &*SVIIter;
> -
> - // When VNI is non-NULL, add it to SVI's deps, and only propagate to that.
> - TinyPtrVector<VNInfo*> FirstDeps;
> - if (VNI) {
> - FirstDeps.push_back(VNI);
> - SVI->second.Deps.push_back(VNI);
> - }
> -
> - // Has the value been completely determined yet? If not, defer propagation.
> - if (!SVI->second.hasDef())
> - return;
> -
> - // Work list of values to propagate.
> - SmallSetVector<SibValueMap::value_type *, 8> WorkList;
> - WorkList.insert(SVI);
> -
> - do {
> - SVI = WorkList.pop_back_val();
> - TinyPtrVector<VNInfo*> *Deps = VNI ? &FirstDeps : &SVI->second.Deps;
> - VNI = nullptr;
> -
> - SibValueInfo &SV = SVI->second;
> - if (!SV.SpillMBB)
> - SV.SpillMBB = LIS.getMBBFromIndex(SV.SpillVNI->def);
> -
> - DEBUG(dbgs() << " prop to " << Deps->size() << ": "
> - << SVI->first->id << '@' << SVI->first->def << ":\t" << SV);
> -
> - assert(SV.hasDef() && "Propagating undefined value");
> -
> - // Should this value be propagated as a preferred spill candidate? We don't
> - // propagate values of registers that are about to spill.
> - bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg);
> - unsigned SpillDepth = ~0u;
> -
> - for (VNInfo *Dep : *Deps) {
> - SibValueMap::iterator DepSVI = SibValues.find(Dep);
> - assert(DepSVI != SibValues.end() && "Dependent value not in SibValues");
> - SibValueInfo &DepSV = DepSVI->second;
> - if (!DepSV.SpillMBB)
> - DepSV.SpillMBB = LIS.getMBBFromIndex(DepSV.SpillVNI->def);
> -
> - bool Changed = false;
> -
> - // Propagate defining instruction.
> - if (!DepSV.hasDef()) {
> - Changed = true;
> - DepSV.DefMI = SV.DefMI;
> - DepSV.DefByOrigPHI = SV.DefByOrigPHI;
> - }
> -
> - // Propagate AllDefsAreReloads. For PHI values, this computes an AND of
> - // all predecessors.
> - if (!SV.AllDefsAreReloads && DepSV.AllDefsAreReloads) {
> - Changed = true;
> - DepSV.AllDefsAreReloads = false;
> - }
> -
> - // Propagate best spill value.
> - if (PropSpill && SV.SpillVNI != DepSV.SpillVNI) {
> - if (SV.SpillMBB == DepSV.SpillMBB) {
> - // DepSV is in the same block. Hoist when dominated.
> - if (DepSV.KillsSource && SV.SpillVNI->def < DepSV.SpillVNI->def) {
> - // This is an alternative def earlier in the same MBB.
> - // Hoist the spill as far as possible in SpillMBB. This can ease
> - // register pressure:
> - //
> - // x = def
> - // y = use x
> - // s = copy x
> - //
> - // Hoisting the spill of s to immediately after the def removes the
> - // interference between x and y:
> - //
> - // x = def
> - // spill x
> - // y = use x<kill>
> - //
> - // This hoist only helps when the DepSV copy kills its source.
> - Changed = true;
> - DepSV.SpillReg = SV.SpillReg;
> - DepSV.SpillVNI = SV.SpillVNI;
> - DepSV.SpillMBB = SV.SpillMBB;
> - }
> - } else {
> - // DepSV is in a different block.
> - if (SpillDepth == ~0u)
> - SpillDepth = Loops.getLoopDepth(SV.SpillMBB);
> -
> - // Also hoist spills to blocks with smaller loop depth, but make sure
> - // that the new value dominates. Non-phi dependents are always
> - // dominated, phis need checking.
> -
> - const BranchProbability MarginProb(4, 5); // 80%
> - // Hoist a spill to outer loop if there are multiple dependents (it
> - // can be beneficial if more than one dependent is hoisted) or
> - // if DepSV (the hoisting source) is hotter than SV (the hoisting
> - // destination) (we add an 80% margin to bias a little towards
> - // loop depth).
> - bool HoistCondition =
> - (MBFI.getBlockFreq(DepSV.SpillMBB) >=
> - (MBFI.getBlockFreq(SV.SpillMBB) * MarginProb)) ||
> - Deps->size() > 1;
> -
> - if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) &&
> - HoistCondition &&
> - (!DepSVI->first->isPHIDef() ||
> - MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) {
> - Changed = true;
> - DepSV.SpillReg = SV.SpillReg;
> - DepSV.SpillVNI = SV.SpillVNI;
> - DepSV.SpillMBB = SV.SpillMBB;
> - }
> - }
> - }
> -
> - if (!Changed)
> - continue;
> -
> - // Something changed in DepSVI. Propagate to dependents.
> - WorkList.insert(&*DepSVI);
> -
> - DEBUG(dbgs() << " update " << DepSVI->first->id << '@'
> - << DepSVI->first->def << " to:\t" << DepSV);
> - }
> - } while (!WorkList.empty());
> -}
> -
> -/// traceSiblingValue - Trace a value that is about to be spilled back to the
> -/// real defining instructions by looking through sibling copies. Always stay
> -/// within the range of OrigVNI so the registers are known to carry the same
> -/// value.
> +/// x = def
> +/// y = use x
> +/// s = copy x
> ///
> -/// Determine if the value is defined by all reloads, so spilling isn't
> -/// necessary - the value is already in the stack slot.
> +/// Hoisting the spill of s to immediately after the def removes the
> +/// interference between x and y:
> ///
> -/// Return a defining instruction that may be a candidate for rematerialization.
> +/// x = def
> +/// spill x
> +/// y = use x<kill>
> ///
> -MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
> - VNInfo *OrigVNI) {
> - // Check if a cached value already exists.
> - SibValueMap::iterator SVI;
> - bool Inserted;
> - std::tie(SVI, Inserted) =
> - SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI)));
> - if (!Inserted) {
> - DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':'
> - << UseVNI->id << '@' << UseVNI->def << ' ' << SVI->second);
> - return SVI->second.DefMI;
> - }
> -
> - DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':'
> - << UseVNI->id << '@' << UseVNI->def << '\n');
> -
> - // List of (Reg, VNI) that have been inserted into SibValues, but need to be
> - // processed.
> - SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList;
> - WorkList.push_back(std::make_pair(UseReg, UseVNI));
> -
> - LiveInterval &OrigLI = LIS.getInterval(Original);
> - do {
> - unsigned Reg;
> - VNInfo *VNI;
> - std::tie(Reg, VNI) = WorkList.pop_back_val();
> - DEBUG(dbgs() << " " << PrintReg(Reg) << ':' << VNI->id << '@' <<
> VNI->def
> - << ":\t");
> -
> - // First check if this value has already been computed.
> - SVI = SibValues.find(VNI);
> - assert(SVI != SibValues.end() && "Missing SibValues entry");
> -
> - // Trace through PHI-defs created by live range splitting.
> - if (VNI->isPHIDef()) {
> - // Stop at original PHIs. We don't know the value at the
> - // predecessors. Look up the VNInfo for the current definition
> - // in OrigLI, to properly determine whether or not this phi was
> - // added by splitting.
> - if (VNI->def == OrigLI.getVNInfoAt(VNI->def)->def) {
> - DEBUG(dbgs() << "orig phi value\n");
> - SVI->second.DefByOrigPHI = true;
> - SVI->second.AllDefsAreReloads = false;
> - propagateSiblingValue(SVI);
> - continue;
> - }
> -
> - // This is a PHI inserted by live range splitting. We could trace the
> - // live-out value from predecessor blocks, but that search can be very
> - // expensive if there are many predecessors and many more PHIs as
> - // generated by tail-dup when it sees an indirectbr. Instead, look at
> - // all the non-PHI defs that have the same value as OrigVNI. They must
> - // jointly dominate VNI->def. This is not optimal since VNI may actually
> - // be jointly dominated by a smaller subset of defs, so there is a chance
> - // we will miss a AllDefsAreReloads optimization.
> -
> - // Separate all values dominated by OrigVNI into PHIs and non-PHIs.
> - SmallVector<VNInfo*, 8> PHIs, NonPHIs;
> - LiveInterval &LI = LIS.getInterval(Reg);
> -
> - for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end();
> - VI != VE; ++VI) {
> - VNInfo *VNI2 = *VI;
> - if (VNI2->isUnused())
> - continue;
> - if (!OrigLI.containsOneValue() &&
> - OrigLI.getVNInfoAt(VNI2->def) != OrigVNI)
> - continue;
> - if (VNI2->isPHIDef() && VNI2->def != OrigVNI->def)
> - PHIs.push_back(VNI2);
> - else
> - NonPHIs.push_back(VNI2);
> - }
> - DEBUG(dbgs() << "split phi value, checking " << PHIs.size()
> - << " phi-defs, and " << NonPHIs.size()
> - << " non-phi/orig defs\n");
> -
> - // Create entries for all the PHIs. Don't add them to the worklist, we
> - // are processing all of them in one go here.
> - for (VNInfo *PHI : PHIs)
> - SibValues.insert(std::make_pair(PHI, SibValueInfo(Reg, PHI)));
> -
> - // Add every PHI as a dependent of all the non-PHIs.
> - for (VNInfo *NonPHI : NonPHIs) {
> - // Known value? Try an insertion.
> - std::tie(SVI, Inserted) =
> - SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI)));
> - // Add all the PHIs as dependents of NonPHI.
> - SVI->second.Deps.insert(SVI->second.Deps.end(), PHIs.begin(),
> - PHIs.end());
> - // This is the first time we see NonPHI, add it to the worklist.
> - if (Inserted)
> - WorkList.push_back(std::make_pair(Reg, NonPHI));
> - else
> - // Propagate to all inserted PHIs, not just VNI.
> - propagateSiblingValue(SVI);
> - }
> -
> - // Next work list item.
> - continue;
> - }
> -
> - MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
> - assert(MI && "Missing def");
> -
> - // Trace through sibling copies.
> - if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
> - if (isSibling(SrcReg)) {
> - LiveInterval &SrcLI = LIS.getInterval(SrcReg);
> - LiveQueryResult SrcQ = SrcLI.Query(VNI->def);
> - assert(SrcQ.valueIn() && "Copy from non-existing value");
> - // Check if this COPY kills its source.
> - SVI->second.KillsSource = SrcQ.isKill();
> - VNInfo *SrcVNI = SrcQ.valueIn();
> - DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':'
> - << SrcVNI->id << '@' << SrcVNI->def
> - << " kill=" << unsigned(SVI->second.KillsSource) <<
> '\n');
> - // Known sibling source value? Try an insertion.
> - std::tie(SVI, Inserted) = SibValues.insert(
> - std::make_pair(SrcVNI, SibValueInfo(SrcReg, SrcVNI)));
> - // This is the first time we see Src, add it to the worklist.
> - if (Inserted)
> - WorkList.push_back(std::make_pair(SrcReg, SrcVNI));
> - propagateSiblingValue(SVI, VNI);
> - // Next work list item.
> - continue;
> - }
> - }
> -
> - // Track reachable reloads.
> - SVI->second.DefMI = MI;
> - SVI->second.SpillMBB = MI->getParent();
> - int FI;
> - if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) {
> - DEBUG(dbgs() << "reload\n");
> - propagateSiblingValue(SVI);
> - // Next work list item.
> - continue;
> - }
> -
> - // Potential remat candidate.
> - DEBUG(dbgs() << "def " << *MI);
> - SVI->second.AllDefsAreReloads = false;
> - propagateSiblingValue(SVI);
> - } while (!WorkList.empty());
> -
> - // Look up the value we were looking for. We already did this lookup at the
> - // top of the function, but SibValues may have been invalidated.
> - SVI = SibValues.find(UseVNI);
> - assert(SVI != SibValues.end() && "Didn't compute requested info");
> - DEBUG(dbgs() << " traced to:\t" << SVI->second);
> - return SVI->second.DefMI;
> -}
> -
> -/// analyzeSiblingValues - Trace values defined by sibling copies back to
> -/// something that isn't a sibling copy.
> +/// This hoist only helps when the copy kills its source.
> ///
> -/// Keep track of values that may be rematerializable.
> -void InlineSpiller::analyzeSiblingValues() {
> - SibValues.clear();
> -
> - // No siblings at all?
> - if (Edit->getReg() == Original)
> - return;
> -
> - LiveInterval &OrigLI = LIS.getInterval(Original);
> - for (unsigned Reg : RegsToSpill) {
> - LiveInterval &LI = LIS.getInterval(Reg);
> - for (LiveInterval::const_vni_iterator VI = LI.vni_begin(),
> - VE = LI.vni_end(); VI != VE; ++VI) {
> - VNInfo *VNI = *VI;
> - if (VNI->isUnused())
> - continue;
> - MachineInstr *DefMI = nullptr;
> - if (!VNI->isPHIDef()) {
> - DefMI = LIS.getInstructionFromIndex(VNI->def);
> - assert(DefMI && "No defining instruction");
> - }
> - // Check possible sibling copies.
> - if (VNI->isPHIDef() || DefMI->isCopy()) {
> - VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
> - assert(OrigVNI && "Def outside original live range");
> - if (OrigVNI->def != VNI->def)
> - DefMI = traceSiblingValue(Reg, VNI, OrigVNI);
> - }
> - if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) {
> - DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@'
> - << VNI->def << " may remat from " << *DefMI);
> - }
> - }
> - }
> -}
> -
> -/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert
> -/// a spill at a better location.
> -bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr &CopyMI) {
> +bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
> + MachineInstr &CopyMI) {
> SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
> VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot());
> assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy");
> - SibValueMap::iterator I = SibValues.find(VNI);
> - if (I == SibValues.end())
> - return false;
> -
> - const SibValueInfo &SVI = I->second;
> -
> - // Let the normal folding code deal with the boring case.
> - if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI)
> - return false;
> -
> - // SpillReg may have been deleted by remat and DCE.
> - if (!LIS.hasInterval(SVI.SpillReg)) {
> - DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n');
> - SibValues.erase(I);
> - return false;
> - }
>
> - LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg);
> - if (!SibLI.containsValue(SVI.SpillVNI)) {
> - DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n');
> - SibValues.erase(I);
> + unsigned SrcReg = CopyMI.getOperand(1).getReg();
> + LiveInterval &SrcLI = LIS.getInterval(SrcReg);
> + VNInfo *SrcVNI = SrcLI.getVNInfoAt(Idx);
> + LiveQueryResult SrcQ = SrcLI.Query(Idx);
> + MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(SrcVNI->def);
> + if (DefMBB != CopyMI.getParent() || !SrcQ.isKill())
> return false;
> - }
>
> // Conservatively extend the stack slot range to the range of the original
> // value. We may be able to do better with stack slot coloring by being more
> @@ -719,35 +364,29 @@ bool InlineSpiller::hoistSpill(LiveInter
> DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": "
> << *StackInt << '\n');
>
> - // Already spilled everywhere.
> - if (SVI.AllDefsAreReloads) {
> - DEBUG(dbgs() << "\tno spill needed: " << SVI);
> - ++NumOmitReloadSpill;
> - return true;
> - }
> - // We are going to spill SVI.SpillVNI immediately after its def, so clear out
> + // We are going to spill SrcVNI immediately after its def, so clear out
> // any later spills of the same value.
> - eliminateRedundantSpills(SibLI, SVI.SpillVNI);
> + eliminateRedundantSpills(SrcLI, SrcVNI);
>
> - MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def);
> + MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def);
> MachineBasicBlock::iterator MII;
> - if (SVI.SpillVNI->isPHIDef())
> + if (SrcVNI->isPHIDef())
> MII = MBB->SkipPHIsAndLabels(MBB->begin());
> else {
> - MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def);
> + MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def);
> assert(DefMI && "Defining instruction disappeared");
> MII = DefMI;
> ++MII;
> }
> // Insert spill without kill flag immediately after def.
> - TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot,
> - MRI.getRegClass(SVI.SpillReg), &TRI);
> + TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot,
> + MRI.getRegClass(SrcReg), &TRI);
> --MII; // Point to store instruction.
> LIS.InsertMachineInstrInMaps(*MII);
> - DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII);
> + DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII);
>
> + HSpiller.addToMergeableSpills(&(*MII), StackSlot, Original);
> ++NumSpills;
> - ++NumHoists;
> return true;
> }
>
> @@ -805,7 +444,8 @@ void InlineSpiller::eliminateRedundantSp
> MI->setDesc(TII.get(TargetOpcode::KILL));
> DeadDefs.push_back(MI);
> ++NumSpillsRemoved;
> - --NumSpills;
> + if (HSpiller.rmFromMergeableSpills(MI, StackSlot))
> + --NumSpills;
> }
> }
> } while (!WorkList.empty());
> @@ -876,12 +516,12 @@ bool InlineSpiller::reMaterializeFor(Liv
> if (SnippetCopies.count(&MI))
> return false;
>
> - // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy.
> + LiveInterval &OrigLI = LIS.getInterval(Original);
> + VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
> LiveRangeEdit::Remat RM(ParentVNI);
> - SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
> - if (SibI != SibValues.end())
> - RM.OrigMI = SibI->second.DefMI;
> - if (!Edit->canRematerializeAt(RM, UseIdx, false)) {
> + RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
> +
> + if (!Edit->canRematerializeAt(RM, OrigVNI, UseIdx, false)) {
> markValueUsed(&VirtReg, ParentVNI);
> DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
> return false;
> @@ -931,7 +571,6 @@ bool InlineSpiller::reMaterializeFor(Liv
> /// reMaterializeAll - Try to rematerialize as many uses as possible,
> /// and trim the live ranges after.
> void InlineSpiller::reMaterializeAll() {
> - // analyzeSiblingValues has already tested all relevant defining instructions.
> if (!Edit->anyRematerializable(AA))
> return;
>
> @@ -1017,6 +656,9 @@ bool InlineSpiller::coalesceStackAccess(
> if (InstrReg != Reg || FI != StackSlot)
> return false;
>
> + if (!IsLoad)
> + HSpiller.rmFromMergeableSpills(MI, StackSlot);
> +
> DEBUG(dbgs() << "Coalescing stack access: " << *MI);
> LIS.RemoveMachineInstrFromMaps(*MI);
> MI->eraseFromParent();
> @@ -1141,6 +783,9 @@ foldMemoryOperand(ArrayRef<std::pair<Mac
> LIS.removePhysRegDefAt(Reg, Idx);
> }
>
> + int FI;
> + if (TII.isStoreToStackSlot(MI, FI) && HSpiller.rmFromMergeableSpills(MI, FI))
> + --NumSpills;
> LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI);
> MI->eraseFromParent();
>
> @@ -1166,9 +811,10 @@ foldMemoryOperand(ArrayRef<std::pair<Mac
>
> if (!WasCopy)
> ++NumFolded;
> - else if (Ops.front().second == 0)
> + else if (Ops.front().second == 0) {
> ++NumSpills;
> - else
> + HSpiller.addToMergeableSpills(FoldMI, StackSlot, Original);
> + } else
> ++NumReloads;
> return true;
> }
> @@ -1203,6 +849,7 @@ void InlineSpiller::insertSpill(unsigned
> DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS,
> "spill"));
> ++NumSpills;
> + HSpiller.addToMergeableSpills(std::next(MI), StackSlot, Original);
> }
>
> /// spillAroundUses - insert spill code around each use of Reg.
> @@ -1266,8 +913,7 @@ void InlineSpiller::spillAroundUses(unsi
> continue;
> }
> if (RI.Writes) {
> - // Hoist the spill of a sib-reg copy.
> - if (hoistSpill(OldLI, *MI)) {
> + if (hoistSpillInsideBB(OldLI, *MI)) {
> // This COPY is now dead, the value is already in the stack slot.
> MI->getOperand(0).setIsDead();
> DeadDefs.push_back(MI);
> @@ -1380,7 +1026,6 @@ void InlineSpiller::spill(LiveRangeEdit
> assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
>
> collectRegsToSpill();
> - analyzeSiblingValues();
> reMaterializeAll();
>
> // Remat may handle everything.
> @@ -1389,3 +1034,394 @@ void InlineSpiller::spill(LiveRangeEdit
>
> Edit->calculateRegClassAndHint(MF, Loops, MBFI);
> }
> +
> +/// Optimizations after all the reg selections and spills are done.
> +///
> +void InlineSpiller::postOptimization() {
> + SmallVector<unsigned, 4> NewVRegs;
> + LiveRangeEdit LRE(nullptr, NewVRegs, MF, LIS, &VRM, nullptr);
> + HSpiller.hoistAllSpills(LRE);
> + assert(NewVRegs.size() == 0 &&
> + "No new vregs should be generated in hoistAllSpills");
> +}
> +
> +/// When a spill is inserted, add the spill to MergeableSpills map.
> +///
> +void HoistSpillHelper::addToMergeableSpills(MachineInstr *Spill, int StackSlot,
> + unsigned Original) {
> + StackSlotToReg[StackSlot] = Original;
> + SlotIndex Idx = LIS.getInstructionIndex(*Spill);
> + VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
> + std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
> + MergeableSpills[MIdx].insert(Spill);
> +}
> +
> +/// When a spill is removed, remove the spill from MergeableSpills map.
> +/// Return true if the spill is removed successfully.
> +///
> +bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr *Spill,
> + int StackSlot) {
> + int Original = StackSlotToReg[StackSlot];
> + if (!Original)
> + return false;
> + SlotIndex Idx = LIS.getInstructionIndex(*Spill);
> + VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
> + std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
> + return MergeableSpills[MIdx].erase(Spill);
> +}
> +
> +/// Check BB to see if it is a possible target BB to place a hoisted spill,
> +/// i.e., there should be a living sibling of OrigReg at the insert point.
> +///
> +bool HoistSpillHelper::isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI,
> + MachineBasicBlock &BB, unsigned &LiveReg) {
> + SlotIndex Idx;
> + MachineBasicBlock::iterator MI = BB.getFirstTerminator();
> + if (MI != BB.end())
> + Idx = LIS.getInstructionIndex(*MI);
> + else
> + Idx = LIS.getMBBEndIdx(&BB).getPrevSlot();
> + SmallSetVector<unsigned, 16> &Siblings = Virt2SiblingsMap[OrigReg];
> + assert((LIS.getInterval(OrigReg)).getVNInfoAt(Idx) == &OrigVNI &&
> + "Unexpected VNI");
> +
> + for (auto const SibReg : Siblings) {
> + LiveInterval &LI = LIS.getInterval(SibReg);
> + VNInfo *VNI = LI.getVNInfoAt(Idx);
> + if (VNI) {
> + LiveReg = SibReg;
> + return true;
> + }
> + }
> + return false;
> +}
> +
> +/// Remove redundant spills in the same BB. Save those redundant spills in
> +/// SpillsToRm, and save the spill to keep and its BB in SpillBBToSpill map.
> +///
> +void HoistSpillHelper::rmRedundantSpills(
> + SmallPtrSet<MachineInstr *, 16> &Spills,
> + SmallVectorImpl<MachineInstr *> &SpillsToRm,
> + DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
> + // For each spill seen, check SpillBBToSpill[] and see if its BB already has
> + // another spill inside. If a BB contains more than one spill, only keep the
> + // earlier spill with smaller SlotIndex.
> + for (const auto CurrentSpill : Spills) {
> + MachineBasicBlock *Block = CurrentSpill->getParent();
> + MachineDomTreeNode *Node = MDT.DT->getNode(Block);
> + MachineInstr *PrevSpill = SpillBBToSpill[Node];
> + if (PrevSpill) {
> + SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill);
> + SlotIndex CIdx = LIS.getInstructionIndex(*CurrentSpill);
> + MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill;
> + MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill;
> + SpillsToRm.push_back(SpillToRm);
> + SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep;
> + } else {
> + SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill;
> + }
> + }
> + for (const auto SpillToRm : SpillsToRm)
> + Spills.erase(SpillToRm);
> +}
> +
> +/// Starting from \p Root find a top-down traversal order of the dominator
> +/// tree to visit all basic blocks containing the elements of \p Spills.
> +/// Redundant spills will be found and put into \p SpillsToRm at the same
> +/// time. \p SpillBBToSpill will be populated as part of the process and
> +/// maps a basic block to the first store occurring in the basic block.
> +/// \post SpillsToRm.union(Spills@post) == Spills@pre
> +///
> +void HoistSpillHelper::getVisitOrders(
> + MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
> + SmallVectorImpl<MachineDomTreeNode *> &Orders,
> + SmallVectorImpl<MachineInstr *> &SpillsToRm,
> + DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
> + DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
> + // The set contains all the possible BB nodes to which we may hoist
> + // original spills.
> + SmallPtrSet<MachineDomTreeNode *, 8> WorkSet;
> + // Save the BB nodes on the path from the first BB node containing
> + // non-redundant spill to the Root node.
> + SmallPtrSet<MachineDomTreeNode *, 8> NodesOnPath;
> + // All the spills to be hoisted must originate from a single def instruction
> + // to the OrigReg. It means the def instruction should dominate all the spills
> + // to be hoisted. We choose the BB where the def instruction is located as
> + // the Root.
> + MachineDomTreeNode *RootIDomNode = MDT[Root]->getIDom();
> + // For every node on the dominator tree with spill, walk up on the dominator
> + // tree towards the Root node until it is reached. If there is other node
> + // containing spill in the middle of the path, the previous spill seen will
> + // be redundant and the node containing it will be removed. All the nodes on
> + // the path starting from the first node with non-redundant spill to the Root
> + // node will be added to the WorkSet, which will contain all the possible
> + // locations where spills may be hoisted to after the loop below is done.
> + for (const auto Spill : Spills) {
> + MachineBasicBlock *Block = Spill->getParent();
> + MachineDomTreeNode *Node = MDT[Block];
> + MachineInstr *SpillToRm = nullptr;
> + while (Node != RootIDomNode) {
> + // If Node dominates Block, and it already contains a spill, the spill in
> + // Block will be redundant.
> + if (Node != MDT[Block] && SpillBBToSpill[Node]) {
> + SpillToRm = SpillBBToSpill[MDT[Block]];
> + break;
> + /// If we see the Node already in WorkSet, the path from the Node to
> + /// the Root node must already be traversed by another spill.
> + /// Then no need to repeat.
> + } else if (WorkSet.count(Node)) {
> + break;
> + } else {
> + NodesOnPath.insert(Node);
> + }
> + Node = Node->getIDom();
> + }
> + if (SpillToRm) {
> + SpillsToRm.push_back(SpillToRm);
> + } else {
> + // Add a BB containing the original spills to SpillsToKeep -- i.e.,
> + // set the initial status before hoisting start. The value of BBs
> + // containing original spills is set to 0, in order to discriminate
> + // them from BBs containing hoisted spills, which will be inserted to
> + // SpillsToKeep later during hoisting.
> + SpillsToKeep[MDT[Block]] = 0;
> + WorkSet.insert(NodesOnPath.begin(), NodesOnPath.end());
> + }
> + NodesOnPath.clear();
> + }
> +
> + // Sort the nodes in WorkSet in top-down order and save the nodes
> + // in Orders. Orders will be used for hoisting in runHoistSpills.
> + unsigned idx = 0;
> + Orders.push_back(MDT.DT->getNode(Root));
> + do {
> + MachineDomTreeNode *Node = Orders[idx++];
> + const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
> + unsigned NumChildren = Children.size();
> + for (unsigned i = 0; i != NumChildren; ++i) {
> + MachineDomTreeNode *Child = Children[i];
> + if (WorkSet.count(Child))
> + Orders.push_back(Child);
> + }
> + } while (idx != Orders.size());
> + assert(Orders.size() == WorkSet.size() &&
> + "Orders have different size with WorkSet");
> +
> +#ifndef NDEBUG
> + DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n");
> + SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
> + for (; RIt != Orders.rend(); RIt++)
> + DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ",");
> + DEBUG(dbgs() << "\n");
> +#endif
> +}
> +
> +/// Try to hoist spills according to BB hotness. The spills to be removed will
> +/// be saved in \p SpillsToRm. The spills to be inserted will be saved in
> +/// \p SpillsToIns.
> +///
> +void HoistSpillHelper::runHoistSpills(
> + unsigned OrigReg, VNInfo &OrigVNI, SmallPtrSet<MachineInstr *, 16> &Spills,
> + SmallVectorImpl<MachineInstr *> &SpillsToRm,
> + DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns) {
> + // Visit order of dominator tree nodes.
> + SmallVector<MachineDomTreeNode *, 32> Orders;
> + // SpillsToKeep contains all the nodes where spills are to be inserted
> + // during hoisting. If the spill to be inserted is an original spill
> + // (not a hoisted one), the value of the map entry is 0. If the spill
> + // is a hoisted spill, the value of the map entry is the VReg to be used
> + // as the source of the spill.
> + DenseMap<MachineDomTreeNode *, unsigned> SpillsToKeep;
> + // Map from BB to the first spill inside of it.
> + DenseMap<MachineDomTreeNode *, MachineInstr *> SpillBBToSpill;
> +
> + rmRedundantSpills(Spills, SpillsToRm, SpillBBToSpill);
> +
> + MachineBasicBlock *Root = LIS.getMBBFromIndex(OrigVNI.def);
> + getVisitOrders(Root, Spills, Orders, SpillsToRm, SpillsToKeep,
> + SpillBBToSpill);
> +
> + // SpillsInSubTree keeps the map from a dom tree node to a pair of
> + // nodes set and the cost of all the spills inside those nodes.
> + // The nodes set are the locations where spills are to be inserted
> + // in the subtree of current node.
> + typedef std::pair<SmallPtrSet<MachineDomTreeNode *, 16>, BlockFrequency>
> + NodesCostPair;
> + DenseMap<MachineDomTreeNode *, NodesCostPair> SpillsInSubTreeMap;
> + // Iterate Orders set in reverse order, which will be a bottom-up order
> + // in the dominator tree. Once we visit a dom tree node, we know its
> + // children have already been visited and the spill locations in the
> + // subtrees of all the children have been determined.
> + SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
> + for (; RIt != Orders.rend(); RIt++) {
> + MachineBasicBlock *Block = (*RIt)->getBlock();
> + SmallPtrSet<MachineDomTreeNode *, 16> &SpillsInSubTree =
> + SpillsInSubTreeMap[*RIt].first;
> + // Total spill costs inside the sub tree.
> + BlockFrequency &SubTreeCost = SpillsInSubTreeMap[*RIt].second;
> +
> + // If Block contains an original spill, simply continue.
> + if (SpillsToKeep.find(*RIt) != SpillsToKeep.end() && !SpillsToKeep[*RIt]) {
> + SpillsInSubTree.insert(*RIt);
> + SubTreeCost = MBFI.getBlockFreq(Block);
> + continue;
> + }
> +
> + // Collect spills in subtree of current node (*RIt) to
> + // SpillsInSubTree.
> + const std::vector<MachineDomTreeNode *> &Children = (*RIt)->getChildren();
> + unsigned NumChildren = Children.size();
> + for (unsigned i = 0; i != NumChildren; ++i) {
> + MachineDomTreeNode *Child = Children[i];
> + SpillsInSubTree.insert(SpillsInSubTreeMap[Child].first.begin(),
> + SpillsInSubTreeMap[Child].first.end());
> + SubTreeCost += SpillsInSubTreeMap[Child].second;
> + SpillsInSubTreeMap.erase(Child);
> + }
> +
> + // No spills in subtree, simply continue.
> + if (SpillsInSubTree.empty())
> + continue;
> +
> + // Check whether Block is a possible candidate to insert spill.
> + unsigned LiveReg = 0;
> + if (!isSpillCandBB(OrigReg, OrigVNI, *Block, LiveReg))
> + continue;
> +
> + // If there are multiple spills that could be merged, bias a little
> + // to hoist the spill.
> + BranchProbability MarginProb = (SpillsInSubTree.size() > 1)
> + ? BranchProbability(9, 10)
> + : BranchProbability(1, 1);
> + if (SubTreeCost > MBFI.getBlockFreq(Block) * MarginProb) {
> + // Hoist: Move spills to current Block.
> + for (const auto SpillBB : SpillsInSubTree) {
> + // When SpillBB is a BB that contains an original spill, insert the spill
> + // into SpillsToRm.
> + if (SpillsToKeep.find(SpillBB) != SpillsToKeep.end() &&
> + !SpillsToKeep[SpillBB]) {
> + MachineInstr *SpillToRm = SpillBBToSpill[SpillBB];
> + SpillsToRm.push_back(SpillToRm);
> + }
> + // SpillBB will not contain spill anymore, remove it from SpillsToKeep.
> + SpillsToKeep.erase(SpillBB);
> + }
> + // Current Block is the BB containing the new hoisted spill. Add it to
> + // SpillsToKeep. LiveReg is the source of the new spill.
> + SpillsToKeep[*RIt] = LiveReg;
> + DEBUG({
> + dbgs() << "spills in BB: ";
> + for (const auto Rspill : SpillsInSubTree)
> + dbgs() << Rspill->getBlock()->getNumber() << " ";
> + dbgs() << "were promoted to BB" << (*RIt)->getBlock()->getNumber()
> + << "\n";
> + });
> + SpillsInSubTree.clear();
> + SpillsInSubTree.insert(*RIt);
> + SubTreeCost = MBFI.getBlockFreq(Block);
> + }
> + }
> + // For spills in SpillsToKeep with LiveReg set (i.e., not original spill),
> + // save them to SpillsToIns.
> + for (const auto Ent : SpillsToKeep) {
> + if (Ent.second)
> + SpillsToIns[Ent.first->getBlock()] = Ent.second;
> + }
> +}
> +
> +/// For spills with equal values, remove redundant spills and hoist those
> +/// left to less hot spots.
> +///
> +/// Spills with equal values will be collected into the same set in
> +/// MergeableSpills when each spill is inserted. These equal spills originate
> +/// from the same defining instruction and are dominated by it.
> +/// Before hoisting all the equal spills, redundant spills inside the same
> +/// BB are first marked to be deleted. Then, starting from the spills left,
> +/// walk up the dominator tree towards the Root node where the defining
> +/// instruction is located, mark the dominated spills to be deleted along the
> +/// way and collect the BB nodes on the path from non-dominated spills to the
> +/// defining instruction into a WorkSet. The nodes in WorkSet are the candidate
> +/// places where we consider hoisting the spills. We iterate the WorkSet in
> +/// bottom-up order, and for each node we decide whether to hoist the spills
> +/// inside its subtree to that node. In this way, we can get benefit locally
> +/// even if hoisting all the equal spills to one cold place is impossible.
> +///
> +void HoistSpillHelper::hoistAllSpills(LiveRangeEdit &Edit) {
> + // Save the mapping between stackslot and its original reg.
> + DenseMap<int, unsigned> SlotToOrigReg;
> + for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
> + unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
> + int Slot = VRM.getStackSlot(Reg);
> + if (Slot != VirtRegMap::NO_STACK_SLOT)
> + SlotToOrigReg[Slot] = VRM.getOriginal(Reg);
> + unsigned Original = VRM.getPreSplitReg(Reg);
> + if (!MRI.def_empty(Reg))
> + Virt2SiblingsMap[Original].insert(Reg);
> + }
> +
> + // Each entry in MergeableSpills contains a spill set with equal values.
> + for (auto &Ent : MergeableSpills) {
> + int Slot = Ent.first.first;
> + unsigned OrigReg = SlotToOrigReg[Slot];
> + VNInfo *OrigVNI = Ent.first.second;
> + SmallPtrSet<MachineInstr *, 16> &EqValSpills = Ent.second;
> + if (Ent.second.empty())
> + continue;
> +
> + DEBUG({
> + dbgs() << "\nFor Slot" << Slot << " and VN" << OrigVNI->id << ":\n"
> + << "Equal spills in BB: ";
> + for (const auto spill : EqValSpills)
> + dbgs() << spill->getParent()->getNumber() << " ";
> + dbgs() << "\n";
> + });
> +
> + // SpillsToRm is the spill set to be removed from EqValSpills.
> + SmallVector<MachineInstr *, 16> SpillsToRm;
> + // SpillsToIns is the spill set to be newly inserted after hoisting.
> + DenseMap<MachineBasicBlock *, unsigned> SpillsToIns;
> +
> + runHoistSpills(OrigReg, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns);
> +
> + DEBUG({
> + dbgs() << "Finally inserted spills in BB: ";
> + for (const auto Ispill : SpillsToIns)
> + dbgs() << Ispill.first->getNumber() << " ";
> + dbgs() << "\nFinally removed spills in BB: ";
> + for (const auto Rspill : SpillsToRm)
> + dbgs() << Rspill->getParent()->getNumber() << " ";
> + dbgs() << "\n";
> + });
> +
> + // Stack live range update.
> + LiveInterval &StackIntvl = LSS.getInterval(Slot);
> + if (!SpillsToIns.empty() || !SpillsToRm.empty()) {
> + LiveInterval &OrigLI = LIS.getInterval(OrigReg);
> + StackIntvl.MergeValueInAsValue(OrigLI, OrigVNI,
> + StackIntvl.getValNumInfo(0));
> + }
> +
> + // Insert hoisted spills.
> + for (auto const Insert : SpillsToIns) {
> + MachineBasicBlock *BB = Insert.first;
> + unsigned LiveReg = Insert.second;
> + MachineBasicBlock::iterator MI = BB->getFirstTerminator();
> + TII.storeRegToStackSlot(*BB, MI, LiveReg, false, Slot,
> + MRI.getRegClass(LiveReg), &TRI);
> + LIS.InsertMachineInstrRangeInMaps(std::prev(MI), MI);
> + ++NumSpills;
> + }
> +
> + // Remove redundant spills or change them to dead instructions.
> + NumSpills -= SpillsToRm.size();
> + for (auto const RMEnt : SpillsToRm) {
> + RMEnt->setDesc(TII.get(TargetOpcode::KILL));
> + for (unsigned i = RMEnt->getNumOperands(); i; --i) {
> + MachineOperand &MO = RMEnt->getOperand(i - 1);
> + if (MO.isReg() && MO.isImplicit() && MO.isDef() && !MO.isDead())
> + RMEnt->RemoveOperand(i - 1);
> + }
> + }
> + Edit.eliminateDeadDefs(SpillsToRm, None, true);
> + }
> +}
>
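> As a small worked example of the cost check in runHoistSpills
> (illustrative numbers only): suppose BB2 and BB3 each contain a
> mergeable spill and are both dominated by BB1, with freq(BB2) =
> freq(BB3) = 60 and freq(BB1) = 100. Then SubTreeCost = 120, and since
> more than one spill can be merged the margin is 9/10, so the test is
> 120 > 100 * 9/10 = 90, which holds: both spills are removed and a
> single spill is inserted in BB1, provided isSpillCandBB finds a live
> sibling of the original reg there. If instead freq(BB1) = 150, the test
> 120 > 135 fails and the spills stay where they are.
>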
> Modified: llvm/trunk/lib/CodeGen/LiveRangeEdit.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveRangeEdit.cpp?rev=265309&r1=265308&r2=265309&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/LiveRangeEdit.cpp (original)
> +++ llvm/trunk/lib/CodeGen/LiveRangeEdit.cpp Mon Apr 4 11:42:40 2016
> @@ -63,10 +63,13 @@ void LiveRangeEdit::scanRemattable(Alias
> for (VNInfo *VNI : getParent().valnos) {
> if (VNI->isUnused())
> continue;
> - MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
> + unsigned Original = VRM->getOriginal(getReg());
> + LiveInterval &OrigLI = LIS.getInterval(Original);
> + VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
> + MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def);
> if (!DefMI)
> continue;
> - checkRematerializable(VNI, DefMI, aa);
> + checkRematerializable(OrigVNI, DefMI, aa);
> }
> ScannedRemattable = true;
> }
> @@ -113,24 +116,18 @@ bool LiveRangeEdit::allUsesAvailableAt(c
> return true;
> }
>
> -bool LiveRangeEdit::canRematerializeAt(Remat &RM,
> - SlotIndex UseIdx,
> - bool cheapAsAMove) {
> +bool LiveRangeEdit::canRematerializeAt(Remat &RM, VNInfo *OrigVNI,
> + SlotIndex UseIdx, bool cheapAsAMove) {
> assert(ScannedRemattable && "Call anyRematerializable first");
>
> // Use scanRemattable info.
> - if (!Remattable.count(RM.ParentVNI))
> + if (!Remattable.count(OrigVNI))
> return false;
>
> // No defining instruction provided.
> SlotIndex DefIdx;
> - if (RM.OrigMI)
> - DefIdx = LIS.getInstructionIndex(*RM.OrigMI);
> - else {
> - DefIdx = RM.ParentVNI->def;
> - RM.OrigMI = LIS.getInstructionFromIndex(DefIdx);
> - assert(RM.OrigMI && "No defining instruction for remattable value");
> - }
> + assert(RM.OrigMI && "No defining instruction for remattable value");
> + DefIdx = LIS.getInstructionIndex(*RM.OrigMI);
>
> // If only cheap remats were requested, bail out early.
> if (cheapAsAMove && !TII.isAsCheapAsAMove(RM.OrigMI))
> @@ -261,6 +258,15 @@ void LiveRangeEdit::eliminateDeadDef(Mac
> // Collect virtual registers to be erased after MI is gone.
> SmallVector<unsigned, 8> RegsToErase;
> bool ReadsPhysRegs = false;
> + bool isOrigDef = false;
> + unsigned Dest;
> + if (VRM && MI->getOperand(0).isReg()) {
> + Dest = MI->getOperand(0).getReg();
> + unsigned Original = VRM->getOriginal(Dest);
> + LiveInterval &OrigLI = LIS.getInterval(Original);
> + VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
> + isOrigDef = SlotIndex::isSameInstr(OrigVNI->def, Idx);
> + }
>
> // Check for live intervals that may shrink
> for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
> @@ -314,11 +320,24 @@ void LiveRangeEdit::eliminateDeadDef(Mac
> }
> DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
> } else {
> - if (TheDelegate)
> - TheDelegate->LRE_WillEraseInstruction(MI);
> - LIS.RemoveMachineInstrFromMaps(*MI);
> - MI->eraseFromParent();
> - ++NumDCEDeleted;
> + // If the dest of MI is an original reg, don't delete the inst. Replace
> + // the dest with a new reg, keep the inst for remat of other siblings.
> + // The inst is saved in LiveRangeEdit::DeadRemats and will be deleted
> + // after all the allocations of the func are done.
> + if (isOrigDef) {
> + unsigned NewDest = createFrom(Dest);
> + pop_back();
> + markDeadRemat(MI);
> + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
> + MI->substituteRegister(Dest, NewDest, 0, TRI);
> + MI->getOperand(0).setIsDead(false);
> + } else {
> + if (TheDelegate)
> + TheDelegate->LRE_WillEraseInstruction(MI);
> + LIS.RemoveMachineInstrFromMaps(*MI);
> + MI->eraseFromParent();
> + ++NumDCEDeleted;
> + }
> }
>
> // Erase any virtregs that are now empty and unused. There may be <undef>
> @@ -332,8 +351,9 @@ void LiveRangeEdit::eliminateDeadDef(Mac
> }
> }
>
> -void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
> - ArrayRef<unsigned> RegsBeingSpilled) {
> +void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
> + ArrayRef<unsigned> RegsBeingSpilled,
> + bool NoSplit) {
> ToShrinkSet ToShrink;
>
> for (;;) {
> @@ -355,6 +375,9 @@ void LiveRangeEdit::eliminateDeadDefs(Sm
> if (!LIS.shrinkToUses(LI, &Dead))
> continue;
>
> + if (NoSplit)
> + continue;
> +
> // Don't create new intervals for a register being spilled.
> // The new intervals would have to be spilled anyway so it's not worth it.
> // Also they currently aren't spilled so creating them and not spilling
>
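
The net effect of the LiveRangeEdit changes above: callers of
canRematerializeAt now have to supply the defining instruction in RM.OrigMI,
and a dead def of an original register is no longer erased on the spot; it is
parked in DeadRemats so its defining expression stays available for
rematerializing the remaining siblings, and only postOptimization finally
deletes it. Here is a self-contained sketch of that deferred-deletion pattern;
every type and name in it (Allocator, Instr, markDeadRemat) is a simplified
stand-in, not the real LLVM API:

    // Deferred deletion: park dead defs instead of erasing them eagerly,
    // then erase everything in one sweep after allocation is done.
    #include <cassert>
    #include <cstddef>
    #include <list>
    #include <string>
    #include <unordered_set>

    struct Instr {
      std::string Def;   // register this instruction defines
      bool Dead = false; // def currently has no remaining uses
    };

    class Allocator {
      std::list<Instr> Body;                  // stand-in for the function body
      std::unordered_set<Instr *> DeadRemats; // parked, not yet erased

    public:
      Instr *emit(std::string Def) {
        Body.push_back({std::move(Def)});
        return &Body.back();
      }

      // During allocation: the def went dead, but keep the instruction so
      // the other siblings of the original register can still remat from it.
      void markDeadRemat(Instr *MI) {
        MI->Dead = true;
        DeadRemats.insert(MI);
      }

      // After all allocations (cf. RegAllocBase::postOptimization below):
      // only now is it safe to actually delete the parked instructions.
      void postOptimization() {
        Body.remove_if([&](Instr &I) { return DeadRemats.count(&I) != 0; });
        DeadRemats.clear();
      }

      std::size_t size() const { return Body.size(); }
    };

    int main() {
      Allocator RA;
      Instr *Orig = RA.emit("vreg0");
      RA.emit("vreg1");
      RA.markDeadRemat(Orig); // still present: remat expr stays queryable
      assert(RA.size() == 2);
      RA.postOptimization();  // the deferred deletion happens here
      assert(RA.size() == 1);
      return 0;
    }
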
> Modified: llvm/trunk/lib/CodeGen/RegAllocBase.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocBase.cpp?rev=265309&r1=265308&r2=265309&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/RegAllocBase.cpp (original)
> +++ llvm/trunk/lib/CodeGen/RegAllocBase.cpp Mon Apr 4 11:42:40 2016
> @@ -153,3 +153,12 @@ void RegAllocBase::allocatePhysRegs() {
> }
> }
> }
> +
> +void RegAllocBase::postOptimization() {
> + spiller().postOptimization();
> + for (auto DeadInst : DeadRemats) {
> + LIS->RemoveMachineInstrFromMaps(*DeadInst);
> + DeadInst->eraseFromParent();
> + }
> + DeadRemats.clear();
> +}
>
> Modified: llvm/trunk/lib/CodeGen/RegAllocBase.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocBase.h?rev=265309&r1=265308&r2=265309&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/RegAllocBase.h (original)
> +++ llvm/trunk/lib/CodeGen/RegAllocBase.h Mon Apr 4 11:42:40 2016
> @@ -65,6 +65,12 @@ protected:
> LiveRegMatrix *Matrix;
> RegisterClassInfo RegClassInfo;
>
> + /// Inst which is a def of an original reg and whose defs are already all
> + /// dead after remat is saved in DeadRemats. The deletion of such inst is
> + /// postponed till all the allocations are done, so its remat expr is
> + /// always available for the remat of all the siblings of the original reg.
> + SmallPtrSet<MachineInstr *, 32> DeadRemats;
> +
> RegAllocBase()
> : TRI(nullptr), MRI(nullptr), VRM(nullptr), LIS(nullptr),
> Matrix(nullptr) {}
>
> @@ -77,6 +83,10 @@ protected:
> // physical register assignments.
> void allocatePhysRegs();
>
> + // Include spiller post optimization and removing dead defs left because of
> + // rematerialization.
> + virtual void postOptimization();
> +
> // Get a temporary reference to a Spiller instance.
> virtual Spiller &spiller() = 0;
>
>
> Modified: llvm/trunk/lib/CodeGen/RegAllocBasic.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocBasic.cpp?rev=265309&r1=265308&r2=265309&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/RegAllocBasic.cpp (original)
> +++ llvm/trunk/lib/CodeGen/RegAllocBasic.cpp Mon Apr 4 11:42:40 2016
> @@ -199,7 +199,7 @@ bool RABasic::spillInterferences(LiveInt
> Matrix->unassign(Spill);
>
> // Spill the extracted interval.
> - LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM);
> + LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
> spiller().spill(LRE);
> }
> return true;
> @@ -258,7 +258,7 @@ unsigned RABasic::selectOrSplit(LiveInte
> DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
> if (!VirtReg.isSpillable())
> return ~0u;
> - LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM);
> + LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
> spiller().spill(LRE);
>
> // The live virtual register requesting allocation was spilled, so tell
> @@ -283,6 +283,7 @@ bool RABasic::runOnMachineFunction(Machi
> SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
>
> allocatePhysRegs();
> + postOptimization();
>
> // Diagnostic output before rewriting
> DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
>
> Modified: llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp?rev=265309&r1=265308&r2=265309&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp (original)
> +++ llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp Mon Apr 4 11:42:40 2016
> @@ -12,7 +12,6 @@
> //
>
> //===----------------------------------------------------------------------===//
>
> -#include "llvm/CodeGen/Passes.h"
> #include "AllocationOrder.h"
> #include "InterferenceCache.h"
> #include "LiveDebugVariables.h"
> @@ -33,6 +32,7 @@
> #include "llvm/CodeGen/MachineFunctionPass.h"
> #include "llvm/CodeGen/MachineLoopInfo.h"
> #include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include "llvm/CodeGen/Passes.h"
> #include "llvm/CodeGen/RegAllocRegistry.h"
> #include "llvm/CodeGen/RegisterClassInfo.h"
> #include "llvm/CodeGen/VirtRegMap.h"
> @@ -44,6 +44,7 @@
> #include "llvm/Support/ErrorHandling.h"
> #include "llvm/Support/Timer.h"
> #include "llvm/Support/raw_ostream.h"
> +#include "llvm/Target/TargetInstrInfo.h"
> #include "llvm/Target/TargetSubtargetInfo.h"
> #include <queue>
>
> @@ -55,14 +56,14 @@ STATISTIC(NumGlobalSplits, "Number of sp
> STATISTIC(NumLocalSplits, "Number of split local live ranges");
> STATISTIC(NumEvicted, "Number of interferences evicted");
>
> -static cl::opt<SplitEditor::ComplementSpillMode>
> -SplitSpillMode("split-spill-mode", cl::Hidden,
> - cl::desc("Spill mode for splitting live ranges"),
> - cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
> - clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
> - clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
> - clEnumValEnd),
> - cl::init(SplitEditor::SM_Partition));
> +static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
> + "split-spill-mode", cl::Hidden,
> + cl::desc("Spill mode for splitting live ranges"),
> + cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
> + clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
> + clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
> + clEnumValEnd),
> + cl::init(SplitEditor::SM_Speed));
>
> static cl::opt<unsigned>
> LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
> @@ -1465,7 +1466,7 @@ unsigned RAGreedy::doRegionSplit(LiveInt
> SmallVectorImpl<unsigned> &NewVRegs) {
> SmallVector<unsigned, 8> UsedCands;
> // Prepare split editor.
> - LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
> + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
> SE->reset(LREdit, SplitSpillMode);
>
> // Assign all edge bundles to the preferred candidate, or NoCand.
> @@ -1513,7 +1514,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInt
> assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
> unsigned Reg = VirtReg.reg;
> bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
> - LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
> + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
> SE->reset(LREdit, SplitSpillMode);
> ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
> for (unsigned i = 0; i != UseBlocks.size(); ++i) {
> @@ -1585,7 +1586,7 @@ RAGreedy::tryInstructionSplit(LiveInterv
>
> // Always enable split spill mode, since we're effectively spilling to a
> // register.
> - LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
> + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
> SE->reset(LREdit, SplitEditor::SM_Size);
>
> ArrayRef<SlotIndex> Uses = SA->getUseSlots();
> @@ -1908,7 +1909,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInt
> << '-' << Uses[BestAfter] << ", " << BestDiff
> << ", " << (BestAfter - BestBefore + 1) << " instrs\n");
>
> - LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
> + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
> SE->reset(LREdit);
>
> SE->openIntv();
> @@ -2551,7 +2552,7 @@ unsigned RAGreedy::selectOrSplitImpl(Liv
> NewVRegs.push_back(VirtReg.reg);
> } else {
> NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
> - LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
> + LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
> spiller().spill(LRE);
> setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
>
> @@ -2609,6 +2610,8 @@ bool RAGreedy::runOnMachineFunction(Mach
>
> allocatePhysRegs();
> tryHintsRecoloring();
> + postOptimization();
> +
> releaseMemory();
> return true;
> }
>
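
One behavioral change in this file is easy to miss among the reformatting:
the cl::init of the -split-spill-mode option moves from SM_Partition to
SM_Speed, so greedy now defaults to the new speed-oriented hoisting in
SplitKit. If the old behavior is ever wanted for comparison, it should still
be reachable explicitly, e.g. "llc -split-spill-mode=default ..." (per the
clEnumValN table above, the "default" spelling still maps to SM_Partition).
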
> Modified: llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp?rev=265309&r1=265308&r2=265309&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp (original)
> +++ llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp Mon Apr 4 11:42:40 2016
> @@ -123,6 +123,12 @@ private:
>
> RegSet VRegsToAlloc, EmptyIntervalVRegs;
>
> + /// Inst which is a def of an original reg and whose defs are already all
> + /// dead after remat is saved in DeadRemats. The deletion of such inst is
> + /// postponed till all the allocations are done, so its remat expr is
> + /// always available for the remat of all the siblings of the original reg.
> + SmallPtrSet<MachineInstr *, 32> DeadRemats;
> +
> /// \brief Finds the initial set of vreg intervals to allocate.
> void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
>
> @@ -146,6 +152,7 @@ private:
> void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
> VirtRegMap &VRM) const;
>
> + void postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS);
> };
>
> char RegAllocPBQP::ID = 0;
> @@ -631,7 +638,8 @@ void RegAllocPBQP::spillVReg(unsigned VR
> VirtRegMap &VRM, Spiller &VRegSpiller) {
>
> VRegsToAlloc.erase(VReg);
> - LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM);
> + LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM,
> + nullptr, &DeadRemats);
> VRegSpiller.spill(LRE);
>
> const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
> @@ -713,6 +721,16 @@ void RegAllocPBQP::finalizeAlloc(Machine
> }
> }
>
> +void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
> + VRegSpiller.postOptimization();
> + /// Remove dead defs because of rematerialization.
> + for (auto DeadInst : DeadRemats) {
> + LIS.RemoveMachineInstrFromMaps(*DeadInst);
> + DeadInst->eraseFromParent();
> + }
> + DeadRemats.clear();
> +}
> +
> static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size,
> unsigned NumInstr) {
> // All intervals have a spill weight that is mostly proportional to the
> number
> @@ -798,6 +816,7 @@ bool RegAllocPBQP::runOnMachineFunction(
>
> // Finalise allocation, allocate empty ranges.
> finalizeAlloc(MF, LIS, VRM);
> + postOptimization(*VRegSpiller, LIS);
> VRegsToAlloc.clear();
> EmptyIntervalVRegs.clear();
>
>
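
Worth noting: RegAllocPBQP is a standalone MachineFunctionPass rather than a
RegAllocBase subclass, so it cannot inherit the RegAllocBase::postOptimization
added earlier. That is why this file grows its own DeadRemats set and its own
postOptimization(Spiller &, LiveIntervals &) with the same two steps: run the
spiller's post-pass, then erase the deferred dead remat instructions.
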
> Modified: llvm/trunk/lib/CodeGen/Spiller.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/Spiller.h?rev=265309&r1=265308&r2=265309&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/Spiller.h (original)
> +++ llvm/trunk/lib/CodeGen/Spiller.h Mon Apr 4 11:42:40 2016
> @@ -16,6 +16,7 @@ namespace llvm {
> class MachineFunction;
> class MachineFunctionPass;
> class VirtRegMap;
> + class LiveIntervals;
>
> /// Spiller interface.
> ///
> @@ -28,7 +29,7 @@ namespace llvm {
>
> /// spill - Spill the LRE.getParent() live interval.
> virtual void spill(LiveRangeEdit &LRE) = 0;
> -
> + virtual void postOptimization() {};
> };
>
> /// Create and return a spiller that will insert spill code directly instead
> @@ -36,7 +37,6 @@ namespace llvm {
> Spiller *createInlineSpiller(MachineFunctionPass &pass,
> MachineFunction &mf,
> VirtRegMap &vrm);
> -
> }
>
> #endif
>
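
postOptimization() is now part of the Spiller interface with an empty default
body, so existing spillers keep compiling and a concrete spiller opts in by
overriding it. Under -Werror with clang's -Winconsistent-missing-override, the
overriding declaration must carry the override keyword. A minimal sketch of
the shape; SpillerIface, ToySpiller and LiveRangeEditStub are stand-ins, not
the real Spiller or InlineSpiller code:

    struct LiveRangeEditStub {}; // stand-in for llvm::LiveRangeEdit

    // Mirrors the interface above: spill() is mandatory, postOptimization()
    // is an optional hook with a do-nothing default.
    class SpillerIface {
    public:
      virtual ~SpillerIface() = default;
      virtual void spill(LiveRangeEditStub &LRE) = 0;
      virtual void postOptimization() {}
    };

    class ToySpiller final : public SpillerIface {
    public:
      void spill(LiveRangeEditStub &) override { /* insert spill code */ }
      // 'override' keeps -Winconsistent-missing-override quiet.
      void postOptimization() override { /* hoist spills, drop dead remats */ }
    };

    int main() {
      ToySpiller S;
      LiveRangeEditStub LRE;
      S.spill(LRE);
      S.postOptimization();
      return 0;
    }
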
> Modified: llvm/trunk/lib/CodeGen/SplitKit.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SplitKit.cpp?rev=265309&r1=265308&r2=265309&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SplitKit.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SplitKit.cpp Mon Apr 4 11:42:40 2016
> @@ -16,6 +16,7 @@
> #include "llvm/ADT/Statistic.h"
> #include "llvm/CodeGen/LiveIntervalAnalysis.h"
> #include "llvm/CodeGen/LiveRangeEdit.h"
> +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
> #include "llvm/CodeGen/MachineDominators.h"
> #include "llvm/CodeGen/MachineInstrBuilder.h"
> #include "llvm/CodeGen/MachineLoopInfo.h"
> @@ -430,8 +431,13 @@ VNInfo *SplitEditor::defFromParent(unsig
> bool Late = RegIdx != 0;
>
> // Attempt cheap-as-a-copy rematerialization.
> + unsigned Original = VRM.getOriginal(Edit->get(RegIdx));
> + LiveInterval &OrigLI = LIS.getInterval(Original);
> + VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
> LiveRangeEdit::Remat RM(ParentVNI);
> - if (Edit->canRematerializeAt(RM, UseIdx, true)) {
> + RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
> +
> + if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
> Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
> ++NumRemats;
> } else {
> @@ -716,7 +722,62 @@ SplitEditor::findShallowDominator(Machin
> }
> }
>
> -void SplitEditor::hoistCopiesForSize() {
> +void SplitEditor::computeRedundantBackCopies(
> + DenseSet<unsigned> &NotToHoistSet, SmallVectorImpl<VNInfo *> &BackCopies) {
> + LiveInterval *LI = &LIS.getInterval(Edit->get(0));
> + LiveInterval *Parent = &Edit->getParent();
> + SmallVector<SmallPtrSet<VNInfo *, 8>, 8> EqualVNs(Parent->getNumValNums());
> + SmallPtrSet<VNInfo *, 8> DominatedVNIs;
> +
> + // Aggregate VNIs having the same value as ParentVNI.
> + for (VNInfo *VNI : LI->valnos) {
> + if (VNI->isUnused())
> + continue;
> + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
> + EqualVNs[ParentVNI->id].insert(VNI);
> + }
> +
> + // For VNI aggregation of each ParentVNI, collect dominated, i.e.,
> + // redundant VNIs to BackCopies.
> + for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) {
> + VNInfo *ParentVNI = Parent->getValNumInfo(i);
> + if (!NotToHoistSet.count(ParentVNI->id))
> + continue;
> + SmallPtrSetIterator<VNInfo *> It1 = EqualVNs[ParentVNI->id].begin();
> + SmallPtrSetIterator<VNInfo *> It2 = It1;
> + for (; It1 != EqualVNs[ParentVNI->id].end(); ++It1) {
> + It2 = It1;
> + for (++It2; It2 != EqualVNs[ParentVNI->id].end(); ++It2) {
> + if (DominatedVNIs.count(*It1) || DominatedVNIs.count(*It2))
> + continue;
> +
> + MachineBasicBlock *MBB1 = LIS.getMBBFromIndex((*It1)->def);
> + MachineBasicBlock *MBB2 = LIS.getMBBFromIndex((*It2)->def);
> + if (MBB1 == MBB2) {
> + DominatedVNIs.insert((*It1)->def < (*It2)->def ? (*It2) : (*It1));
> + } else if (MDT.dominates(MBB1, MBB2)) {
> + DominatedVNIs.insert(*It2);
> + } else if (MDT.dominates(MBB2, MBB1)) {
> + DominatedVNIs.insert(*It1);
> + }
> + }
> + }
> + if (!DominatedVNIs.empty()) {
> + forceRecompute(0, ParentVNI);
> + for (auto VNI : DominatedVNIs) {
> + BackCopies.push_back(VNI);
> + }
> + DominatedVNIs.clear();
> + }
> + }
> +}
> +
> +/// For SM_Size mode, find a common dominator for all the back-copies for
> +/// the same ParentVNI and hoist the backcopies to the dominator BB.
> +/// For SM_Speed mode, if the common dominator is hot and it is not beneficial
> +/// to do the hoisting, simply remove the dominated backcopies for the same
> +/// ParentVNI.
> +void SplitEditor::hoistCopies() {
> // Get the complement interval, always RegIdx 0.
> LiveInterval *LI = &LIS.getInterval(Edit->get(0));
> LiveInterval *Parent = &Edit->getParent();
> @@ -725,6 +786,11 @@ void SplitEditor::hoistCopiesForSize() {
> // indexed by ParentVNI->id.
> typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair;
> SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums());
> + // The total cost of all the back-copies for each ParentVNI.
> + SmallVector<BlockFrequency, 8> Costs(Parent->getNumValNums());
> + // The ParentVNI->id set for which hoisting back-copies are not beneficial
> + // for Speed.
> + DenseSet<unsigned> NotToHoistSet;
>
> // Find the nearest common dominator for parent values with multiple
> // back-copies. If a single back-copy dominates, put it in DomPair.second.
> @@ -740,6 +806,7 @@ void SplitEditor::hoistCopiesForSize() {
> continue;
>
> MachineBasicBlock *ValMBB = LIS.getMBBFromIndex(VNI->def);
> +
> DomPair &Dom = NearestDom[ParentVNI->id];
>
> // Keep directly defined parent values. This is either a PHI or an
> @@ -774,6 +841,7 @@ void SplitEditor::hoistCopiesForSize() {
> else if (Near != Dom.first)
> // None dominate. Hoist to common dominator, need new def.
> Dom = DomPair(Near, SlotIndex());
> + Costs[ParentVNI->id] += MBFI.getBlockFreq(ValMBB);
> }
>
> DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' <<
> VNI->def
> @@ -792,6 +860,11 @@ void SplitEditor::hoistCopiesForSize() {
> MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def);
> // Get a less loopy dominator than Dom.first.
> Dom.first = findShallowDominator(Dom.first, DefMBB);
> + if (SpillMode == SM_Speed &&
> + MBFI.getBlockFreq(Dom.first) > Costs[ParentVNI->id]) {
> + NotToHoistSet.insert(ParentVNI->id);
> + continue;
> + }
> SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot();
> Dom.second =
> defFromParent(0, ParentVNI, Last, *Dom.first,
> @@ -806,11 +879,18 @@ void SplitEditor::hoistCopiesForSize() {
> continue;
> VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
> const DomPair &Dom = NearestDom[ParentVNI->id];
> - if (!Dom.first || Dom.second == VNI->def)
> + if (!Dom.first || Dom.second == VNI->def ||
> + NotToHoistSet.count(ParentVNI->id))
> continue;
> BackCopies.push_back(VNI);
> forceRecompute(0, ParentVNI);
> }
> +
> + // If it is not beneficial to hoist all the BackCopies, simply remove
> + // redundant BackCopies in speed mode.
> + if (SpillMode == SM_Speed && !NotToHoistSet.empty())
> + computeRedundantBackCopies(NotToHoistSet, BackCopies);
> +
> removeBackCopies(BackCopies);
> }
>
> @@ -1004,6 +1084,8 @@ void SplitEditor::deleteRematVictims() {
> // Dead defs end at the dead slot.
> if (S.end != S.valno->def.getDeadSlot())
> continue;
> + if (S.valno->isPHIDef())
> + continue;
> MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def);
> assert(MI && "Missing instruction for dead def");
> MI->addRegisterDead(LI->reg, &TRI);
> @@ -1048,10 +1130,9 @@ void SplitEditor::finish(SmallVectorImpl
> // Leave all back-copies as is.
> break;
> case SM_Size:
> - hoistCopiesForSize();
> - break;
> case SM_Speed:
> - llvm_unreachable("Spill mode 'speed' not implemented yet");
> + // hoistCopies will behave differently between size and speed.
> + hoistCopies();
> }
>
> // Transfer the simply mapped values, check if any are skipped.
>
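
The SM_Speed path above boils down to a per-value profitability test: sum the
block frequencies of all back-copies for a ParentVNI into Costs, and hoist to
the common dominator only when the dominator block's frequency does not exceed
that sum; otherwise the value lands in NotToHoistSet and just the dominated
duplicates get removed. A toy model of that comparison, with plain doubles
standing in for BlockFrequency/MachineBlockFrequencyInfo and shouldHoist as a
made-up name:

    #include <iostream>
    #include <numeric>
    #include <vector>

    struct BackCopy {
      double BlockFreq; // frequency of the block holding this back-copy
    };

    // Hoisting replaces all copies with one copy at the dominator, so it
    // pays off only if the dominator is no hotter than the copies combined
    // (the inverse of "MBFI.getBlockFreq(Dom.first) > Costs[ParentVNI->id]").
    bool shouldHoist(double DomFreq, const std::vector<BackCopy> &Copies) {
      double Cost = std::accumulate(
          Copies.begin(), Copies.end(), 0.0,
          [](double Acc, const BackCopy &C) { return Acc + C.BlockFreq; });
      return DomFreq <= Cost;
    }

    int main() {
      std::vector<BackCopy> Copies = {{10.0}, {15.0}, {5.0}};
      std::cout << shouldHoist(8.0, Copies) << '\n';  // 1: cold dominator
      std::cout << shouldHoist(50.0, Copies) << '\n'; // 0: hot (loop) block
      return 0;
    }
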
> Modified: llvm/trunk/lib/CodeGen/SplitKit.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SplitKit.h?rev=265309&r1=265308&r2=265309&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SplitKit.h (original)
> +++ llvm/trunk/lib/CodeGen/SplitKit.h Mon Apr 4 11:42:40 2016
> @@ -18,6 +18,7 @@
> #include "LiveRangeCalc.h"
> #include "llvm/ADT/ArrayRef.h"
> #include "llvm/ADT/DenseMap.h"
> +#include "llvm/ADT/DenseSet.h"
> #include "llvm/ADT/IntervalMap.h"
> #include "llvm/ADT/SmallPtrSet.h"
>
> @@ -329,9 +330,14 @@ private:
> MachineBasicBlock *findShallowDominator(MachineBasicBlock *MBB,
> MachineBasicBlock *DefMBB);
>
> - /// hoistCopiesForSize - Hoist back-copies to the complement interval in a
> - /// way that minimizes code size. This implements the SM_Size spill mode.
> - void hoistCopiesForSize();
> + /// Find out all the backCopies dominated by others.
> + void computeRedundantBackCopies(DenseSet<unsigned> &NotToHoistSet,
> + SmallVectorImpl<VNInfo *> &BackCopies);
> +
> + /// Hoist back-copies to the complement interval. It tries to hoist all
> + /// the back-copies to one BB if it is beneficial, or else simply remove
> + /// redundant backcopies dominated by others.
> + void hoistCopies();
>
> /// transferValues - Transfer values to the new ranges.
> /// Return true if any ranges were skipped.
>
> Removed: llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll?rev=265308&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll (removed)
> @@ -1,514 +0,0 @@
> -;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=true -mcpu=cortex-a57 -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=DEFERRED
> -;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=false -mcpu=cortex-a57 -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=REGULAR
> -
> -; Check that we do not end up with useless spill code.
> -;
> -; Move to the basic block we are interested in.
> -;
> -; CHECK: // %if.then.120
> -;
> -; REGULAR: str w21, [sp, #[[OFFSET:[0-9]+]]] // 4-byte Folded Spill
> -; Check that w21 wouldn't need to be spilled since it is never reused.
> -; REGULAR-NOT: {{[wx]}}21{{,?}}
> -;
> -; Check that w22 is used to carry a value through the call.
> -; DEFERRED-NOT: str {{[wx]}}22,
> -; DEFERRED: mov {{[wx]}}22,
> -; DEFERRED-NOT: str {{[wx]}}22,
> -;
> -; CHECK: bl fprintf
> -;
> -; DEFERRED-NOT: ldr {{[wx]}}22,
> -; DEFERRED: mov {{[wx][0-9]+}}, {{[wx]}}22
> -; DEFERRED-NOT: ldr {{[wx]}}22,
> -;
> -; REGULAR-NOT: {{[wx]}}21{{,?}}
> -; REGULAR: ldr w21, [sp, #[[OFFSET]]] // 4-byte Folded Reload
> -;
> -; End of the basic block we are interested in.
> -; CHECK: b
> -; CHECK: {{[^:]+}}: // %sw.bb.123
> -
> -%struct.__sFILE = type { i8*, i32, i32, i32, i32, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
> -%struct.__sbuf = type { i8*, i64 }
> -%struct.DState = type { %struct.bz_stream*, i32, i8, i32, i8, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* }
> -%struct.bz_stream = type { i8*, i32, i32, i32, i8*, i32, i32, i32, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8* }
> -
> -@__sF = external global [0 x %struct.__sFILE], align 8
> -@.str = private unnamed_addr constant [20 x i8] c"\0A [%d: stuff+mf \00", align 1
> -
> -declare i32 @fprintf(%struct.__sFILE* nocapture, i8* nocapture readonly, ...)
> -
> -declare void @bar(i32)
> -
> -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
> -
> -define i32 @foo(%struct.DState* %s) {
> -entry:
> - %state = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 1
> - %tmp = load i32, i32* %state, align 4
> - %cmp = icmp eq i32 %tmp, 10
> - %save_i = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 40
> - br i1 %cmp, label %if.end.thread, label %if.end
> -
> -if.end.thread: ; preds = %entry
> - %save_j = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41
> - %save_t = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42
> - %save_alphaSize = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43
> - %save_nGroups = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44
> - %save_nSelectors = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45
> - %save_EOB = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46
> - %save_groupNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47
> - %save_groupPos = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48
> - %save_nextSym = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49
> - %save_nblockMAX = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50
> - %save_nblock = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51
> - %save_es = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52
> - %save_N = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53
> - %save_curr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54
> - %save_zt = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55
> - %save_zn = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56
> - %save_zvec = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57
> - %save_zj = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58
> - %tmp1 = bitcast i32* %save_i to i8*
> - call void @llvm.memset.p0i8.i64(i8* %tmp1, i8 0, i64 108, i32 4, i1 false)
> - br label %sw.default
> -
> -if.end: ; preds = %entry
> - %.pre = load i32, i32* %save_i, align 4
> - %save_j3.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41
> - %.pre406 = load i32, i32* %save_j3.phi.trans.insert, align 4
> - %save_t4.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42
> - %.pre407 = load i32, i32* %save_t4.phi.trans.insert, align 4
> - %save_alphaSize5.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43
> - %.pre408 = load i32, i32* %save_alphaSize5.phi.trans.insert, align 4
> - %save_nGroups6.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44
> - %.pre409 = load i32, i32* %save_nGroups6.phi.trans.insert, align 4
> - %save_nSelectors7.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45
> - %.pre410 = load i32, i32* %save_nSelectors7.phi.trans.insert, align 4
> - %save_EOB8.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46
> - %.pre411 = load i32, i32* %save_EOB8.phi.trans.insert, align 4
> - %save_groupNo9.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47
> - %.pre412 = load i32, i32* %save_groupNo9.phi.trans.insert, align 4
> - %save_groupPos10.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48
> - %.pre413 = load i32, i32* %save_groupPos10.phi.trans.insert, align 4
> - %save_nextSym11.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49
> - %.pre414 = load i32, i32* %save_nextSym11.phi.trans.insert, align 4
> - %save_nblockMAX12.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50
> - %.pre415 = load i32, i32* %save_nblockMAX12.phi.trans.insert, align 4
> - %save_nblock13.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51
> - %.pre416 = load i32, i32* %save_nblock13.phi.trans.insert, align 4
> - %save_es14.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52
> - %.pre417 = load i32, i32* %save_es14.phi.trans.insert, align 4
> - %save_N15.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53
> - %.pre418 = load i32, i32* %save_N15.phi.trans.insert, align 4
> - %save_curr16.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54
> - %.pre419 = load i32, i32* %save_curr16.phi.trans.insert, align 4
> - %save_zt17.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55
> - %.pre420 = load i32, i32* %save_zt17.phi.trans.insert, align 4
> - %save_zn18.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56
> - %.pre421 = load i32, i32* %save_zn18.phi.trans.insert, align 4
> - %save_zvec19.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57
> - %.pre422 = load i32, i32* %save_zvec19.phi.trans.insert, align 4
> - %save_zj20.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58
> - %.pre423 = load i32, i32* %save_zj20.phi.trans.insert, align 4
> - switch i32 %tmp, label %sw.default [
> - i32 13, label %sw.bb
> - i32 14, label %if.end.sw.bb.65_crit_edge
> - i32 25, label %if.end.sw.bb.123_crit_edge
> - ]
> -
> -if.end.sw.bb.123_crit_edge: ; preds = %if.end
> - %.pre433 = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
> - br label %sw.bb.123
> -
> -if.end.sw.bb.65_crit_edge: ; preds = %if.end
> - %bsLive69.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
> - %.pre426 = load i32, i32* %bsLive69.phi.trans.insert, align 4
> - br label %sw.bb.65
> -
> -sw.bb: ; preds = %if.end
> - %sunkaddr = ptrtoint %struct.DState* %s to i64
> - %sunkaddr485 = add i64 %sunkaddr, 8
> - %sunkaddr486 = inttoptr i64 %sunkaddr485 to i32*
> - store i32 13, i32* %sunkaddr486, align 4
> - %bsLive = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
> - %tmp2 = load i32, i32* %bsLive, align 4
> - %cmp28.400 = icmp sgt i32 %tmp2, 7
> - br i1 %cmp28.400, label %sw.bb.if.then.29_crit_edge, label %if.end.33.lr.ph
> -
> -sw.bb.if.then.29_crit_edge: ; preds = %sw.bb
> - %sunkaddr487 = ptrtoint %struct.DState* %s to i64
> - %sunkaddr488 = add i64 %sunkaddr487, 32
> - %sunkaddr489 = inttoptr i64 %sunkaddr488 to i32*
> - %.pre425 = load i32, i32* %sunkaddr489, align 4
> - br label %if.then.29
> -
> -if.end.33.lr.ph: ; preds = %sw.bb
> - %tmp3 = bitcast %struct.DState* %s to %struct.bz_stream**
> - %.pre424 = load %struct.bz_stream*, %struct.bz_stream** %tmp3, align 8
> - %avail_in.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre424, i64 0, i32 1
> - %.pre430 = load i32, i32* %avail_in.phi.trans.insert, align 4
> - %tmp4 = add i32 %.pre430, -1
> - br label %if.end.33
> -
> -if.then.29: ; preds = %while.body.backedge, %sw.bb.if.then.29_crit_edge
> - %tmp5 = phi i32 [ %.pre425, %sw.bb.if.then.29_crit_edge ], [ %or, %while.body.backedge ]
> - %.lcssa393 = phi i32 [ %tmp2, %sw.bb.if.then.29_crit_edge ], [ %add, %while.body.backedge ]
> - %sub = add nsw i32 %.lcssa393, -8
> - %shr = lshr i32 %tmp5, %sub
> - %and = and i32 %shr, 255
> - %sunkaddr491 = ptrtoint %struct.DState* %s to i64
> - %sunkaddr492 = add i64 %sunkaddr491, 36
> - %sunkaddr493 = inttoptr i64 %sunkaddr492 to i32*
> - store i32 %sub, i32* %sunkaddr493, align 4
> - %blockSize100k = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 9
> - store i32 %and, i32* %blockSize100k, align 4
> - %and.off = add nsw i32 %and, -49
> - %tmp6 = icmp ugt i32 %and.off, 8
> - br i1 %tmp6, label %save_state_and_return, label %if.end.62
> -
> -if.end.33: ; preds = %while.body.backedge, %if.end.33.lr.ph
> - %lsr.iv482 = phi i32 [ %tmp4, %if.end.33.lr.ph ], [ %lsr.iv.next483, %while.body.backedge ]
> - %tmp7 = phi i32 [ %tmp2, %if.end.33.lr.ph ], [ %add, %while.body.backedge ]
> - %cmp35 = icmp eq i32 %lsr.iv482, -1
> - br i1 %cmp35, label %save_state_and_return, label %if.end.37
> -
> -if.end.37: ; preds = %if.end.33
> - %tmp8 = bitcast %struct.bz_stream* %.pre424 to i8**
> - %sunkaddr494 = ptrtoint %struct.DState* %s to i64
> - %sunkaddr495 = add i64 %sunkaddr494, 32
> - %sunkaddr496 = inttoptr i64 %sunkaddr495 to i32*
> - %tmp9 = load i32, i32* %sunkaddr496, align 4
> - %shl = shl i32 %tmp9, 8
> - %tmp10 = load i8*, i8** %tmp8, align 8
> - %tmp11 = load i8, i8* %tmp10, align 1
> - %conv = zext i8 %tmp11 to i32
> - %or = or i32 %conv, %shl
> - store i32 %or, i32* %sunkaddr496, align 4
> - %add = add nsw i32 %tmp7, 8
> - %sunkaddr497 = ptrtoint %struct.DState* %s to i64
> - %sunkaddr498 = add i64 %sunkaddr497, 36
> - %sunkaddr499 = inttoptr i64 %sunkaddr498 to i32*
> - store i32 %add, i32* %sunkaddr499, align 4
> - %incdec.ptr = getelementptr inbounds i8, i8* %tmp10, i64 1
> - store i8* %incdec.ptr, i8** %tmp8, align 8
> - %sunkaddr500 = ptrtoint %struct.bz_stream* %.pre424 to i64
> - %sunkaddr501 = add i64 %sunkaddr500, 8
> - %sunkaddr502 = inttoptr i64 %sunkaddr501 to i32*
> - store i32 %lsr.iv482, i32* %sunkaddr502, align 4
> - %sunkaddr503 = ptrtoint %struct.bz_stream* %.pre424 to i64
> - %sunkaddr504 = add i64 %sunkaddr503, 12
> - %sunkaddr505 = inttoptr i64 %sunkaddr504 to i32*
> - %tmp12 = load i32, i32* %sunkaddr505, align 4
> - %inc = add i32 %tmp12, 1
> - store i32 %inc, i32* %sunkaddr505, align 4
> - %cmp49 = icmp eq i32 %inc, 0
> - br i1 %cmp49, label %if.then.51, label %while.body.backedge
> -
> -if.then.51: ; preds = %if.end.37
> - %sunkaddr506 = ptrtoint %struct.bz_stream* %.pre424 to i64
> - %sunkaddr507 = add i64 %sunkaddr506, 16
> - %sunkaddr508 = inttoptr i64 %sunkaddr507 to i32*
> - %tmp13 = load i32, i32* %sunkaddr508, align 4
> - %inc53 = add i32 %tmp13, 1
> - store i32 %inc53, i32* %sunkaddr508, align 4
> - br label %while.body.backedge
> -
> -while.body.backedge: ; preds = %if.then.51, %if.end.37
> - %lsr.iv.next483 = add i32 %lsr.iv482, -1
> - %cmp28 = icmp sgt i32 %add, 7
> - br i1 %cmp28, label %if.then.29, label %if.end.33
> -
> -if.end.62: ; preds = %if.then.29
> - %sub64 = add nsw i32 %and, -48
> - %sunkaddr509 = ptrtoint %struct.DState* %s to i64
> - %sunkaddr510 = add i64 %sunkaddr509, 40
> - %sunkaddr511 = inttoptr i64 %sunkaddr510 to i32*
> - store i32 %sub64, i32* %sunkaddr511, align 4
> - br label %sw.bb.65
> -
> -sw.bb.65: ; preds = %if.end.62, %if.end.sw.bb.65_crit_edge
> - %bsLive69.pre-phi = phi i32* [ %bsLive69.phi.trans.insert, %if.end.sw.bb.65_crit_edge ], [ %bsLive, %if.end.62 ]
> - %tmp14 = phi i32 [ %.pre426, %if.end.sw.bb.65_crit_edge ], [ %sub, %if.end.62 ]
> - %sunkaddr512 = ptrtoint %struct.DState* %s to i64
> - %sunkaddr513 = add i64 %sunkaddr512, 8
> - %sunkaddr514 = inttoptr i64 %sunkaddr513 to i32*
> - store i32 14, i32* %sunkaddr514, align 4
> - %cmp70.397 = icmp sgt i32 %tmp14, 7
> - br i1 %cmp70.397, label %if.then.72, label %if.end.82.lr.ph
> -
> -if.end.82.lr.ph: ; preds = %sw.bb.65
> - %tmp15 = bitcast %struct.DState* %s to %struct.bz_stream**
> - %.pre427 = load %struct.bz_stream*, %struct.bz_stream** %tmp15, align 8
> - %avail_in84.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre427, i64 0, i32 1
> - %.pre431 = load i32, i32* %avail_in84.phi.trans.insert, align 4
> - %tmp16 = add i32 %.pre431, -1
> - br label %if.end.82
> -
> -if.then.72: ; preds = %while.body.68.backedge, %sw.bb.65
> - %.lcssa390 = phi i32 [ %tmp14, %sw.bb.65 ], [ %add97, %while.body.68.backedge ]
> - %sub76 = add nsw i32 %.lcssa390, -8
> - %sunkaddr516 = ptrtoint %struct.DState* %s to i64
> - %sunkaddr517 = add i64 %sunkaddr516, 36
> - %sunkaddr518 = inttoptr i64 %sunkaddr517 to i32*
> - store i32 %sub76, i32* %sunkaddr518, align 4
> - %currBlockNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 11
> - %tmp17 = load i32, i32* %currBlockNo, align 4
> - %inc117 = add nsw i32 %tmp17, 1
> - store i32 %inc117, i32* %currBlockNo, align 4
> - %verbosity = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 12
> - %tmp18 = load i32, i32* %verbosity, align 4
> - %cmp118 = icmp sgt i32 %tmp18, 1
> - br i1 %cmp118, label %if.then.120, label %sw.bb.123, !prof !0
> -
> -if.end.82: ; preds = %while.body.68.backedge, %if.end.82.lr.ph
> - %lsr.iv480 = phi i32 [ %tmp16, %if.end.82.lr.ph ], [ %lsr.iv.next481, %while.body.68.backedge ]
> - %tmp19 = phi i32 [ %tmp14, %if.end.82.lr.ph ], [ %add97, %while.body.68.backedge ]
> - %cmp85 = icmp eq i32 %lsr.iv480, -1
> - br i1 %cmp85, label %save_state_and_return, label %if.end.88
> -
> -if.end.88: ; preds = %if.end.82
> - %tmp20 = bitcast %struct.bz_stream* %.pre427 to i8**
> - %sunkaddr519 = ptrtoint %struct.DState* %s to i64
> - %sunkaddr520 = add i64 %sunkaddr519, 32
> - %sunkaddr521 = inttoptr i64 %sunkaddr520 to i32*
> - %tmp21 = load i32, i32* %sunkaddr521, align 4
> - %shl90 = shl i32 %tmp21, 8
> - %tmp22 = load i8*, i8** %tmp20, align 8
> - %tmp23 = load i8, i8* %tmp22, align 1
> - %conv93 = zext i8 %tmp23 to i32
> - %or94 = or i32 %conv93, %shl90
> - store i32 %or94, i32* %sunkaddr521, align 4
> - %add97 = add nsw i32 %tmp19, 8
> - %sunkaddr522 = ptrtoint %struct.DState* %s to i64
> - %sunkaddr523 = add i64 %sunkaddr522, 36
> - %sunkaddr524 = inttoptr i64 %sunkaddr523 to i32*
> - store i32 %add97, i32* %sunkaddr524, align 4
> - %incdec.ptr100 = getelementptr inbounds i8, i8* %tmp22, i64 1
> - store i8* %incdec.ptr100, i8** %tmp20, align 8
> - %sunkaddr525 = ptrtoint %struct.bz_stream* %.pre427 to i64
> - %sunkaddr526 = add i64 %sunkaddr525, 8
> - %sunkaddr527 = inttoptr i64 %sunkaddr526 to i32*
> - store i32 %lsr.iv480, i32* %sunkaddr527, align 4
> - %sunkaddr528 = ptrtoint %struct.bz_stream* %.pre427 to i64
> - %sunkaddr529 = add i64 %sunkaddr528, 12
> - %sunkaddr530 = inttoptr i64 %sunkaddr529 to i32*
> - %tmp24 = load i32, i32* %sunkaddr530, align 4
> - %inc106 = add i32 %tmp24, 1
> - store i32 %inc106, i32* %sunkaddr530, align 4
> - %cmp109 = icmp eq i32 %inc106, 0
> - br i1 %cmp109, label %if.then.111, label %while.body.68.backedge
> -
> -if.then.111: ; preds = %if.end.88
> - %sunkaddr531 = ptrtoint %struct.bz_stream* %.pre427 to i64
> - %sunkaddr532 = add i64 %sunkaddr531, 16
> - %sunkaddr533 = inttoptr i64 %sunkaddr532 to i32*
> - %tmp25 = load i32, i32* %sunkaddr533, align 4
> - %inc114 = add i32 %tmp25, 1
> - store i32 %inc114, i32* %sunkaddr533, align 4
> - br label %while.body.68.backedge
> -
> -while.body.68.backedge: ; preds = %if.then.111, %if.end.88
> - %lsr.iv.next481 = add i32 %lsr.iv480, -1
> - %cmp70 = icmp sgt i32 %add97, 7
> - br i1 %cmp70, label %if.then.72, label %if.end.82
> -
> -if.then.120: ; preds = %if.then.72
> - %call = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* getelementptr inbounds ([0 x %struct.__sFILE], [0 x %struct.__sFILE]* @__sF, i64 0, i64 2), i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str, i64 0, i64 0), i32 %inc117)
> - br label %sw.bb.123
> -
> -sw.bb.123: ; preds = %if.then.120, %if.then.72, %if.end.sw.bb.123_crit_edge
> - %bsLive127.pre-phi = phi i32* [ %.pre433, %if.end.sw.bb.123_crit_edge ], [ %bsLive69.pre-phi, %if.then.72 ], [ %bsLive69.pre-phi, %if.then.120 ]
> - %sunkaddr534 = ptrtoint %struct.DState* %s to i64
> - %sunkaddr535 = add i64 %sunkaddr534, 8
> - %sunkaddr536 = inttoptr i64 %sunkaddr535 to i32*
> - store i32 25, i32* %sunkaddr536, align 4
> - %tmp26 = load i32, i32* %bsLive127.pre-phi, align 4
> - %cmp128.395 = icmp sgt i32 %tmp26, 7
> - br i1 %cmp128.395, label %sw.bb.123.if.then.130_crit_edge, label %if.end.140.lr.ph
> -
> -sw.bb.123.if.then.130_crit_edge: ; preds = %sw.bb.123
> - %sunkaddr537 = ptrtoint %struct.DState* %s to i64
> - %sunkaddr538 = add i64 %sunkaddr537, 32
> - %sunkaddr539 = inttoptr i64 %sunkaddr538 to i32*
> - %.pre429 = load i32, i32* %sunkaddr539, align 4
> - br label %if.then.130
> -
> -if.end.140.lr.ph: ; preds = %sw.bb.123
> - %tmp27 = bitcast %struct.DState* %s to %struct.bz_stream**
> - %.pre428 = load %struct.bz_stream*, %struct.bz_stream** %tmp27, align 8
> - %avail_in142.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre428, i64 0, i32 1
> - %.pre432 = load i32, i32* %avail_in142.phi.trans.insert, align 4
> - %tmp28 = add i32 %.pre432, -1
> - br label %if.end.140
> -
> -if.then.130: ; preds = %while.body.126.backedge, %sw.bb.123.if.then.130_crit_edge
> - %tmp29 = phi i32 [ %.pre429, %sw.bb.123.if.then.130_crit_edge ], [ %or152, %while.body.126.backedge ]
> - %.lcssa = phi i32 [ %tmp26, %sw.bb.123.if.then.130_crit_edge ], [ %add155, %while.body.126.backedge ]
> - %sub134 = add nsw i32 %.lcssa, -8
> - %shr135 = lshr i32 %tmp29, %sub134
> - store i32 %sub134, i32* %bsLive127.pre-phi, align 4
> - %origPtr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 13
> - %tmp30 = load i32, i32* %origPtr, align 4
> - %shl175 = shl i32 %tmp30, 8
> - %conv176 = and i32 %shr135, 255
> - %or177 = or i32 %shl175, %conv176
> - store i32 %or177, i32* %origPtr, align 4
> - %nInUse = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 27
> - %tmp31 = load i32, i32* %nInUse, align 4
> - %add179 = add nsw i32 %tmp31, 2
> - br label %save_state_and_return
> -
> -if.end.140: ; preds = %while.body.126.backedge, %if.end.140.lr.ph
> - %lsr.iv = phi i32 [ %tmp28, %if.end.140.lr.ph ], [ %lsr.iv.next, %while.body.126.backedge ]
> - %tmp32 = phi i32 [ %tmp26, %if.end.140.lr.ph ], [ %add155, %while.body.126.backedge ]
> - %cmp143 = icmp eq i32 %lsr.iv, -1
> - br i1 %cmp143, label %save_state_and_return, label %if.end.146
> -
> -if.end.146: ; preds = %if.end.140
> - %tmp33 = bitcast %struct.bz_stream* %.pre428 to i8**
> - %sunkaddr541 = ptrtoint %struct.DState* %s to i64
> - %sunkaddr542 = add i64 %sunkaddr541, 32
> - %sunkaddr543 = inttoptr i64 %sunkaddr542 to i32*
> - %tmp34 = load i32, i32* %sunkaddr543, align 4
> - %shl148 = shl i32 %tmp34, 8
> - %tmp35 = load i8*, i8** %tmp33, align 8
> - %tmp36 = load i8, i8* %tmp35, align 1
> - %conv151 = zext i8 %tmp36 to i32
> - %or152 = or i32 %conv151, %shl148
> - store i32 %or152, i32* %sunkaddr543, align 4
> - %add155 = add nsw i32 %tmp32, 8
> - store i32 %add155, i32* %bsLive127.pre-phi, align 4
> - %incdec.ptr158 = getelementptr inbounds i8, i8* %tmp35, i64 1
> - store i8* %incdec.ptr158, i8** %tmp33, align 8
> - %sunkaddr544 = ptrtoint %struct.bz_stream* %.pre428 to i64
> - %sunkaddr545 = add i64 %sunkaddr544, 8
> - %sunkaddr546 = inttoptr i64 %sunkaddr545 to i32*
> - store i32 %lsr.iv, i32* %sunkaddr546, align 4
> - %sunkaddr547 = ptrtoint %struct.bz_stream* %.pre428 to i64
> - %sunkaddr548 = add i64 %sunkaddr547, 12
> - %sunkaddr549 = inttoptr i64 %sunkaddr548 to i32*
> - %tmp37 = load i32, i32* %sunkaddr549, align 4
> - %inc164 = add i32 %tmp37, 1
> - store i32 %inc164, i32* %sunkaddr549, align 4
> - %cmp167 = icmp eq i32 %inc164, 0
> - br i1 %cmp167, label %if.then.169, label %while.body.126.backedge
> -
> -if.then.169: ; preds = %if.end.146
> - %sunkaddr550 = ptrtoint %struct.bz_stream* %.pre428 to i64
> - %sunkaddr551 = add i64 %sunkaddr550, 16
> - %sunkaddr552 = inttoptr i64 %sunkaddr551 to i32*
> - %tmp38 = load i32, i32* %sunkaddr552, align 4
> - %inc172 = add i32 %tmp38, 1
> - store i32 %inc172, i32* %sunkaddr552, align 4
> - br label %while.body.126.backedge
> -
> -while.body.126.backedge: ; preds = %if.then.169, %if.end.146
> - %lsr.iv.next = add i32 %lsr.iv, -1
> - %cmp128 = icmp sgt i32 %add155, 7
> - br i1 %cmp128, label %if.then.130, label %if.end.140
> -
> -sw.default: ; preds = %if.end, %if.end.thread
> - %tmp39 = phi i32 [ 0, %if.end.thread ], [ %.pre, %if.end ]
> - %tmp40 = phi i32 [ 0, %if.end.thread ], [ %.pre406, %if.end ]
> - %tmp41 = phi i32 [ ...
>
> [Message clipped]
--
Teresa Johnson | Software Engineer | tejohnson at google.com | 408-460-2413