[llvm-commits] [llvm] r149468 - in /llvm/trunk: docs/ include/llvm-c/ include/llvm-c/Transforms/ include/llvm/ include/llvm/Transforms/ include/llvm/Transforms/IPO/ lib/Transforms/ lib/Transforms/IPO/ lib/Transforms/Vectorize/ test/Transforms/BBVectorize/ tools/bugpoint/ tools/llvm-ld/ tools/lto/ tools/opt/
Nick Lewycky
nicholas at mxc.ca
Tue Jan 31 20:59:29 PST 2012
Hal Finkel wrote:
> Author: hfinkel
> Date: Tue Jan 31 21:51:43 2012
> New Revision: 149468
>
> URL: http://llvm.org/viewvc/llvm-project?rev=149468&view=rev
> Log:
> Add a basic-block autovectorization pass.
>
> This is the initial checkin of the basic-block autovectorization pass along with some supporting vectorization infrastructure.
> Special thanks to everyone who helped review this code over the last several months (especially Tobias Grosser).
Great stuff!
> Copied: llvm/trunk/include/llvm-c/Transforms/Vectorize.h (from r149457, llvm/trunk/include/llvm-c/Initialization.h)
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm-c/Transforms/Vectorize.h?p2=llvm/trunk/include/llvm-c/Transforms/Vectorize.h&p1=llvm/trunk/include/llvm-c/Initialization.h&r1=149457&r2=149468&rev=149468&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm-c/Initialization.h (original)
> +++ llvm/trunk/include/llvm-c/Transforms/Vectorize.h Tue Jan 31 21:51:43 2012
> @@ -1,4 +1,5 @@
> -/*===-- llvm-c/Initialization.h - Initialization C Interface ------*- C -*-===*\
> +/*===---------------------------Vectorize.h ------------------- -*- C++ -*-===*\
-*- C -*- not C++.
> Modified: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp?rev=149468&r1=149467&r2=149468&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp (original)
> +++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp Tue Jan 31 21:51:43 2012
> @@ -21,14 +21,20 @@
> #include "llvm/DefaultPasses.h"
> #include "llvm/PassManager.h"
> #include "llvm/Analysis/Passes.h"
> +#include "llvm/Analysis/Verifier.h"
> +#include "llvm/Support/CommandLine.h"
> #include "llvm/Target/TargetLibraryInfo.h"
> #include "llvm/Transforms/Scalar.h"
> +#include "llvm/Transforms/Vectorize.h"
> #include "llvm/Transforms/IPO.h"
> #include "llvm/ADT/SmallVector.h"
> #include "llvm/Support/ManagedStatic.h"
>
> using namespace llvm;
>
> +static cl::opt<bool>
> +RunVectorization("vectorize", cl::desc("Run vectorization passes"));
> +
> PassManagerBuilder::PassManagerBuilder() {
> OptLevel = 2;
> SizeLevel = 0;
> @@ -37,6 +43,7 @@
> DisableSimplifyLibCalls = false;
> DisableUnitAtATime = false;
> DisableUnrollLoops = false;
> + Vectorize = RunVectorization;
> }
>
> PassManagerBuilder::~PassManagerBuilder() {
> @@ -172,6 +179,13 @@
>
> addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
>
> + if (Vectorize) {
> + MPM.add(createBBVectorizePass());
> + MPM.add(createInstructionCombiningPass());
> + if (OptLevel> 1)
> + MPM.add(createGVNPass()); // Remove redundancies
Whooooaa... GVN is *really* expensive, I find it hard to believe that
you want to run it twice even with vectorization on. Are you sure? What
is this doing that instcombine isn't?
> Added: llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp?rev=149468&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp (added)
> +++ llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp Tue Jan 31 21:51:43 2012
> @@ -0,0 +1,1796 @@
> +//===- BBVectorize.cpp - A Basic-Block Vectorizer -------------------------===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file implements a basic-block vectorization pass. The algorithm was
> +// inspired by that used by the Vienna MAP Vectorizor by Franchetti and Kral,
> +// et al. It works by looking for chains of pairable operations and then
> +// pairing them.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#define BBV_NAME "bb-vectorize"
I think it's safe to constant propagate this away. :)
> +#define DEBUG_TYPE BBV_NAME
> +#include "llvm/Constants.h"
> +#include "llvm/DerivedTypes.h"
> +#include "llvm/Function.h"
> +#include "llvm/Instructions.h"
> +#include "llvm/IntrinsicInst.h"
> +#include "llvm/Intrinsics.h"
> +#include "llvm/LLVMContext.h"
> +#include "llvm/Pass.h"
> +#include "llvm/Type.h"
> +#include "llvm/ADT/DenseMap.h"
> +#include "llvm/ADT/DenseSet.h"
> +#include "llvm/ADT/SmallVector.h"
> +#include "llvm/ADT/Statistic.h"
> +#include "llvm/ADT/STLExtras.h"
> +#include "llvm/ADT/StringExtras.h"
> +#include "llvm/Analysis/AliasAnalysis.h"
> +#include "llvm/Analysis/AliasSetTracker.h"
> +#include "llvm/Analysis/ScalarEvolution.h"
> +#include "llvm/Analysis/ScalarEvolutionExpressions.h"
> +#include "llvm/Analysis/ValueTracking.h"
> +#include "llvm/Support/CommandLine.h"
> +#include "llvm/Support/Debug.h"
> +#include "llvm/Support/raw_ostream.h"
> +#include "llvm/Support/ValueHandle.h"
> +#include "llvm/Target/TargetData.h"
> +#include "llvm/Transforms/Vectorize.h"
> +#include<algorithm>
> +#include<map>
> +using namespace llvm;
> +
> +static cl::opt<unsigned>
> +ReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden,
> + cl::desc("The required chain depth for vectorization"));
> +
> +static cl::opt<unsigned>
> +SearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden,
> + cl::desc("The maximum search distance for instruction pairs"));
> +
> +static cl::opt<bool>
> +SplatBreaksChain("bb-vectorize-splat-breaks-chain", cl::init(false), cl::Hidden,
> + cl::desc("Replicating one element to a pair breaks the chain"));
> +
> +static cl::opt<unsigned>
> +VectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden,
> + cl::desc("The size of the native vector registers"));
> +
> +static cl::opt<unsigned>
> +MaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden,
> + cl::desc("The maximum number of pairing iterations"));
> +
> +static cl::opt<unsigned>
> +MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200),
> + cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use"
> + " a full cycle check"));
> +
> +static cl::opt<bool>
> +NoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden,
> + cl::desc("Don't try to vectorize integer values"));
> +
> +static cl::opt<bool>
> +NoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden,
> + cl::desc("Don't try to vectorize floating-point values"));
> +
> +static cl::opt<bool>
> +NoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden,
> + cl::desc("Don't try to vectorize casting (conversion) operations"));
> +
> +static cl::opt<bool>
> +NoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden,
> + cl::desc("Don't try to vectorize floating-point math intrinsics"));
> +
> +static cl::opt<bool>
> +NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden,
> + cl::desc("Don't try to vectorize the fused-multiply-add intrinsic"));
> +
> +static cl::opt<bool>
> +NoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden,
> + cl::desc("Don't try to vectorize loads and stores"));
> +
> +static cl::opt<bool>
> +AlignedOnly("bb-vectorize-aligned-only", cl::init(false), cl::Hidden,
> + cl::desc("Only generate aligned loads and stores"));
> +
> +static cl::opt<bool>
> +FastDep("bb-vectorize-fast-dep", cl::init(false), cl::Hidden,
> + cl::desc("Use a fast instruction dependency analysis"));
> +
> +#ifndef NDEBUG
> +static cl::opt<bool>
> +DebugInstructionExamination("bb-vectorize-debug-instruction-examination",
> + cl::init(false), cl::Hidden,
> + cl::desc("When debugging is enabled, output information on the"
> + " instruction-examination process"));
> +static cl::opt<bool>
> +DebugCandidateSelection("bb-vectorize-debug-candidate-selection",
> + cl::init(false), cl::Hidden,
> + cl::desc("When debugging is enabled, output information on the"
> + " candidate-selection process"));
> +static cl::opt<bool>
> +DebugPairSelection("bb-vectorize-debug-pair-selection",
> + cl::init(false), cl::Hidden,
> + cl::desc("When debugging is enabled, output information on the"
> + " pair-selection process"));
> +static cl::opt<bool>
> +DebugCycleCheck("bb-vectorize-debug-cycle-check",
> + cl::init(false), cl::Hidden,
> + cl::desc("When debugging is enabled, output information on the"
> + " cycle-checking process"));
> +#endif
> +
> +STATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize");
> +
> +namespace {
> + struct BBVectorize : public BasicBlockPass {
> + static char ID; // Pass identification, replacement for typeid
> + BBVectorize() : BasicBlockPass(ID) {
> + initializeBBVectorizePass(*PassRegistry::getPassRegistry());
> + }
> +
> + typedef std::pair<Value *, Value *> ValuePair;
> + typedef std::pair<ValuePair, size_t> ValuePairWithDepth;
> + typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair
> + typedef std::pair<std::multimap<Value *, Value *>::iterator,
> + std::multimap<Value *, Value *>::iterator> VPIteratorPair;
> + typedef std::pair<std::multimap<ValuePair, ValuePair>::iterator,
> + std::multimap<ValuePair, ValuePair>::iterator>
> + VPPIteratorPair;
> +
> + AliasAnalysis *AA;
> + ScalarEvolution *SE;
> + TargetData *TD;
> +
> + // FIXME: const correct?
> +
> + bool vectorizePairs(BasicBlock&BB);
> +
> + void getCandidatePairs(BasicBlock&BB,
> + std::multimap<Value *, Value *> &CandidatePairs,
> + std::vector<Value *> &PairableInsts);
> +
> + void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs,
> + std::vector<Value *> &PairableInsts,
> + std::multimap<ValuePair, ValuePair> &ConnectedPairs);
> +
> + void buildDepMap(BasicBlock&BB,
> + std::multimap<Value *, Value *> &CandidatePairs,
> + std::vector<Value *> &PairableInsts,
> + DenseSet<ValuePair> &PairableInstUsers);
> +
> + void choosePairs(std::multimap<Value *, Value *> &CandidatePairs,
> + std::vector<Value *> &PairableInsts,
> + std::multimap<ValuePair, ValuePair> &ConnectedPairs,
> + DenseSet<ValuePair> &PairableInstUsers,
> + DenseMap<Value *, Value *>& ChosenPairs);
> +
> + void fuseChosenPairs(BasicBlock&BB,
> + std::vector<Value *> &PairableInsts,
> + DenseMap<Value *, Value *>& ChosenPairs);
> +
> + bool isInstVectorizable(Instruction *I, bool&IsSimpleLoadStore);
> +
> + bool areInstsCompatible(Instruction *I, Instruction *J,
> + bool IsSimpleLoadStore);
> +
> + bool trackUsesOfI(DenseSet<Value *> &Users,
> + AliasSetTracker&WriteSet, Instruction *I,
> + Instruction *J, bool UpdateUsers = true,
> + std::multimap<Value *, Value *> *LoadMoveSet = 0);
> +
> + void computePairsConnectedTo(
> + std::multimap<Value *, Value *> &CandidatePairs,
> + std::vector<Value *> &PairableInsts,
> + std::multimap<ValuePair, ValuePair> &ConnectedPairs,
> + ValuePair P);
> +
> + bool pairsConflict(ValuePair P, ValuePair Q,
> + DenseSet<ValuePair> &PairableInstUsers,
> + std::multimap<ValuePair, ValuePair> *PairableInstUserMap = 0);
> +
> + bool pairWillFormCycle(ValuePair P,
> + std::multimap<ValuePair, ValuePair> &PairableInstUsers,
> + DenseSet<ValuePair> &CurrentPairs);
> +
> + void pruneTreeFor(
> + std::multimap<Value *, Value *> &CandidatePairs,
> + std::vector<Value *> &PairableInsts,
> + std::multimap<ValuePair, ValuePair> &ConnectedPairs,
> + DenseSet<ValuePair> &PairableInstUsers,
> + std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
> + DenseMap<Value *, Value *> &ChosenPairs,
> + DenseMap<ValuePair, size_t> &Tree,
> + DenseSet<ValuePair> &PrunedTree, ValuePair J,
> + bool UseCycleCheck);
> +
> + void buildInitialTreeFor(
> + std::multimap<Value *, Value *> &CandidatePairs,
> + std::vector<Value *> &PairableInsts,
> + std::multimap<ValuePair, ValuePair> &ConnectedPairs,
> + DenseSet<ValuePair> &PairableInstUsers,
> + DenseMap<Value *, Value *> &ChosenPairs,
> + DenseMap<ValuePair, size_t> &Tree, ValuePair J);
> +
> + void findBestTreeFor(
> + std::multimap<Value *, Value *> &CandidatePairs,
> + std::vector<Value *> &PairableInsts,
> + std::multimap<ValuePair, ValuePair> &ConnectedPairs,
> + DenseSet<ValuePair> &PairableInstUsers,
> + std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
> + DenseMap<Value *, Value *> &ChosenPairs,
> + DenseSet<ValuePair> &BestTree, size_t&BestMaxDepth,
> + size_t&BestEffSize, VPIteratorPair ChoiceRange,
> + bool UseCycleCheck);
> +
> + Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
> + Instruction *J, unsigned o, bool&FlipMemInputs);
> +
> + void fillNewShuffleMask(LLVMContext& Context, Instruction *J,
> + unsigned NumElem, unsigned MaskOffset, unsigned NumInElem,
> + unsigned IdxOffset, std::vector<Constant*> &Mask);
> +
> + Value *getReplacementShuffleMask(LLVMContext& Context, Instruction *I,
> + Instruction *J);
> +
> + Value *getReplacementInput(LLVMContext& Context, Instruction *I,
> + Instruction *J, unsigned o, bool FlipMemInputs);
> +
> + void getReplacementInputsForPair(LLVMContext& Context, Instruction *I,
> + Instruction *J, SmallVector<Value *, 3> &ReplacedOperands,
> + bool&FlipMemInputs);
> +
> + void replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
> + Instruction *J, Instruction *K,
> + Instruction *&InsertionPt, Instruction *&K1,
> + Instruction *&K2, bool&FlipMemInputs);
> +
> + void collectPairLoadMoveSet(BasicBlock&BB,
> + DenseMap<Value *, Value *> &ChosenPairs,
> + std::multimap<Value *, Value *> &LoadMoveSet,
> + Instruction *I);
> +
> + void collectLoadMoveSet(BasicBlock&BB,
> + std::vector<Value *> &PairableInsts,
> + DenseMap<Value *, Value *> &ChosenPairs,
> + std::multimap<Value *, Value *> &LoadMoveSet);
> +
> + bool canMoveUsesOfIAfterJ(BasicBlock&BB,
> + std::multimap<Value *, Value *> &LoadMoveSet,
> + Instruction *I, Instruction *J);
> +
> + void moveUsesOfIAfterJ(BasicBlock&BB,
> + std::multimap<Value *, Value *> &LoadMoveSet,
> + Instruction *&InsertionPt,
> + Instruction *I, Instruction *J);
> +
> + virtual bool runOnBasicBlock(BasicBlock&BB) {
> + AA =&getAnalysis<AliasAnalysis>();
> + SE =&getAnalysis<ScalarEvolution>();
> + TD = getAnalysisIfAvailable<TargetData>();
> +
> + bool changed = false;
> + // Iterate a sufficient number of times to merge types of size 1 bit,
> + // then 2 bits, then 4, etc. up to half of the target vector width of the
> + // target vector register.
> + for (unsigned v = 2, n = 1; v<= VectorBits&& (!MaxIter || n<= MaxIter);
> + v *= 2, ++n) {
> + DEBUG(dbgs()<< "BBV: fusing loop #"<< n<<
> + " for "<< BB.getName()<< " in "<<
> + BB.getParent()->getName()<< "...\n");
> + if (vectorizePairs(BB))
> + changed = true;
> + else
> + break;
> + }
> +
> + DEBUG(dbgs()<< "BBV: done!\n");
> + return changed;
> + }
> +
> + virtual void getAnalysisUsage(AnalysisUsage&AU) const {
Does this pass mutate the CFG (ie., modify terminator instructions)? I
don't see where it does, so AU.setPreservesCFG() should be here?
> + BasicBlockPass::getAnalysisUsage(AU);
> + AU.addRequired<AliasAnalysis>();
> + AU.addRequired<ScalarEvolution>();
> + AU.addPreserved<AliasAnalysis>();
> + AU.addPreserved<ScalarEvolution>();
> + }
> +
> + // This returns the vector type that holds a pair of the provided type.
> + // If the provided type is already a vector, then its length is doubled.
> + static inline VectorType *getVecTypeForPair(Type *ElemTy) {
> + if (VectorType *VTy = dyn_cast<VectorType>(ElemTy)) {
> + unsigned numElem = VTy->getNumElements();
> + return VectorType::get(ElemTy->getScalarType(), numElem*2);
> + } else {
No else-after-return.
http://llvm.org/docs/CodingStandards.html#hl_else_after_return
> + return VectorType::get(ElemTy, 2);
> + }
> + }
> +
> + // Returns the weight associated with the provided value. A chain of
> + // candidate pairs has a length given by the sum of the weights of its
> + // members (one weight per pair; the weight of each member of the pair
> + // is assumed to be the same). This length is then compared to the
> + // chain-length threshold to determine if a given chain is significant
> + // enough to be vectorized. The length is also used in comparing
> + // candidate chains where longer chains are considered to be better.
> + // Note: when this function returns 0, the resulting instructions are
> + // not actually fused.
> + static inline size_t getDepthFactor(Value *V) {
> + // InsertElement and ExtractElement have a depth factor of zero. This is
> + // for two reasons: First, they cannot be usefully fused. Second, because
> + // the pass generates a lot of these, they can confuse the simple metric
> + // used to compare the trees in the next iteration. Thus, giving them a
> + // weight of zero allows the pass to essentially ignore them in
> + // subsequent iterations when looking for vectorization opportunities
> + // while still tracking dependency chains that flow through those
> + // instructions.
> + if (isa<InsertElementInst>(V) || isa<ExtractElementInst>(V))
> + return 0;
> +
> + return 1;
> + }
> +
> + // This determines the relative offset of two loads or stores, returning
> + // true if the offset could be determined to be some constant value.
> + // For example, if OffsetInElmts == 1, then J accesses the memory directly
> + // after I; if OffsetInElmts == -1 then I accesses the memory
> + // directly after J. This function assumes that both instructions
> + // have the same type.
> + bool getPairPtrInfo(Instruction *I, Instruction *J,
> + Value *&IPtr, Value *&JPtr, unsigned&IAlignment, unsigned&JAlignment,
> + int64_t&OffsetInElmts) {
> + OffsetInElmts = 0;
> + if (isa<LoadInst>(I)) {
> + IPtr = cast<LoadInst>(I)->getPointerOperand();
> + JPtr = cast<LoadInst>(J)->getPointerOperand();
> + IAlignment = cast<LoadInst>(I)->getAlignment();
> + JAlignment = cast<LoadInst>(J)->getAlignment();
> + } else {
> + IPtr = cast<StoreInst>(I)->getPointerOperand();
> + JPtr = cast<StoreInst>(J)->getPointerOperand();
> + IAlignment = cast<StoreInst>(I)->getAlignment();
> + JAlignment = cast<StoreInst>(J)->getAlignment();
> + }
> +
> + const SCEV *IPtrSCEV = SE->getSCEV(IPtr);
> + const SCEV *JPtrSCEV = SE->getSCEV(JPtr);
> +
> + // If this is a trivial offset, then we'll get something like
> + // 1*sizeof(type). With target data, which we need anyway, this will get
> + // constant folded into a number.
> + const SCEV *OffsetSCEV = SE->getMinusSCEV(JPtrSCEV, IPtrSCEV);
> + if (const SCEVConstant *ConstOffSCEV =
> + dyn_cast<SCEVConstant>(OffsetSCEV)) {
> + ConstantInt *IntOff = ConstOffSCEV->getValue();
> + int64_t Offset = IntOff->getSExtValue();
> +
> + Type *VTy = cast<PointerType>(IPtr->getType())->getElementType();
> + int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy);
> +
> + assert(VTy == cast<PointerType>(JPtr->getType())->getElementType());
> +
> + OffsetInElmts = Offset/VTyTSS;
> + return (abs64(Offset) % VTyTSS) == 0;
> + }
> +
> + return false;
> + }
> +
> + // Returns true if the provided CallInst represents an intrinsic that can
> + // be vectorized.
> + bool isVectorizableIntrinsic(CallInst* I) {
> + Function *F = I->getCalledFunction();
> + if (!F) return false;
> +
> + unsigned IID = F->getIntrinsicID();
> + if (!IID) return false;
> +
> + switch(IID) {
> + default:
> + return false;
> + case Intrinsic::sqrt:
> + case Intrinsic::powi:
> + case Intrinsic::sin:
> + case Intrinsic::cos:
> + case Intrinsic::log:
> + case Intrinsic::log2:
> + case Intrinsic::log10:
> + case Intrinsic::exp:
> + case Intrinsic::exp2:
> + case Intrinsic::pow:
> + return !NoMath;
> + case Intrinsic::fma:
> + return !NoFMA;
> + }
> + }
> +
> + // Returns true if J is the second element in some pair referenced by
> + // some multimap pair iterator pair.
> + template<typename V>
> + bool isSecondInIteratorPair(V J, std::pair<
> + typename std::multimap<V, V>::iterator,
> + typename std::multimap<V, V>::iterator> PairRange) {
> + for (typename std::multimap<V, V>::iterator K = PairRange.first;
> + K != PairRange.second; ++K)
> + if (K->second == J) return true;
> +
> + return false;
> + }
> + };
> +
> + // This function implements one vectorization iteration on the provided
> + // basic block. It returns true if the block is changed.
> + bool BBVectorize::vectorizePairs(BasicBlock&BB) {
> + std::vector<Value *> PairableInsts;
> + std::multimap<Value *, Value *> CandidatePairs;
> + getCandidatePairs(BB, CandidatePairs, PairableInsts);
> + if (PairableInsts.size() == 0) return false;
> +
> + // Now we have a map of all of the pairable instructions and we need to
> + // select the best possible pairing. A good pairing is one such that the
> + // users of the pair are also paired. This defines a (directed) forest
> + // over the pairs such that two pairs are connected iff the second pair
> + // uses the first.
> +
> + // Note that it only matters that both members of the second pair use some
> + // element of the first pair (to allow for splatting).
> +
> + std::multimap<ValuePair, ValuePair> ConnectedPairs;
> + computeConnectedPairs(CandidatePairs, PairableInsts, ConnectedPairs);
> + if (ConnectedPairs.size() == 0) return false;
ConnectedPairs.empty()
> +
> + // Build the pairable-instruction dependency map
> + DenseSet<ValuePair> PairableInstUsers;
> + buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers);
> +
> + // There is now a graph of the connected pairs. For each variable, pick the
> + // pairing with the largest tree meeting the depth requirement on at least
> + // one branch. Then select all pairings that are part of that tree and
> + // remove them from the list of available pairings and pairable variables.
> +
> + DenseMap<Value *, Value *> ChosenPairs;
> + choosePairs(CandidatePairs, PairableInsts, ConnectedPairs,
> + PairableInstUsers, ChosenPairs);
> +
> + if (ChosenPairs.size() == 0) return false;
ChosenPairs.empty()
> + NumFusedOps += ChosenPairs.size();
> +
> + // A set of pairs has now been selected. It is now necessary to replace the
> + // paired instructions with vector instructions. For this procedure each
> + // operand much be replaced with a vector operand. This vector is formed
> + // by using build_vector on the old operands. The replaced values are then
> + // replaced with a vector_extract on the result. Subsequent optimization
> + // passes should coalesce the build/extract combinations.
> +
> + fuseChosenPairs(BB, PairableInsts, ChosenPairs);
> +
> + return true;
> + }
> +
> + // This function returns true if the provided instruction is capable of being
> + // fused into a vector instruction. This determination is based only on the
> + // type and other attributes of the instruction.
> + bool BBVectorize::isInstVectorizable(Instruction *I,
> + bool&IsSimpleLoadStore) {
> + IsSimpleLoadStore = false;
> +
> + if (CallInst *C = dyn_cast<CallInst>(I)) {
> + if (!isVectorizableIntrinsic(C))
> + return false;
> + } else if (LoadInst *L = dyn_cast<LoadInst>(I)) {
> + // Vectorize simple loads if possbile:
> + IsSimpleLoadStore = L->isSimple();
> + if (!IsSimpleLoadStore || NoMemOps)
> + return false;
> + } else if (StoreInst *S = dyn_cast<StoreInst>(I)) {
> + // Vectorize simple stores if possbile:
> + IsSimpleLoadStore = S->isSimple();
> + if (!IsSimpleLoadStore || NoMemOps)
> + return false;
> + } else if (CastInst *C = dyn_cast<CastInst>(I)) {
> + // We can vectorize casts, but not casts of pointer types, etc.
> + if (NoCasts)
> + return false;
> +
> + Type *SrcTy = C->getSrcTy();
> + if (!SrcTy->isSingleValueType() || SrcTy->isPointerTy())
> + return false;
> +
> + Type *DestTy = C->getDestTy();
> + if (!DestTy->isSingleValueType() || DestTy->isPointerTy())
> + return false;
> + } else if (!(I->isBinaryOp() || isa<ShuffleVectorInst>(I) ||
> + isa<ExtractElementInst>(I) || isa<InsertElementInst>(I))) {
> + return false;
> + }
> +
> + // We can't vectorize memory operations without target data
> + if (TD == 0&& IsSimpleLoadStore)
> + return false;
> +
> + Type *T1, *T2;
> + if (isa<StoreInst>(I)) {
> + // For stores, it is the value type, not the pointer type that matters
> + // because the value is what will come from a vector register.
> +
> + Value *IVal = cast<StoreInst>(I)->getValueOperand();
> + T1 = IVal->getType();
> + } else {
> + T1 = I->getType();
> + }
> +
> + if (I->isCast())
> + T2 = cast<CastInst>(I)->getSrcTy();
> + else
> + T2 = T1;
> +
> + // Not every type can be vectorized...
> + if (!(VectorType::isValidElementType(T1) || T1->isVectorTy()) ||
> + !(VectorType::isValidElementType(T2) || T2->isVectorTy()))
> + return false;
> +
> + if (NoInts&& (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
> + return false;
> +
> + if (NoFloats&& (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
> + return false;
> +
> + if (T1->getPrimitiveSizeInBits()> VectorBits/2 ||
> + T2->getPrimitiveSizeInBits()> VectorBits/2)
> + return false;
> +
> + return true;
> + }
> +
> + // This function returns true if the two provided instructions are compatible
> + // (meaning that they can be fused into a vector instruction). This assumes
> + // that I has already been determined to be vectorizable and that J is not
> + // in the use tree of I.
> + bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J,
> + bool IsSimpleLoadStore) {
> + DEBUG(if (DebugInstructionExamination) dbgs()<< "BBV: looking at "<< *I<<
> + "<-> "<< *J<< "\n");
> +
> + // Loads and stores can be merged if they have different alignments,
> + // but are otherwise the same.
> + LoadInst *LI, *LJ;
> + StoreInst *SI, *SJ;
> + if ((LI = dyn_cast<LoadInst>(I))&& (LJ = dyn_cast<LoadInst>(J))) {
> + if (I->getType() != J->getType())
> + return false;
> +
> + if (LI->getPointerOperand()->getType() !=
> + LJ->getPointerOperand()->getType() ||
> + LI->isVolatile() != LJ->isVolatile() ||
You don't combine two separate volatile loads, do you? That sounds bad.
I'm also not sure about merging two atomic load/stores...
> + LI->getOrdering() != LJ->getOrdering() ||
> + LI->getSynchScope() != LJ->getSynchScope())
> + return false;
> + } else if ((SI = dyn_cast<StoreInst>(I))&& (SJ = dyn_cast<StoreInst>(J))) {
> + if (SI->getValueOperand()->getType() !=
> + SJ->getValueOperand()->getType() ||
> + SI->getPointerOperand()->getType() !=
> + SJ->getPointerOperand()->getType() ||
> + SI->isVolatile() != SJ->isVolatile() ||
> + SI->getOrdering() != SJ->getOrdering() ||
> + SI->getSynchScope() != SJ->getSynchScope())
> + return false;
> + } else if (!J->isSameOperationAs(I)) {
> + return false;
> + }
> + // FIXME: handle addsub-type operations!
> +
> + if (IsSimpleLoadStore) {
> + Value *IPtr, *JPtr;
> + unsigned IAlignment, JAlignment;
> + int64_t OffsetInElmts = 0;
> + if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
> + OffsetInElmts)&& abs64(OffsetInElmts) == 1) {
> + if (AlignedOnly) {
> + Type *aType = isa<StoreInst>(I) ?
> + cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
> + // An aligned load or store is possible only if the instruction
> + // with the lower offset has an alignment suitable for the
> + // vector type.
> +
> + unsigned BottomAlignment = IAlignment;
> + if (OffsetInElmts< 0) BottomAlignment = JAlignment;
> +
> + Type *VType = getVecTypeForPair(aType);
> + unsigned VecAlignment = TD->getPrefTypeAlignment(VType);
> + if (BottomAlignment< VecAlignment)
> + return false;
> + }
> + } else {
> + return false;
> + }
> + } else if (isa<ShuffleVectorInst>(I)) {
> + // Only merge two shuffles if they're both constant
> + return isa<Constant>(I->getOperand(2))&&
> + isa<Constant>(J->getOperand(2));
> + // FIXME: We may want to vectorize non-constant shuffles also.
> + }
> +
> + return true;
> + }
> +
> + // Figure out whether or not J uses I and update the users and write-set
> + // structures associated with I. Specifically, Users represents the set of
> + // instructions that depend on I. WriteSet represents the set
> + // of memory locations that are dependent on I. If UpdateUsers is true,
> + // and J uses I, then Users is updated to contain J and WriteSet is updated
> + // to contain any memory locations to which J writes. The function returns
> + // true if J uses I. By default, alias analysis is used to determine
> + // whether J reads from memory that overlaps with a location in WriteSet.
> + // If LoadMoveSet is not null, then it is a previously-computed multimap
> + // where the key is the memory-based user instruction and the value is
> + // the instruction to be compared with I. So, if LoadMoveSet is provided,
> + // then the alias analysis is not used. This is necessary because this
> + // function is called during the process of moving instructions during
> + // vectorization and the results of the alias analysis are not stable during
> + // that process.
> + bool BBVectorize::trackUsesOfI(DenseSet<Value *> &Users,
> + AliasSetTracker&WriteSet, Instruction *I,
> + Instruction *J, bool UpdateUsers,
> + std::multimap<Value *, Value *> *LoadMoveSet) {
> + bool UsesI = false;
> +
> + // This instruction may already be marked as a user due, for example, to
> + // being a member of a selected pair.
> + if (Users.count(J))
> + UsesI = true;
> +
> + if (!UsesI)
> + for (User::op_iterator JU = J->op_begin(), e = J->op_end();
> + JU != e; ++JU) {
This is correct, but it's common to say "JU = ..., JE = ..." for
consistency.
> + Value *V = *JU;
> + if (I == V || Users.count(V)) {
> + UsesI = true;
> + break;
> + }
> + }
> + if (!UsesI&& J->mayReadFromMemory()) {
> + if (LoadMoveSet) {
> + VPIteratorPair JPairRange = LoadMoveSet->equal_range(J);
> + UsesI = isSecondInIteratorPair<Value*>(I, JPairRange);
> + } else {
> + for (AliasSetTracker::iterator W = WriteSet.begin(),
> + WE = WriteSet.end(); W != WE; ++W) {
> + for (AliasSet::iterator A = W->begin(), AE = W->end();
> + A != AE; ++A) {
> + AliasAnalysis::Location ptrLoc(A->getValue(), A->getSize(),
> + A->getTBAAInfo());
> + if (AA->getModRefInfo(J, ptrLoc) != AliasAnalysis::NoModRef) {
> + UsesI = true;
> + break;
> + }
> + }
> + if (UsesI) break;
> + }
> + }
> + }
> +
> + if (UsesI&& UpdateUsers) {
> + if (J->mayWriteToMemory()) WriteSet.add(J);
> + Users.insert(J);
> + }
> +
> + return UsesI;
> + }
> +
> + // This function iterates over all instruction pairs in the provided
> + // basic block and collects all candidate pairs for vectorization.
> + void BBVectorize::getCandidatePairs(BasicBlock&BB,
> + std::multimap<Value *, Value *> &CandidatePairs,
> + std::vector<Value *> &PairableInsts) {
> + BasicBlock::iterator E = BB.end();
> + for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) {
> + bool IsSimpleLoadStore;
> + if (!isInstVectorizable(I, IsSimpleLoadStore)) continue;
> +
> + // Look for an instruction with which to pair instruction *I...
> + DenseSet<Value *> Users;
> + AliasSetTracker WriteSet(*AA);
> + BasicBlock::iterator J = I; ++J;
> + for (unsigned ss = 0; J != E&& ss<= SearchLimit; ++J, ++ss) {
> + // Determine if J uses I, if so, exit the loop.
> + bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !FastDep);
> + if (FastDep) {
> + // Note: For this heuristic to be effective, independent operations
> + // must tend to be intermixed. This is likely to be true from some
> + // kinds of grouped loop unrolling (but not the generic LLVM pass),
> + // but otherwise may require some kind of reordering pass.
> +
> + // When using fast dependency analysis,
> + // stop searching after first use:
> + if (UsesI) break;
> + } else {
> + if (UsesI) continue;
> + }
> +
> + // J does not use I, and comes before the first use of I, so it can be
> + // merged with I if the instructions are compatible.
> + if (!areInstsCompatible(I, J, IsSimpleLoadStore)) continue;
> +
> + // J is a candidate for merging with I.
> + if (!PairableInsts.size() ||
> + PairableInsts[PairableInsts.size()-1] != I) {
> + PairableInsts.push_back(I);
> + }
> + CandidatePairs.insert(ValuePair(I, J));
> + DEBUG(if (DebugCandidateSelection) dbgs()<< "BBV: candidate pair"
> +<< *I<< "<-> "<< *J<< "\n");
> + }
> + }
> +
> + DEBUG(dbgs()<< "BBV: found "<< PairableInsts.size()
> +<< " instructions with candidate pairs\n");
> + }
> +
> + // Finds candidate pairs connected to the pair P =<PI, PJ>. This means that
> + // it looks for pairs such that both members have an input which is an
> + // output of PI or PJ.
> + void BBVectorize::computePairsConnectedTo(
> + std::multimap<Value *, Value *> &CandidatePairs,
> + std::vector<Value *> &PairableInsts,
> + std::multimap<ValuePair, ValuePair> &ConnectedPairs,
> + ValuePair P) {
> + // For each possible pairing for this variable, look at the uses of
> + // the first value...
> + for (Value::use_iterator I = P.first->use_begin(),
> + E = P.first->use_end(); I != E; ++I) {
> + VPIteratorPair IPairRange = CandidatePairs.equal_range(*I);
> +
> + // For each use of the first variable, look for uses of the second
> + // variable...
> + for (Value::use_iterator J = P.second->use_begin(),
> + E2 = P.second->use_end(); J != E2; ++J) {
> + VPIteratorPair JPairRange = CandidatePairs.equal_range(*J);
> +
> + // Look for<I, J>:
> + if (isSecondInIteratorPair<Value*>(*J, IPairRange))
> + ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
> +
> + // Look for<J, I>:
> + if (isSecondInIteratorPair<Value*>(*I, JPairRange))
> + ConnectedPairs.insert(VPPair(P, ValuePair(*J, *I)));
> + }
> +
> + if (SplatBreaksChain) continue;
> + // Look for cases where just the first value in the pair is used by
> + // both members of another pair (splatting).
> + for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) {
> + if (isSecondInIteratorPair<Value*>(*J, IPairRange))
> + ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
> + }
> + }
> +
> + if (SplatBreaksChain) return;
> + // Look for cases where just the second value in the pair is used by
> + // both members of another pair (splatting).
> + for (Value::use_iterator I = P.second->use_begin(),
> + E = P.second->use_end(); I != E; ++I) {
> + VPIteratorPair IPairRange = CandidatePairs.equal_range(*I);
> +
> + for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) {
> + if (isSecondInIteratorPair<Value*>(*J, IPairRange))
> + ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
> + }
> + }
> + }
> +
> + // This function figures out which pairs are connected. Two pairs are
> + // connected if some output of the first pair forms an input to both members
> + // of the second pair.
> + void BBVectorize::computeConnectedPairs(
> + std::multimap<Value *, Value *> &CandidatePairs,
> + std::vector<Value *> &PairableInsts,
> + std::multimap<ValuePair, ValuePair> &ConnectedPairs) {
> +
> + for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
> + PE = PairableInsts.end(); PI != PE; ++PI) {
> + VPIteratorPair choiceRange = CandidatePairs.equal_range(*PI);
> +
> + for (std::multimap<Value *, Value *>::iterator P = choiceRange.first;
> + P != choiceRange.second; ++P)
> + computePairsConnectedTo(CandidatePairs, PairableInsts,
> + ConnectedPairs, *P);
> + }
> +
> + DEBUG(dbgs()<< "BBV: found "<< ConnectedPairs.size()
> +<< " pair connections.\n");
> + }
> +
> + // This function builds a set of use tuples such that<A, B> is in the set
> + // if B is in the use tree of A. If B is in the use tree of A, then B
> + // depends on the output of A.
> + void BBVectorize::buildDepMap(
> + BasicBlock&BB,
> + std::multimap<Value *, Value *> &CandidatePairs,
> + std::vector<Value *> &PairableInsts,
> + DenseSet<ValuePair> &PairableInstUsers) {
> + DenseSet<Value *> IsInPair;
> + for (std::multimap<Value *, Value *>::iterator C = CandidatePairs.begin(),
> + E = CandidatePairs.end(); C != E; ++C) {
> + IsInPair.insert(C->first);
> + IsInPair.insert(C->second);
> + }
> +
> + // Iterate through the basic block, recording all Users of each
> + // pairable instruction.
> +
> + BasicBlock::iterator E = BB.end();
> + for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) {
"for (...; !isa<TerminatorInst>(I); ++I) {" should also work, and avoid
the need to declare 'E' above.
> + if (IsInPair.find(I) == IsInPair.end()) continue;
> +
> + DenseSet<Value *> Users;
> + AliasSetTracker WriteSet(*AA);
> + for (BasicBlock::iterator J = llvm::next(I); J != E; ++J)
> + (void) trackUsesOfI(Users, WriteSet, I, J);
> +
> + for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end();
> + U != E; ++U)
> + PairableInstUsers.insert(ValuePair(I, *U));
> + }
> + }
> +
> + // Returns true if an input to pair P is an output of pair Q and also an
> + // input of pair Q is an output of pair P. If this is the case, then these
> + // two pairs cannot be simultaneously fused.
> + bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q,
> + DenseSet<ValuePair> &PairableInstUsers,
> + std::multimap<ValuePair, ValuePair> *PairableInstUserMap) {
> + // Two pairs are in conflict if they are mutual Users of eachother.
> + bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) ||
> + PairableInstUsers.count(ValuePair(P.first, Q.second)) ||
> + PairableInstUsers.count(ValuePair(P.second, Q.first)) ||
> + PairableInstUsers.count(ValuePair(P.second, Q.second));
> + bool PUsesQ = PairableInstUsers.count(ValuePair(Q.first, P.first)) ||
> + PairableInstUsers.count(ValuePair(Q.first, P.second)) ||
> + PairableInstUsers.count(ValuePair(Q.second, P.first)) ||
> + PairableInstUsers.count(ValuePair(Q.second, P.second));
> + if (PairableInstUserMap) {
> + // FIXME: The expensive part of the cycle check is not so much the cycle
> + // check itself but this edge insertion procedure. This needs some
> + // profiling and probably a different data structure (same is true of
> + // most uses of std::multimap).
> + if (PUsesQ) {
> + VPPIteratorPair QPairRange = PairableInstUserMap->equal_range(Q);
> + if (!isSecondInIteratorPair(P, QPairRange))
> + PairableInstUserMap->insert(VPPair(Q, P));
> + }
> + if (QUsesP) {
> + VPPIteratorPair PPairRange = PairableInstUserMap->equal_range(P);
> + if (!isSecondInIteratorPair(Q, PPairRange))
> + PairableInstUserMap->insert(VPPair(P, Q));
> + }
> + }
> +
> + return (QUsesP&& PUsesQ);
> + }
> +
> + // This function walks the use graph of current pairs to see if, starting
> + // from P, the walk returns to P.
> + bool BBVectorize::pairWillFormCycle(ValuePair P,
> + std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
> + DenseSet<ValuePair> &CurrentPairs) {
> + DEBUG(if (DebugCycleCheck)
> + dbgs()<< "BBV: starting cycle check for : "<< *P.first<< "<-> "
> +<< *P.second<< "\n");
> + // A lookup table of visisted pairs is kept because the PairableInstUserMap
> + // contains non-direct associations.
> + DenseSet<ValuePair> Visited;
> + std::vector<ValuePair> Q;
> + // General depth-first post-order traversal:
> + Q.push_back(P);
> + while (!Q.empty()) {
This is always true on the first iteration. Please make this a:
SmallVector<ValuePair, 32> Q;
Q.push_back(P);
do {
ValuePair QTop = Q.pop_back_val();
Visited.insert(QTop);
// ...
} while(!Q.empty());
loop.
> + ValuePair QTop = Q.back();
> +
> + Visited.insert(QTop);
> + Q.pop_back();
> +
> + DEBUG(if (DebugCycleCheck)
> + dbgs()<< "BBV: cycle check visiting: "<< *QTop.first<< "<-> "
> +<< *QTop.second<< "\n");
> + VPPIteratorPair QPairRange = PairableInstUserMap.equal_range(QTop);
> + for (std::multimap<ValuePair, ValuePair>::iterator C = QPairRange.first;
> + C != QPairRange.second; ++C) {
> + if (C->second == P) {
> + DEBUG(dbgs()
> +<< "BBV: rejected to prevent non-trivial cycle formation:"
> +<< *C->first.first<< "<-> "<< *C->first.second<< "\n");
> + return true;
> + }
> +
> + if (CurrentPairs.count(C->second)> 0&&
> + Visited.count(C->second) == 0)
> + Q.push_back(C->second);
> + }
> + }
> +
> + return false;
> + }
> +
> + // This function builds the initial tree of connected pairs with the
> + // pair J at the root.
> + void BBVectorize::buildInitialTreeFor(
> + std::multimap<Value *, Value *> &CandidatePairs,
> + std::vector<Value *> &PairableInsts,
> + std::multimap<ValuePair, ValuePair> &ConnectedPairs,
> + DenseSet<ValuePair> &PairableInstUsers,
> + DenseMap<Value *, Value *> &ChosenPairs,
> + DenseMap<ValuePair, size_t> &Tree, ValuePair J) {
> + // Each of these pairs is viewed as the root node of a Tree. The Tree
> + // is then walked (depth-first). As this happens, we keep track of
> + // the pairs that compose the Tree and the maximum depth of the Tree.
> + std::vector<ValuePairWithDepth> Q;
> + // General depth-first post-order traversal:
> + Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
> + while (!Q.empty()) {
> + ValuePairWithDepth QTop = Q.back();
This loop can be rotated too, though you may not want to switch to using
pop_back_val() here (I see that you do additional pushes and optional
pops in the loop).
> +
> + // Push each child onto the queue:
> + bool MoreChildren = false;
> + size_t MaxChildDepth = QTop.second;
> + VPPIteratorPair qtRange = ConnectedPairs.equal_range(QTop.first);
> + for (std::map<ValuePair, ValuePair>::iterator k = qtRange.first;
> + k != qtRange.second; ++k) {
> + // Make sure that this child pair is still a candidate:
> + bool IsStillCand = false;
> + VPIteratorPair checkRange =
> + CandidatePairs.equal_range(k->second.first);
> + for (std::multimap<Value *, Value *>::iterator m = checkRange.first;
> + m != checkRange.second; ++m) {
> + if (m->second == k->second.second) {
> + IsStillCand = true;
> + break;
> + }
> + }
> +
> + if (IsStillCand) {
> + DenseMap<ValuePair, size_t>::iterator C = Tree.find(k->second);
> + if (C == Tree.end()) {
> + size_t d = getDepthFactor(k->second.first);
> + Q.push_back(ValuePairWithDepth(k->second, QTop.second+d));
> + MoreChildren = true;
> + } else {
> + MaxChildDepth = std::max(MaxChildDepth, C->second);
> + }
> + }
> + }
> +
> + if (!MoreChildren) {
> + // Record the current pair as part of the Tree:
> + Tree.insert(ValuePairWithDepth(QTop.first, MaxChildDepth));
> + Q.pop_back();
> + }
> + }
> + }
> +
> + // Given some initial tree, prune it by removing conflicting pairs (pairs
> + // that cannot be simultaneously chosen for vectorization).
> + void BBVectorize::pruneTreeFor(
> + std::multimap<Value *, Value *> &CandidatePairs,
> + std::vector<Value *> &PairableInsts,
> + std::multimap<ValuePair, ValuePair> &ConnectedPairs,
> + DenseSet<ValuePair> &PairableInstUsers,
> + std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
> + DenseMap<Value *, Value *> &ChosenPairs,
> + DenseMap<ValuePair, size_t> &Tree,
> + DenseSet<ValuePair> &PrunedTree, ValuePair J,
> + bool UseCycleCheck) {
> + std::vector<ValuePairWithDepth> Q;
> + // General depth-first post-order traversal:
> + Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
> + while (!Q.empty()) {
> + ValuePairWithDepth QTop = Q.back();
> + PrunedTree.insert(QTop.first);
> + Q.pop_back();
Another loop to restructure.
(Stopped reviewing at this point.)
Nick
More information about the llvm-commits
mailing list