[llvm-commits] [Review Request][Patch]Introduce the VectorizeConfig class.
Hal Finkel
hfinkel at anl.gov
Thu Apr 5 08:36:50 PDT 2012
On Thu, 5 Apr 2012 23:24:35 +0800
Hongbin Zheng <etherzhhb at gmail.com> wrote:
> On Thu, Apr 5, 2012 at 11:23 PM, Hongbin Zheng <etherzhhb at gmail.com>
> wrote:
> > hi,
> >
> > The up-to-date patch is attached, please review :)
> >
> > best regards
> > ether
> >
> > ---
> > include/llvm/Transforms/Vectorize.h      | 68 ++++++++++++++++++++++-
> > lib/Transforms/Vectorize/BBVectorize.cpp | 90 +++++++++++++++++++-----------
> > 2 files changed, 124 insertions(+), 34 deletions(-)
> >
> > diff --git a/include/llvm/Transforms/Vectorize.h
> > b/include/llvm/Transforms/Vectorize.h
> > index ad06937..80ccf0c 100644
> > --- a/include/llvm/Transforms/Vectorize.h
> > +++ b/include/llvm/Transforms/Vectorize.h
> > @@ -20,10 +20,73 @@ class BasicBlock;
> > class BasicBlockPass;
> >
> > //===----------------------------------------------------------------------===//
> > +/// @brief Vectorize configuration.
> > +struct VectorizeConfig {
> > +  //===--------------------------------------------------------------------===//
> > + // Target architecture related parameters
> > +
> > + /// @brief The size of the native vector registers.
> > + unsigned VectorBits;
> > +
> > + /// @brief Don't try to vectorize integer values.
> > + bool NoInts;
> > +
> > + /// @brief Don't try to vectorize floating-point values.
> > + bool NoFloats;
> > +
> > +  /// @brief Don't try to vectorize casting (conversion) operations.
> > + bool NoCasts;
> > +
> > + /// @brief Don't try to vectorize floating-point math intrinsics.
> > + bool NoMath;
> > +
> > +  /// @brief Don't try to vectorize the fused-multiply-add intrinsic.
> > + bool NoFMA;
> > +
> > + /// @brief Don't try to vectorize loads and stores.
> > + bool NoMemOps;
> > +
> > + /// @brief Only generate aligned loads and stores.
> > + bool AlignedOnly;
> > +
> > +  //===--------------------------------------------------------------------===//
> > + // Misc parameters
> > +
> > + /// @brief The required chain depth for vectorization.
> > + unsigned ReqChainDepth;
> > +
> > + /// @brief The maximum search distance for instruction pairs.
> > + unsigned SearchLimit;
> > +
> > +  /// @brief The maximum number of candidate pairs with which to use a full
> > +  /// cycle check.
> > + unsigned MaxCandPairsForCycleCheck;
> > +
> > + /// @brief Replicating one element to a pair breaks the chain.
> > + bool SplatBreaksChain;
> > +
> > + /// @brief The maximum number of pairable instructions per group.
> > + unsigned MaxInsts;
> > +
> > + /// @brief The maximum number of pairing iterations.
> > + unsigned MaxIter;
> > +
> > +  /// @brief Don't boost the chain-depth contribution of loads and stores.
> > + bool NoMemOpBoost;
> > +
> > + /// @brief Use a fast instruction dependency analysis.
> > + bool FastDep;
> > +
> > +  /// @brief Initialize the VectorizeConfig from command line options.
> > + VectorizeConfig();
> > +};
> > +
> > +//===----------------------------------------------------------------------===//
> > //
> > // BBVectorize - A basic-block vectorization pass.
> > //
> > -BasicBlockPass *createBBVectorizePass();
> > +BasicBlockPass *
> > +createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
> >
> > //===----------------------------------------------------------------------===//
> > /// @brief Vectorize the BasicBlock.
> > @@ -35,7 +98,8 @@ BasicBlockPass *createBBVectorizePass();
> > ///
> > /// @return True if the BB is changed, false otherwise.
> > ///
> > -bool vectorizeBasicBlock(Pass *P, BasicBlock &BB);
> > +bool vectorizeBasicBlock(Pass *P, BasicBlock &BB,
> > + VectorizeConfig C = VectorizeConfig());
> sorry, should be "const" too
Right.
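For anyone following along, the intent is that a front end can now
configure the vectorizer programmatically instead of relying only on the
command-line flags. A minimal sketch against the API above (the
PassManager setup and the particular field values here are illustrative,
not part of the patch):

  #include "llvm/PassManager.h"
  #include "llvm/Transforms/Vectorize.h"

  void addBBVectorize(llvm::PassManager &PM) {
    llvm::VectorizeConfig C;  // seeded from the bb-vectorize-* defaults
    C.VectorBits = 256;       // hypothetical target with 256-bit vector registers
    C.NoFMA = true;           // hypothetical target without a fused multiply-add
    PM.add(llvm::createBBVectorizePass(C));
  }

Calling createBBVectorizePass() with no argument keeps the old behavior,
since the default-constructed config just reads the command-line options.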
> >
> > } // End llvm namespace
> >
> > diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp
> > b/lib/Transforms/Vectorize/BBVectorize.cpp
> > index 5abb242..a9d09fd 100644
> > --- a/lib/Transforms/Vectorize/BBVectorize.cpp
> > +++ b/lib/Transforms/Vectorize/BBVectorize.cpp
> > @@ -140,11 +140,15 @@ STATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize");
> > namespace {
> > struct BBVectorize : public BasicBlockPass {
> > static char ID; // Pass identification, replacement for typeid
> > - BBVectorize() : BasicBlockPass(ID) {
> > +
> > + VectorizeConfig Config;
> > +
> > + BBVectorize(VectorizeConfig C = VectorizeConfig())
> > + : BasicBlockPass(ID), Config(C) {
> > initializeBBVectorizePass(*PassRegistry::getPassRegistry());
> > }
> >
> > - BBVectorize(Pass *P) : BasicBlockPass(ID) {
> > +  BBVectorize(Pass *P, VectorizeConfig C) : BasicBlockPass(ID), Config(C) {
> >      AA = &P->getAnalysis<AliasAnalysis>();
> > SE = &P->getAnalysis<ScalarEvolution>();
> > TD = P->getAnalysisIfAvailable<TargetData>();
> > @@ -291,9 +295,10 @@ namespace {
> >      // Iterate a sufficient number of times to merge types of size 1 bit,
> >      // then 2 bits, then 4, etc. up to half of the target vector width of the
> >      // target vector register.
> > -    for (unsigned v = 2, n = 1; v <= VectorBits && (!MaxIter || n <= MaxIter);
> > +    for (unsigned v = 2, n = 1;
> > +         v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter);
> >           v *= 2, ++n) {
> > - DEBUG(dbgs() << "BBV: fusing loop #" << n <<
> > + DEBUG(dbgs() << "BBV: fusing loop #" << n <<
> > " for " << BB.getName() << " in " <<
> > BB.getParent()->getName() << "...\n");
> > if (vectorizePairs(BB))
> > @@ -343,7 +348,7 @@ namespace {
> >      // candidate chains where longer chains are considered to be better.
> >      // Note: when this function returns 0, the resulting instructions are
> >      // not actually fused.
> > -    static inline size_t getDepthFactor(Value *V) {
> > +    inline size_t getDepthFactor(Value *V) {
> >        // InsertElement and ExtractElement have a depth factor of zero. This is
> >        // for two reasons: First, they cannot be usefully fused. Second, because
> >        // the pass generates a lot of these, they can confuse the simple metric
> > @@ -357,8 +362,8 @@ namespace {
> >
> >        // Give a load or store half of the required depth so that load/store
> >        // pairs will vectorize.
> > -      if (!NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
> > -        return ReqChainDepth/2;
> > +      if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
> > +        return Config.ReqChainDepth/2;
> >
> > return 1;
> > }
> > @@ -431,9 +436,9 @@ namespace {
> > case Intrinsic::exp:
> > case Intrinsic::exp2:
> > case Intrinsic::pow:
> > - return !NoMath;
> > + return !Config.NoMath;
> > case Intrinsic::fma:
> > - return !NoFMA;
> > + return !Config.NoFMA;
> > }
> > }
> >
> > @@ -527,16 +532,16 @@ namespace {
> > } else if (LoadInst *L = dyn_cast<LoadInst>(I)) {
> > // Vectorize simple loads if possible:
> > IsSimpleLoadStore = L->isSimple();
> > - if (!IsSimpleLoadStore || NoMemOps)
> > + if (!IsSimpleLoadStore || Config.NoMemOps)
> > return false;
> > } else if (StoreInst *S = dyn_cast<StoreInst>(I)) {
> > // Vectorize simple stores if possible:
> > IsSimpleLoadStore = S->isSimple();
> > - if (!IsSimpleLoadStore || NoMemOps)
> > + if (!IsSimpleLoadStore || Config.NoMemOps)
> > return false;
> > } else if (CastInst *C = dyn_cast<CastInst>(I)) {
> > // We can vectorize casts, but not casts of pointer types, etc.
> > - if (NoCasts)
> > + if (Config.NoCasts)
> > return false;
> >
> > Type *SrcTy = C->getSrcTy();
> > @@ -576,14 +581,14 @@ namespace {
> > !(VectorType::isValidElementType(T2) || T2->isVectorTy()))
> > return false;
> >
> > -    if (NoInts && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
> > +    if (Config.NoInts && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
> >        return false;
> >
> > -    if (NoFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
> > +    if (Config.NoFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
> >        return false;
> >
> > - if (T1->getPrimitiveSizeInBits() > VectorBits/2 ||
> > - T2->getPrimitiveSizeInBits() > VectorBits/2)
> > + if (T1->getPrimitiveSizeInBits() > Config.VectorBits/2 ||
> > + T2->getPrimitiveSizeInBits() > Config.VectorBits/2)
> > return false;
> >
> > return true;
> > @@ -611,7 +616,7 @@ namespace {
> > LI->isVolatile() != LJ->isVolatile() ||
> > LI->getOrdering() != LJ->getOrdering() ||
> > LI->getSynchScope() != LJ->getSynchScope())
> > - return false;
> > + return false;
> > } else if ((SI = dyn_cast<StoreInst>(I)) && (SJ = dyn_cast<StoreInst>(J))) {
> > if (SI->getValueOperand()->getType() !=
> > SJ->getValueOperand()->getType() ||
> > @@ -632,7 +637,7 @@ namespace {
> > int64_t OffsetInElmts = 0;
> > if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
> > OffsetInElmts) && abs64(OffsetInElmts) == 1) {
> > - if (AlignedOnly) {
> > + if (Config.AlignedOnly) {
> >          Type *aType = isa<StoreInst>(I) ?
> >            cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
> >          // An aligned load or store is possible only if the instruction
> > @@ -753,12 +758,12 @@ namespace {
> > AliasSetTracker WriteSet(*AA);
> > bool JAfterStart = IAfterStart;
> > BasicBlock::iterator J = llvm::next(I);
> > -      for (unsigned ss = 0; J != E && ss <= SearchLimit; ++J, ++ss) {
> > +      for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
> >          if (J == Start) JAfterStart = true;
> >
> > // Determine if J uses I, if so, exit the loop.
> > -        bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !FastDep);
> > -        if (FastDep) {
> > +        bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep);
> > +        if (Config.FastDep) {
> >            // Note: For this heuristic to be effective, independent operations
> >            // must tend to be intermixed. This is likely to be true from some
> >            // kinds of grouped loop unrolling (but not the generic LLVM pass),
> > @@ -796,7 +801,7 @@ namespace {
> >        // If we have already found too many pairs, break here and this function
> >        // will be called again starting after the last instruction selected
> >        // during this invocation.
> > - if (PairableInsts.size() >= MaxInsts) {
> > + if (PairableInsts.size() >= Config.MaxInsts) {
> > ShouldContinue = true;
> > break;
> > }
> > @@ -841,7 +846,7 @@ namespace {
> > ConnectedPairs.insert(VPPair(P, ValuePair(*J, *I)));
> > }
> >
> > - if (SplatBreaksChain) continue;
> > + if (Config.SplatBreaksChain) continue;
> >      // Look for cases where just the first value in the pair is used by
> >      // both members of another pair (splatting).
> >      for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) {
> > @@ -850,7 +855,7 @@ namespace {
> > }
> > }
> >
> > - if (SplatBreaksChain) return;
> > + if (Config.SplatBreaksChain) return;
> >      // Look for cases where just the second value in the pair is used by
> >      // both members of another pair (splatting).
> > for (Value::use_iterator I = P.second->use_begin(),
> > @@ -1280,7 +1285,7 @@ namespace {
> > << *J->first << " <-> " << *J->second << "} of depth "
> > << MaxDepth << " and size " << PrunedTree.size() <<
> > " (effective size: " << EffSize << ")\n");
> > -      if (MaxDepth >= ReqChainDepth && EffSize > BestEffSize) {
> > +      if (MaxDepth >= Config.ReqChainDepth && EffSize > BestEffSize) {
> >          BestMaxDepth = MaxDepth;
> > BestEffSize = EffSize;
> > BestTree = PrunedTree;
> > @@ -1296,7 +1301,8 @@ namespace {
> >                  std::multimap<ValuePair, ValuePair> &ConnectedPairs,
> >                  DenseSet<ValuePair> &PairableInstUsers,
> >                  DenseMap<Value *, Value *>& ChosenPairs) {
> > -      bool UseCycleCheck = CandidatePairs.size() <= MaxCandPairsForCycleCheck;
> > + bool UseCycleCheck =
> > + CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck;
> > std::multimap<ValuePair, ValuePair> PairableInstUserMap;
> > for (std::vector<Value *>::iterator I = PairableInsts.begin(),
> > E = PairableInsts.end(); I != E; ++I) {
> > @@ -1547,11 +1553,11 @@ namespace {
> > unsigned IID = F->getIntrinsicID();
> > if (o == NumOperands-1) {
> > BasicBlock &BB = *I->getParent();
> > -
> > +
> > Module *M = BB.getParent()->getParent();
> > Type *ArgType = I->getType();
> > Type *VArgType = getVecTypeForPair(ArgType);
> > -
> > +
> > // FIXME: is it safe to do this here?
> > ReplacedOperands[o] = Intrinsic::getDeclaration(M,
> > (Intrinsic::ID) IID, VArgType);
> > @@ -1867,11 +1873,31 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
> > INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
> > INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
> >
> > -BasicBlockPass *llvm::createBBVectorizePass() {
> > - return new BBVectorize();
> > +BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) {
> > + return new BBVectorize(C);
> > }
> >
> > -bool llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB) {
> > - BBVectorize BBVectorizer(P);
> > +bool llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, VectorizeConfig C) {
Same here too.
-Hal
> > + BBVectorize BBVectorizer(P, C);
> > return BBVectorizer.vectorizeBB(BB);
> > }
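A quick usage sketch for the function form (hypothetical caller; note
that the Pass *P constructor above fetches AliasAnalysis and
ScalarEvolution, so the calling pass must have addRequired'd both in its
getAnalysisUsage):

  // Inside some other pass (illustrative only):
  bool MyPass::runOnBasicBlock(llvm::BasicBlock &BB) {
    llvm::VectorizeConfig C;  // defaults from the command line
    C.MaxInsts = 200;         // illustrative override of the per-group cap
    return llvm::vectorizeBasicBlock(this, BB, C);
  }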
> > +
> > +//===----------------------------------------------------------------------===//
> > +VectorizeConfig::VectorizeConfig() {
> > + VectorBits = ::VectorBits;
> > + NoInts = ::NoInts;
> > + NoFloats = ::NoFloats;
> > + NoCasts = ::NoCasts;
> > + NoMath = ::NoMath;
> > + NoFMA = ::NoFMA;
> > + NoMemOps = ::NoMemOps;
> > + AlignedOnly = ::AlignedOnly;
> > + ReqChainDepth = ::ReqChainDepth;
> > + SearchLimit = ::SearchLimit;
> > + MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck;
> > + SplatBreaksChain = ::SplatBreaksChain;
> > + MaxInsts = ::MaxInsts;
> > + MaxIter = ::MaxIter;
> > + NoMemOpBoost = ::NoMemOpBoost;
> > + FastDep = ::FastDep;
> > +}
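For context, the ::Name references here read the file-static cl::opt
globals that BBVectorize.cpp already defines; from memory those
declarations look roughly like this (existing code, not part of this
patch):

  static cl::opt<unsigned>
  VectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden,
    cl::desc("The size of the native vector registers"));

so a default-constructed VectorizeConfig still honors whatever the user
passed on the command line.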
> > --
> > 1.7.5.4
> >
--
Hal Finkel
Postdoctoral Appointee
Leadership Computing Facility
Argonne National Laboratory